From 4498419438746bf94fc6a296ccc2eb61a57e2738 Mon Sep 17 00:00:00 2001 From: jgruber Date: Fri, 7 Apr 2017 00:52:10 -0700 Subject: [PATCH] [regexp] Add tests for recent changes in Annex B See https://github.com/tc39/ecma262/pull/303. BUG=v8:5937,v8:6201 Review-Url: https://codereview.chromium.org/2793313002 Cr-Commit-Position: refs/heads/master@{#44467} --- src/regexp/regexp-parser.cc | 14 ++++++++----- test/mjsunit/regexp.js | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc index 656e88cff8..84044cfc36 100644 --- a/src/regexp/regexp-parser.cc +++ b/src/regexp/regexp-parser.cc @@ -499,9 +499,9 @@ RegExpTree* RegExpParser::ParseDisjunction() { uc32 letter = controlLetter & ~('a' ^ 'A'); if (letter < 'A' || 'Z' < letter) { // controlLetter is not in range 'A'-'Z' or 'a'-'z'. - // This is outside the specification. We match JSC in - // reading the backslash as a literal character instead - // of as starting an escape. + // Read the backslash as a literal character instead of as + // starting an escape. + // ES#prod-annexB-ExtendedPatternCharacter if (unicode()) { // With /u, invalid escapes are not treated as identity escapes. return ReportError(CStrVector("Invalid unicode escape")); @@ -1046,6 +1046,7 @@ uc32 RegExpParser::ParseOctalLiteral() { DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); // For compatibility with some other browsers (not all), we parse // up to three octal digits with a value below 256. + // ES#prod-annexB-LegacyOctalEscapeSequence uc32 value = current() - '0'; Advance(); if ('0' <= current() && current() <= '7') { @@ -1334,8 +1335,9 @@ uc32 RegExpParser::ParseClassCharacterEscape() { case 'c': { uc32 controlLetter = Next(); uc32 letter = controlLetter & ~('A' ^ 'a'); - // For compatibility with JSC, inside a character class. We also accept - // digits and underscore as control characters, unless with /u. + // Inside a character class, we also accept digits and underscore as + // control characters, unless with /u. See Annex B: + // ES#prod-annexB-ClassControlLetter if (letter >= 'A' && letter <= 'Z') { Advance(2); // Control letters mapped to ASCII control characters in the range @@ -1354,6 +1356,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() { } // We match JSC in reading the backslash as a literal // character instead of as starting an escape. + // TODO(v8:6201): Not yet covered by the spec. return '\\'; } case '0': @@ -1373,6 +1376,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() { // For compatibility, we interpret a decimal escape that isn't // a back reference (and therefore either \0 or not valid according // to the specification) as a 1..3 digit octal character code. + // ES#prod-annexB-LegacyOctalEscapeSequence if (unicode()) { // With /u, decimal escape is not interpreted as octal character code. ReportError(CStrVector("Invalid class escape")); diff --git a/test/mjsunit/regexp.js b/test/mjsunit/regexp.js index c9a1fd21b3..6fb5660c08 100644 --- a/test/mjsunit/regexp.js +++ b/test/mjsunit/regexp.js @@ -764,3 +764,42 @@ re.lastIndex = NaN; assertEquals(NaN, re.lastIndex); "abc".search(re); assertEquals(NaN, re.lastIndex); + +// Annex B changes: https://github.com/tc39/ecma262/pull/303 + +assertThrows("/{1}/", SyntaxError); +assertTrue(/^{*$/.test("{{{")); +assertTrue(/^}*$/.test("}}}")); +assertTrue(/]/.test("]")); +assertTrue(/^\c%$/.test("\\c%")); // We go into ExtendedPatternCharacter. +assertTrue(/^\d%$/.test("2%")); // ... CharacterClassEscape. +assertTrue(/^\e%$/.test("e%")); // ... IdentityEscape. +assertTrue(/^\ca$/.test("\u{1}")); // ... ControlLetter. +assertTrue(/^\cA$/.test("\u{1}")); // ... ControlLetter. +assertTrue(/^\c9$/.test("\\c9")); // ... ExtendedPatternCharacter. +assertTrue(/^\c$/.test("\\c")); // ... ExtendedPatternCharacter. +assertTrue(/^[\c%]*$/.test("\\c%")); // TODO(v8:6201): Not covered by the spec. +assertTrue(/^[\c:]*$/.test("\\c:")); // TODO(v8:6201): Not covered by the spec. +assertTrue(/^[\c0]*$/.test("\u{10}")); // ... ClassControlLetter. +assertTrue(/^[\c1]*$/.test("\u{11}")); // ('0' % 32 == 0x10) +assertTrue(/^[\c2]*$/.test("\u{12}")); +assertTrue(/^[\c3]*$/.test("\u{13}")); +assertTrue(/^[\c4]*$/.test("\u{14}")); +assertTrue(/^[\c5]*$/.test("\u{15}")); +assertTrue(/^[\c6]*$/.test("\u{16}")); +assertTrue(/^[\c7]*$/.test("\u{17}")); +assertTrue(/^[\c8]*$/.test("\u{18}")); +assertTrue(/^[\c9]*$/.test("\u{19}")); +assertTrue(/^[\c_]*$/.test("\u{1F}")); +assertTrue(/^[\c11]*$/.test("\u{11}1")); +assertTrue(/^[\8]*$/.test("8")); // ... ClassEscape ~~> IdentityEscape. +assertTrue(/^[\7]*$/.test("\u{7}")); // ... ClassEscape + // ~~> LegacyOctalEscapeSequence. +assertTrue(/^[\11]*$/.test("\u{9}")); +assertTrue(/^[\111]*$/.test("\u{49}")); +assertTrue(/^[\222]*$/.test("\u{92}")); +assertTrue(/^[\333]*$/.test("\u{DB}")); +assertTrue(/^[\444]*$/.test("\u{24}4")); +assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion. +assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432")); +assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));