[regexp] Add tests for recent changes in Annex B

See https://github.com/tc39/ecma262/pull/303.

BUG=v8:5937,v8:6201

Review-Url: https://codereview.chromium.org/2793313002
Cr-Commit-Position: refs/heads/master@{#44467}
This commit is contained in:
jgruber 2017-04-07 00:52:10 -07:00 committed by Commit bot
parent a8651c5671
commit 4498419438
2 changed files with 48 additions and 5 deletions

View File

@ -499,9 +499,9 @@ RegExpTree* RegExpParser::ParseDisjunction() {
uc32 letter = controlLetter & ~('a' ^ 'A');
if (letter < 'A' || 'Z' < letter) {
// controlLetter is not in range 'A'-'Z' or 'a'-'z'.
// This is outside the specification. We match JSC in
// reading the backslash as a literal character instead
// of as starting an escape.
// Read the backslash as a literal character instead of as
// starting an escape.
// ES#prod-annexB-ExtendedPatternCharacter
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
return ReportError(CStrVector("Invalid unicode escape"));
@ -1046,6 +1046,7 @@ uc32 RegExpParser::ParseOctalLiteral() {
DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker);
// For compatibility with some other browsers (not all), we parse
// up to three octal digits with a value below 256.
// ES#prod-annexB-LegacyOctalEscapeSequence
uc32 value = current() - '0';
Advance();
if ('0' <= current() && current() <= '7') {
@ -1334,8 +1335,9 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
case 'c': {
uc32 controlLetter = Next();
uc32 letter = controlLetter & ~('A' ^ 'a');
// For compatibility with JSC, inside a character class. We also accept
// digits and underscore as control characters, unless with /u.
// Inside a character class, we also accept digits and underscore as
// control characters, unless with /u. See Annex B:
// ES#prod-annexB-ClassControlLetter
if (letter >= 'A' && letter <= 'Z') {
Advance(2);
// Control letters mapped to ASCII control characters in the range
@ -1354,6 +1356,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
}
// We match JSC in reading the backslash as a literal
// character instead of as starting an escape.
// TODO(v8:6201): Not yet covered by the spec.
return '\\';
}
case '0':
@ -1373,6 +1376,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
// For compatibility, we interpret a decimal escape that isn't
// a back reference (and therefore either \0 or not valid according
// to the specification) as a 1..3 digit octal character code.
// ES#prod-annexB-LegacyOctalEscapeSequence
if (unicode()) {
// With /u, decimal escape is not interpreted as octal character code.
ReportError(CStrVector("Invalid class escape"));

View File

@ -764,3 +764,42 @@ re.lastIndex = NaN;
assertEquals(NaN, re.lastIndex);
"abc".search(re);
assertEquals(NaN, re.lastIndex);
// Annex B changes: https://github.com/tc39/ecma262/pull/303
// Every pattern below is written without the /u flag. Each assertion pins how
// one legacy pattern form is interpreted; the trailing comments name the
// grammar production that the pattern is expected to fall into.
// The pattern {1} is rejected, whereas a stray '{', '}' or ']' matches itself.
assertThrows("/{1}/", SyntaxError);
assertTrue(/^{*$/.test("{{{"));
assertTrue(/^}*$/.test("}}}"));
assertTrue(/]/.test("]"));
// Outside a character class: \c followed by anything but an ASCII letter
// matches the backslash and the 'c' literally; \c followed by a letter
// (either case) matches the corresponding control character.
assertTrue(/^\c%$/.test("\\c%"));  // We go into ExtendedPatternCharacter.
assertTrue(/^\d%$/.test("2%"));  // ... CharacterClassEscape.
assertTrue(/^\e%$/.test("e%"));  // ... IdentityEscape.
assertTrue(/^\ca$/.test("\u{1}"));  // ... ControlLetter.
assertTrue(/^\cA$/.test("\u{1}"));  // ... ControlLetter.
assertTrue(/^\c9$/.test("\\c9"));  // ... ExtendedPatternCharacter.
assertTrue(/^\c$/.test("\\c"));  // ... ExtendedPatternCharacter.
// Inside a character class: \c followed by a character that is not a letter,
// digit or underscore leaves the backslash, the 'c' and that character as
// members of the class.
assertTrue(/^[\c%]*$/.test("\\c%"));  // TODO(v8:6201): Not covered by the spec.
assertTrue(/^[\c:]*$/.test("\\c:"));  // TODO(v8:6201): Not covered by the spec.
// Inside a character class, digits and '_' are also accepted after \c. The
// expected character is the code point of that digit or '_' modulo 32.
assertTrue(/^[\c0]*$/.test("\u{10}"));  // ... ClassControlLetter.
assertTrue(/^[\c1]*$/.test("\u{11}"));  // ('0' % 32 == 0x10)
assertTrue(/^[\c2]*$/.test("\u{12}"));
assertTrue(/^[\c3]*$/.test("\u{13}"));
assertTrue(/^[\c4]*$/.test("\u{14}"));
assertTrue(/^[\c5]*$/.test("\u{15}"));
assertTrue(/^[\c6]*$/.test("\u{16}"));
assertTrue(/^[\c7]*$/.test("\u{17}"));
assertTrue(/^[\c8]*$/.test("\u{18}"));
assertTrue(/^[\c9]*$/.test("\u{19}"));
assertTrue(/^[\c_]*$/.test("\u{1F}"));  // '_' is 0x5F; 0x5F % 32 == 0x1F.
// Only one character is consumed after \c; the second '1' is a plain member.
assertTrue(/^[\c11]*$/.test("\u{11}1"));
// Decimal escapes inside a character class: a non-octal digit is an identity
// escape, while octal digits form a character code of up to three digits.
assertTrue(/^[\8]*$/.test("8"));  // ... ClassEscape ~~> IdentityEscape.
assertTrue(/^[\7]*$/.test("\u{7}"));  // ... ClassEscape
// ~~> LegacyOctalEscapeSequence.
assertTrue(/^[\11]*$/.test("\u{9}"));  // Octal 11 == 0x9.
assertTrue(/^[\111]*$/.test("\u{49}"));  // Octal 111 == 0x49.
assertTrue(/^[\222]*$/.test("\u{92}"));  // Octal 222 == 0x92.
assertTrue(/^[\333]*$/.test("\u{DB}"));  // Octal 333 == 0xDB.
// Octal 444 would be 0x124, so only \44 (0x24) is taken as the escape and the
// final '4' is a plain class member.
assertTrue(/^[\444]*$/.test("\u{24}4"));
// A '-' that follows a class escape such as \d does not start a range: the
// class holds the digits, '-' and 'X'.
assertTrue(/^[\d-X]*$/.test("234-X-432"));  // CharacterRangeOrUnion.
// Likewise "X-Z" here is not the range X..Z: 'X' and 'Z' match, 'Y' does not.
assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));