Scanner: disallow unicode escapes in regexp flags.

The spec explicitly forbids them. V8 never handled them properly either: the Scanner merely accepted them (it had code to add them literally to the LiteralBuffer), and later on the RegExp constructor rejected them. According to the spec, unicode escapes in regexp flags should be an early error ("It is a Syntax Error if IdentifierPart contains a Unicode escape sequence."). Note that the Scanner is still more relaxed about regexp flags than the spec. In particular, it accepts any identifier parts (not just a small set of letters) and doesn't check for duplicates. R=rossberg@chromium.org Review URL: https://codereview.chromium.org/700373003 Cr-Commit-Position: refs/heads/master@{#25215} git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@25215 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2014-11-07 14:31:54 +00:00 · 2014-11-07 14:31:54 +00:00 · 2b026851ac
commit 2b026851ac
parent 364cec0034
4 changed files with 6 additions and 29 deletions
--- a/src/scanner.cc
+++ b/src/scanner.cc
@ -1138,24 +1138,6 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
 }


-bool Scanner::ScanLiteralUnicodeEscape() {
-  DCHECK(c0_ == '\\');
-  AddLiteralChar(c0_);
-  Advance();
-  int hex_digits_read = 0;
-  if (c0_ == 'u') {
-    AddLiteralChar(c0_);
-    while (hex_digits_read < 4) {
-      Advance();
-      if (!IsHexDigit(c0_)) break;
-      AddLiteralChar(c0_);
-      ++hex_digits_read;
-    }
-  }
-  return hex_digits_read == 4;
-}
-
-
 bool Scanner::ScanRegExpFlags() {
  // Scan regular expression flags.
  LiteralScope literal(this);
@ -1163,10 +1145,7 @@ bool Scanner::ScanRegExpFlags() {
    if (c0_ != '\\') {
      AddLiteralCharAdvance();
    } else {
-      if (!ScanLiteralUnicodeEscape()) {
-        return false;
-      }
-      Advance();
+      return false;
    }
  }
  literal.Complete();
--- a/src/scanner.h
+++ b/src/scanner.h
@ -637,10 +637,6 @@ class Scanner {
  // Decodes a Unicode escape-sequence which is part of an identifier.
  // If the escape sequence cannot be decoded the result is kBadChar.
  uc32 ScanIdentifierUnicodeEscape();
-  // Scans a Unicode escape-sequence and adds its characters,
-  // uninterpreted, to the current literal. Used for parsing RegExp
-  // flags.
-  bool ScanLiteralUnicodeEscape();

  // Return the current source position.
  int source_pos() {
--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@ -4302,7 +4302,9 @@ TEST(InvalidUnicodeEscapes) {
    "var foob\\u123r = 0;",
    "var \\u123roo = 0;",
    "\"foob\\u123rr\"",
-    "/regex/g\\u123r",
+    // No escapes allowed in regexp flags
+    "/regex/\\u0069g",
+    "/regex/\\u006g",
    NULL};
  RunParserSyncTest(context_data, data, kError);
 }
--- a/test/mjsunit/regress/regress-136048.js
+++ b/test/mjsunit/regress/regress-136048.js
@ -26,9 +26,9 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 try {
-  /foo/\u0069
+  eval("/foo/\\u0069")
 } catch (e) {
  assertEquals(
-      "SyntaxError: Invalid flags supplied to RegExp constructor '\\u0069'",
+      "SyntaxError: Invalid regular expression flags",
      e.toString());
 }