Make regexp flag parsing stricter.

BUG=v8:1628
TEST=mjsunit/regress/regress-219
Review URL: http://codereview.chromium.org/7624045
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8973 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2011-08-19 11:02:41 +00:00 · 2011-08-19 11:02:41 +00:00 · d8a123169b
commit d8a123169b
parent dc917453b3
6 changed files with 84 additions and 122 deletions
--- a/src/messages.js
+++ b/src/messages.js
@ -207,7 +207,7 @@ function FormatMessage(message) {
      stack_overflow:               ["Maximum call stack size exceeded"],
      // SyntaxError
      unable_to_parse:              ["Parse error"],
-      duplicate_regexp_flag:        ["Duplicate RegExp flag ", "%0"],
+      invalid_regexp_flags:         ["Invalid flags supplied to RegExp constructor '", "%0", "'"],
      invalid_regexp:               ["Invalid RegExp pattern /", "%0", "/"],
      illegal_break:                ["Illegal break statement"],
      illegal_continue:             ["Illegal continue statement"],
--- a/src/regexp.js
+++ b/src/regexp.js
@ -50,24 +50,29 @@ function DoConstructRegExp(object, pattern, flags) {
  var global = false;
  var ignoreCase = false;
  var multiline = false;
-
  for (var i = 0; i < flags.length; i++) {
    var c = %_CallFunction(flags, i, StringCharAt);
    switch (c) {
      case 'g':
-        // Allow duplicate flags to be consistent with JSC and others.
+        if (global) {
+          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
+        }
        global = true;
        break;
      case 'i':
+        if (ignoreCase) {
+          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
+        }
        ignoreCase = true;
        break;
      case 'm':
+        if (multiline) {
+          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
+        }
        multiline = true;
        break;
      default:
-        // Ignore flags that have no meaning to be consistent with
-        // JSC.
-        break;
+        throw MakeSyntaxError("invalid_regexp_flags", [flags]);
    }
  }

--- a/test/mjsunit/regress/regress-219.js
+++ b/test/mjsunit/regress/regress-219.js
@ -30,6 +30,10 @@
 // We should now allow duplicates of flags.
 // (See http://code.google.com/p/v8/issues/detail?id=219)

+// That leniency was reversed by issue 1628: duplicate or unrecognized flags
+// now throw a SyntaxError, since other browsers have also tightened theirs.
+// (See http://code.google.com/p/v8/issues/detail?id=1628)
+
 // Base tests: we recognize the basic flags

 function assertFlags(re, global, multiline, ignoreCase) {
@ -53,124 +57,92 @@ assertFlags(re, true, true, true)

 // Double i's

-re = /a/ii;
-assertFlags(re, false, false, true)
+assertThrows("/a/ii");

-re = /a/gii;
-assertFlags(re, true, false, true)
+assertThrows("/a/gii");

-re = /a/igi;
-assertFlags(re, true, false, true)
+assertThrows("/a/igi");

-re = /a/iig;
-assertFlags(re, true, false, true)
+assertThrows("/a/iig");

-re = /a/gimi;
-assertFlags(re, true, true, true)
+assertThrows("/a/gimi");

-re = /a/giim;
-assertFlags(re, true, true, true)
+assertThrows("/a/giim");

-re = /a/igim;
-assertFlags(re, true, true, true)
+assertThrows("/a/igim");

+assertThrows(function(){ return RegExp("a", "ii"); })

-re = RegExp("a", "ii");
-assertFlags(re, false, false, true)
+assertThrows(function(){ return RegExp("a", "gii"); })

-re = RegExp("a", "gii");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "igi"); })

-re = RegExp("a", "igi");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "iig"); })

-re = RegExp("a", "iig");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "gimi"); })

-re = RegExp("a", "gimi");
-assertFlags(re, true, true, true)
+assertThrows(function(){ return RegExp("a", "giim"); })

-re = RegExp("a", "giim");
-assertFlags(re, true, true, true)
-
-re = RegExp("a", "igim");
-assertFlags(re, true, true, true)
+assertThrows(function(){ return RegExp("a", "igim"); })

 // Tripple i's

-re = /a/iii;
-assertFlags(re, false, false, true)
+assertThrows("/a/iii");

-re = /a/giii;
-assertFlags(re, true, false, true)
+assertThrows("/a/giii");

-re = /a/igii;
-assertFlags(re, true, false, true)
+assertThrows("/a/igii");

-re = /a/iigi;
-assertFlags(re, true, false, true)
+assertThrows("/a/iigi");

-re = /a/iiig;
-assertFlags(re, true, false, true)
+assertThrows("/a/iiig");

-re = /a/miiig;
-assertFlags(re, true, true, true)
+assertThrows("/a/miiig");

+assertThrows(function(){ return RegExp("a", "iii"); })

-re = RegExp("a", "iii");
-assertFlags(re, false, false, true)
+assertThrows(function(){ return RegExp("a", "giii"); })

-re = RegExp("a", "giii");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "igii"); })

-re = RegExp("a", "igii");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "iigi"); })

-re = RegExp("a", "iigi");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "iiig"); })

-re = RegExp("a", "iiig");
-assertFlags(re, true, false, true)
+assertThrows(function(){ return RegExp("a", "miiig"); })

-re = RegExp("a", "miiig");
-assertFlags(re, true, true, true)
+// Illegal flags - valid flags late in string.

-// Illegal flags - flags late in string.
+assertThrows("/a/arglebargleglopglyf");

-re = /a/arglebargleglopglyf;
-assertFlags(re, true, false, false)
+assertThrows("/a/arglebargleglopglif");

-re = /a/arglebargleglopglif;
-assertFlags(re, true, false, true)
+assertThrows("/a/arglebargleglopglym");

-re = /a/arglebargleglopglym;
-assertFlags(re, true, true, false)
-
-re = /a/arglebargleglopglim;
-assertFlags(re, true, true, true)
+assertThrows("/a/arglebargleglopglim");

 // Case of flags still matters.

-re = /a/gmi;
+var re = /a/gmi;
 assertFlags(re, true, true, true)

-re = /a/Gmi;
-assertFlags(re, false, true, true)
+assertThrows("/a/Gmi");

-re = /a/gMi;
-assertFlags(re, true, false, true)
+assertThrows("/a/gMi");

-re = /a/gmI;
-assertFlags(re, true, true, false)
+assertThrows("/a/gmI");

-re = /a/GMi;
-assertFlags(re, false, false, true)
+assertThrows("/a/GMi");

-re = /a/GmI;
-assertFlags(re, false, true, false)
+assertThrows("/a/GmI");

-re = /a/gMI;
-assertFlags(re, true, false, false)
+assertThrows("/a/gMI");

-re = /a/GMI;
-assertFlags(re, false, false, false)
+assertThrows("/a/GMI");
+
+// Unicode escape sequences are not interpreted.
+
+assertThrows("/a/\\u0067");
+assertThrows("/a/\\u0069");
+assertThrows("/a/\\u006d");
+assertThrows("/a/\\u006D");
--- a/test/mjsunit/regress/regress-87.js
+++ b/test/mjsunit/regress/regress-87.js
@ -25,34 +25,29 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-function testFlags(flagstring, global, ignoreCase, multiline) {
-  var text = "/x/"+flagstring;
-  var re = eval(text);
-  assertEquals(global, re.global, text + ".global");
-  assertEquals(ignoreCase, re.ignoreCase, text + ".ignoreCase");
-  assertEquals(multiline, re.multiline, text + ".multiline");
-}
+// In Issue 87, we allowed unicode escape sequences in RegExp flags.
+// However, according to ES5, they should not be interpreted, but passed
+// verbatim to the RegExp constructor.
+// (On top of that, the original test was buggy and never tested anything.)
+// The behavior was changed in r8969 to not interpret escapes, but this
+// test did not cover that change; it only started failing once invalid
+// flag characters became an error as well.

-testFlags("", false, false, false);
+assertThrows("/x/\\u0067");
+assertThrows("/x/\\u0069");
+assertThrows("/x/\\u006d");

-testFlags("\u0067", true, false, false);
+assertThrows("/x/\\u0067i");
+assertThrows("/x/\\u0069m");
+assertThrows("/x/\\u006dg");

-testFlags("\u0069", false, true, false)
+assertThrows("/x/m\\u0067");
+assertThrows("/x/g\\u0069");
+assertThrows("/x/i\\u006d");

-testFlags("\u006d", false, false, true);
-
-testFlags("\u0068", false, false, false);
-
-testFlags("\u0020", false, false, false);
-
-
-testFlags("\u0067g", true, false, false);
-
-testFlags("g\u0067", true, false, false);
-
-testFlags("abc\u0067efg", true, false, false);
-
-testFlags("i\u0067", true, true, false);
-
-testFlags("\u0067i", true, true, false);
+assertThrows("/x/m\\u0067i");
+assertThrows("/x/g\\u0069m");
+assertThrows("/x/i\\u006dg");

+assertThrows("/x/\\u0068");
+assertThrows("/x/\\u0020");
--- a/test/mozilla/mozilla.status
+++ b/test/mozilla/mozilla.status
@ -246,9 +246,8 @@ ecma_3/Number/15.7.4.7-1: FAIL_OK
 ecma_3/Number/15.7.4.6-1: FAIL_OK

 #:=== RegExp:=== 
-# To be compatible with JSC we silently ignore flags that do not make
-# sense.  These tests expects us to throw exceptions.  
-ecma_3/RegExp/regress-57631: FAIL_OK
+# We don't match the syntax error message of Mozilla for invalid
+# RegExp flags.
 ecma_3/RegExp/15.10.4.1-6: FAIL_OK

 # PCRE doesn't allow subpattern nesting deeper than 200, this tests
--- a/test/sputnik/sputnik.status
+++ b/test/sputnik/sputnik.status
@ -56,15 +56,6 @@ S15.10.6.3_A1_T16: FAIL_OK
 # errors, for compatibility.
 S15.10.2.11_A1_T2: FAIL
 S15.10.2.11_A1_T3: FAIL
-S15.10.4.1_A5_T1: FAIL
-S15.10.4.1_A5_T2: FAIL
-S15.10.4.1_A5_T3: FAIL
-S15.10.4.1_A5_T4: FAIL
-S15.10.4.1_A5_T5: FAIL
-S15.10.4.1_A5_T6: FAIL
-S15.10.4.1_A5_T7: FAIL
-S15.10.4.1_A5_T8: FAIL
-S15.10.4.1_A5_T9: FAIL

 # We are more lenient in which string character escapes we allow than
 # the spec (7.8.4 p. 19) wants us to be.  This is for compatibility.