Make regexp flag parsing stricter.

BUG=v8:1628
TEST=mjsunit/regress/regress-219

Review URL: http://codereview.chromium.org/7624045

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8973 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2011-08-19 11:02:41 +00:00
parent dc917453b3
commit d8a123169b
6 changed files with 84 additions and 122 deletions

View File

@ -207,7 +207,7 @@ function FormatMessage(message) {
stack_overflow: ["Maximum call stack size exceeded"],
// SyntaxError
unable_to_parse: ["Parse error"],
duplicate_regexp_flag: ["Duplicate RegExp flag ", "%0"],
invalid_regexp_flags: ["Invalid flags supplied to RegExp constructor '", "%0", "'"],
invalid_regexp: ["Invalid RegExp pattern /", "%0", "/"],
illegal_break: ["Illegal break statement"],
illegal_continue: ["Illegal continue statement"],

View File

@ -50,24 +50,29 @@ function DoConstructRegExp(object, pattern, flags) {
var global = false;
var ignoreCase = false;
var multiline = false;
for (var i = 0; i < flags.length; i++) {
var c = %_CallFunction(flags, i, StringCharAt);
switch (c) {
case 'g':
// Allow duplicate flags to be consistent with JSC and others.
if (global) {
throw MakeSyntaxError("invalid_regexp_flags", [flags]);
}
global = true;
break;
case 'i':
if (ignoreCase) {
throw MakeSyntaxError("invalid_regexp_flags", [flags]);
}
ignoreCase = true;
break;
case 'm':
if (multiline) {
throw MakeSyntaxError("invalid_regexp_flags", [flags]);
}
multiline = true;
break;
default:
// Ignore flags that have no meaning to be consistent with
// JSC.
break;
throw MakeSyntaxError("invalid_regexp_flags", [flags]);
}
}

View File

@ -30,6 +30,10 @@
// We should now allow duplicates of flags.
// (See http://code.google.com/p/v8/issues/detail?id=219)
// This has been reversed by issue 1628, since other browsers have also
// tightened their syntax.
// (See http://code.google.com/p/v8/issues/detail?id=1628)
// Base tests: we recognize the basic flags
function assertFlags(re, global, multiline, ignoreCase) {
@ -53,124 +57,92 @@ assertFlags(re, true, true, true)
// Double i's
re = /a/ii;
assertFlags(re, false, false, true)
assertThrows("/a/ii");
re = /a/gii;
assertFlags(re, true, false, true)
assertThrows("/a/gii");
re = /a/igi;
assertFlags(re, true, false, true)
assertThrows("/a/igi");
re = /a/iig;
assertFlags(re, true, false, true)
assertThrows("/a/iig");
re = /a/gimi;
assertFlags(re, true, true, true)
assertThrows("/a/gimi");
re = /a/giim;
assertFlags(re, true, true, true)
assertThrows("/a/giim");
re = /a/igim;
assertFlags(re, true, true, true)
assertThrows("/a/igim");
assertThrows(function(){ return RegExp("a", "ii"); })
re = RegExp("a", "ii");
assertFlags(re, false, false, true)
assertThrows(function(){ return RegExp("a", "gii"); })
re = RegExp("a", "gii");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "igi"); })
re = RegExp("a", "igi");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "iig"); })
re = RegExp("a", "iig");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "gimi"); })
re = RegExp("a", "gimi");
assertFlags(re, true, true, true)
assertThrows(function(){ return RegExp("a", "giim"); })
re = RegExp("a", "giim");
assertFlags(re, true, true, true)
re = RegExp("a", "igim");
assertFlags(re, true, true, true)
assertThrows(function(){ return RegExp("a", "igim"); })
// Triple i's
re = /a/iii;
assertFlags(re, false, false, true)
assertThrows("/a/iii");
re = /a/giii;
assertFlags(re, true, false, true)
assertThrows("/a/giii");
re = /a/igii;
assertFlags(re, true, false, true)
assertThrows("/a/igii");
re = /a/iigi;
assertFlags(re, true, false, true)
assertThrows("/a/iigi");
re = /a/iiig;
assertFlags(re, true, false, true)
assertThrows("/a/iiig");
re = /a/miiig;
assertFlags(re, true, true, true)
assertThrows("/a/miiig");
assertThrows(function(){ return RegExp("a", "iii"); })
re = RegExp("a", "iii");
assertFlags(re, false, false, true)
assertThrows(function(){ return RegExp("a", "giii"); })
re = RegExp("a", "giii");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "igii"); })
re = RegExp("a", "igii");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "iigi"); })
re = RegExp("a", "iigi");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "iiig"); })
re = RegExp("a", "iiig");
assertFlags(re, true, false, true)
assertThrows(function(){ return RegExp("a", "miiig"); })
re = RegExp("a", "miiig");
assertFlags(re, true, true, true)
// Illegal flags - valid flags late in string.
// Illegal flags - flags late in string.
assertThrows("/a/arglebargleglopglyf");
re = /a/arglebargleglopglyf;
assertFlags(re, true, false, false)
assertThrows("/a/arglebargleglopglif");
re = /a/arglebargleglopglif;
assertFlags(re, true, false, true)
assertThrows("/a/arglebargleglopglym");
re = /a/arglebargleglopglym;
assertFlags(re, true, true, false)
re = /a/arglebargleglopglim;
assertFlags(re, true, true, true)
assertThrows("/a/arglebargleglopglim");
// Case of flags still matters.
re = /a/gmi;
var re = /a/gmi;
assertFlags(re, true, true, true)
re = /a/Gmi;
assertFlags(re, false, true, true)
assertThrows("/a/Gmi");
re = /a/gMi;
assertFlags(re, true, false, true)
assertThrows("/a/gMi");
re = /a/gmI;
assertFlags(re, true, true, false)
assertThrows("/a/gmI");
re = /a/GMi;
assertFlags(re, false, false, true)
assertThrows("/a/GMi");
re = /a/GmI;
assertFlags(re, false, true, false)
assertThrows("/a/GmI");
re = /a/gMI;
assertFlags(re, true, false, false)
assertThrows("/a/gMI");
re = /a/GMI;
assertFlags(re, false, false, false)
assertThrows("/a/GMI");
// Unicode escape sequences are not interpreted.
assertThrows("/a/\\u0067");
assertThrows("/a/\\u0069");
assertThrows("/a/\\u006d");
assertThrows("/a/\\u006D");

View File

@ -25,34 +25,29 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Evaluates the regexp literal "/x/<flagstring>" at runtime and checks that
// the resulting object's global, ignoreCase and multiline properties equal
// the expected values. Each assertion message is the literal source text
// followed by "." and the property name.
function testFlags(flagstring, global, ignoreCase, multiline) {
  var literalSource = "/x/" + flagstring;
  // eval is intentional: the literal has to be parsed at runtime so that the
  // flag string under test is handled by the regexp literal parser.
  var regexp = eval(literalSource);
  var propertyNames = ["global", "ignoreCase", "multiline"];
  var expectedValues = [global, ignoreCase, multiline];
  for (var index = 0; index < propertyNames.length; index++) {
    var property = propertyNames[index];
    assertEquals(expectedValues[index], regexp[property],
                 literalSource + "." + property);
  }
}
// In Issue 87, we allowed unicode escape sequences in RegExp flags.
// However, according to ES5, they should not be interpreted, but passed
// verbatim to the RegExp constructor.
// (On top of that, the original test was buggy and never tested anything).
// The behavior was changed in r8969 to not interpret escapes, but this
// test didn't test that, and only failed when making invalid flag characters
// an error too.
testFlags("", false, false, false);
assertThrows("/x/\\u0067");
assertThrows("/x/\\u0069");
assertThrows("/x/\\u006d");
testFlags("\u0067", true, false, false);
assertThrows("/x/\\u0067i");
assertThrows("/x/\\u0069m");
assertThrows("/x/\\u006dg");
testFlags("\u0069", false, true, false)
assertThrows("/x/m\\u0067");
assertThrows("/x/g\\u0069");
assertThrows("/x/i\\u006d");
testFlags("\u006d", false, false, true);
testFlags("\u0068", false, false, false);
testFlags("\u0020", false, false, false);
testFlags("\u0067g", true, false, false);
testFlags("g\u0067", true, false, false);
testFlags("abc\u0067efg", true, false, false);
testFlags("i\u0067", true, true, false);
testFlags("\u0067i", true, true, false);
assertThrows("/x/m\\u0067i");
assertThrows("/x/g\\u0069m");
assertThrows("/x/i\\u006dg");
assertThrows("/x/\\u0068");
assertThrows("/x/\\u0020");

View File

@ -246,9 +246,8 @@ ecma_3/Number/15.7.4.7-1: FAIL_OK
ecma_3/Number/15.7.4.6-1: FAIL_OK
#:=== RegExp:===
# To be compatible with JSC we silently ignore flags that do not make
# sense. These tests expects us to throw exceptions.
ecma_3/RegExp/regress-57631: FAIL_OK
# We don't match the syntax error message of Mozilla for invalid
# RegExp flags.
ecma_3/RegExp/15.10.4.1-6: FAIL_OK
# PCRE doesn't allow subpattern nesting deeper than 200, this tests

View File

@ -56,15 +56,6 @@ S15.10.6.3_A1_T16: FAIL_OK
# errors, for compatibility.
S15.10.2.11_A1_T2: FAIL
S15.10.2.11_A1_T3: FAIL
S15.10.4.1_A5_T1: FAIL
S15.10.4.1_A5_T2: FAIL
S15.10.4.1_A5_T3: FAIL
S15.10.4.1_A5_T4: FAIL
S15.10.4.1_A5_T5: FAIL
S15.10.4.1_A5_T6: FAIL
S15.10.4.1_A5_T7: FAIL
S15.10.4.1_A5_T8: FAIL
S15.10.4.1_A5_T9: FAIL
# We are more lenient in which string character escapes we allow than
# the spec (7.8.4 p. 19) wants us to be. This is for compatibility.