diff --git a/src/scanner.cc b/src/scanner.cc index 7901b5d826..f24af2ed26 100755 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -611,7 +611,7 @@ void Scanner::SeekForward(int pos) { } -void Scanner::ScanEscape() { +bool Scanner::ScanEscape() { uc32 c = c0_; Advance(); @@ -621,7 +621,7 @@ void Scanner::ScanEscape() { if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); // Allow LF+CR newlines in multiline string literals. if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); - return; + return true; } switch (c) { @@ -635,13 +635,13 @@ void Scanner::ScanEscape() { case 't' : c = '\t'; break; case 'u' : { c = ScanHexNumber(4); - if (c < 0) c = 'u'; + if (c < 0) return false; break; } case 'v' : c = '\v'; break; case 'x' : { c = ScanHexNumber(2); - if (c < 0) c = 'x'; + if (c < 0) return false; break; } case '0' : // fall through @@ -654,10 +654,11 @@ void Scanner::ScanEscape() { case '7' : c = ScanOctalEscape(c, 2); break; } - // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these - // should be illegal, but they are commonly handled - // as non-escaped characters by JS VMs. + // According to ECMA-262, section 7.8.4, characters not covered by the + // above cases should be illegal, but they are commonly handled as + // non-escaped characters by JS VMs. AddLiteralChar(c); + return true; } @@ -696,8 +697,7 @@ Token::Value Scanner::ScanString() { uc32 c = c0_; Advance(); if (c == '\\') { - if (c0_ < 0) return Token::ILLEGAL; - ScanEscape(); + if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; } else { AddLiteralChar(c); } diff --git a/src/scanner.h b/src/scanner.h index 045e7d27a6..4de413b885 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -520,13 +520,16 @@ class Scanner { Token::Value ScanIdentifierOrKeyword(); Token::Value ScanIdentifierSuffix(LiteralScope* literal); - void ScanEscape(); Token::Value ScanString(); - // Decodes a unicode escape-sequence which is part of an identifier. 
+ // Scans an escape-sequence which is part of a string and adds the + // decoded character to the current literal. Returns false if the + // sequence is an invalid hex or Unicode escape, true otherwise. + bool ScanEscape(); + // Decodes a Unicode escape-sequence which is part of an identifier. // If the escape sequence cannot be decoded the result is kBadChar. uc32 ScanIdentifierUnicodeEscape(); - // Recognizes a uniocde escape-sequence and adds its characters, + // Scans a Unicode escape-sequence and adds its characters, // uninterpreted, to the current literal. Used for parsing RegExp // flags. bool ScanLiteralUnicodeEscape(); diff --git a/test/mjsunit/compiler/literals.js b/test/mjsunit/compiler/literals.js index e910bb3c6a..8607cd9595 100644 --- a/test/mjsunit/compiler/literals.js +++ b/test/mjsunit/compiler/literals.js @@ -36,38 +36,38 @@ assertEquals(8, eval("6;'abc';8")); // Characters just outside the ranges of hex-escapes. // "/" comes just before "0". -assertEquals("x1/", "\x1/"); -assertEquals("u111/", "\u111/"); +assertThrows('"\\x1/"'); +assertThrows('"\\u111/"'); assertEquals("\\x1/", RegExp("\\x1/").source); assertEquals("\\u111/", RegExp("\\u111/").source); // ":" comes just after "9". -assertEquals("x1:", "\x1:"); -assertEquals("u111:", "\u111:"); +assertThrows('"\\x1:"'); +assertThrows('"\\u111:"'); assertEquals("\\x1:", /\x1:/.source); assertEquals("\\u111:", /\u111:/.source); // "`" comes just before "a". -assertEquals("x1`", "\x1`"); -assertEquals("u111`", "\u111`"); +assertThrows('"\\x1`"'); +assertThrows('"\\u111`"'); assertEquals("\\x1`", /\x1`/.source); assertEquals("\\u111`", /\u111`/.source); // "g" comes just before "f". -assertEquals("x1g", "\x1g"); -assertEquals("u111g", "\u111g"); +assertThrows('"\\x1g"'); +assertThrows('"\\u111g"'); assertEquals("\\x1g", /\x1g/.source); assertEquals("\\u111g", /\u111g/.source); // "@" comes just before "A". 
-assertEquals("x1@", "\x1@"); -assertEquals("u111@", "\u111@"); +assertThrows('"\\x1@"'); +assertThrows('"\\u111@"'); assertEquals("\\x1@", /\x1@/.source); assertEquals("\\u111@", /\u111@/.source); // "G" comes just after "F". -assertEquals("x1G", "\x1G"); -assertEquals("u111G", "\u111G"); +assertThrows('"\\x1G"'); +assertThrows('"\\u111G"'); assertEquals("\\x1G", /\x1G/.source); assertEquals("\\u111G", /\u111G/.source); diff --git a/test/mozilla/mozilla.status b/test/mozilla/mozilla.status index e64959acfc..9eafb4bc3c 100644 --- a/test/mozilla/mozilla.status +++ b/test/mozilla/mozilla.status @@ -592,6 +592,14 @@ js1_5/Regress/regress-416737-01: FAIL_OK js1_5/Regress/regress-416737-02: FAIL_OK +# These tests use illegal escape-sequences in string literals, which +# most engines (i.e. V8, JSC, Opera and FF) already reject. +ecma/Array/15.4.5.1-1: FAIL_OK +ecma/LexicalConventions/7.7.4: FAIL_OK +ecma_2/RegExp/hex-001: FAIL_OK +js1_2/regexp/hexadecimal: FAIL_OK + + ##################### FAILING TESTS ##################### # This section is for tests that fail in V8 and pass in JSC. diff --git a/test/sputnik/sputnik.status b/test/sputnik/sputnik.status index a4c7d57ff0..5cda6fd6ee 100644 --- a/test/sputnik/sputnik.status +++ b/test/sputnik/sputnik.status @@ -52,36 +52,14 @@ S15.10.2.11_A1_T3: FAIL # We are more lenient in which string character escapes we allow than # the spec (7.8.4 p. 19) wants us to be. This is for compatibility. 
+S7.8.4_A4.3_T1: FAIL_OK S7.8.4_A4.3_T2: FAIL_OK -S7.8.4_A4.3_T2: FAIL_OK -S7.8.4_A6.2_T2: FAIL_OK -S7.8.4_A6.1_T4: FAIL_OK +S7.8.4_A4.3_T3: FAIL_OK S7.8.4_A4.3_T4: FAIL_OK -S7.8.4_A7.2_T2: FAIL_OK -S7.8.4_A7.1_T4: FAIL_OK +S7.8.4_A6.4_T1: FAIL_OK S7.8.4_A6.4_T2: FAIL_OK +S7.8.4_A7.4_T1: FAIL_OK S7.8.4_A7.4_T2: FAIL_OK -S7.8.4_A7.2_T4: FAIL_OK -S7.8.4_A4.3_T6: FAIL_OK -S7.8.4_A7.2_T6: FAIL_OK -S7.8.4_A4.3_T1: FAIL_OK -S7.8.4_A6.2_T1: FAIL_OK -S7.8.4_A4.3_T3: FAIL_OK -S7.8.4_A7.2_T1: FAIL_OK -S7.8.4_A6.4_T1: FAIL_OK -S7.8.4_A7.2_T3: FAIL_OK -S7.8.4_A7.4_T1: FAIL_OK -S7.8.4_A4.3_T5: FAIL_OK -S7.8.4_A7.2_T5: FAIL_OK -S7.8.4_A4.3_T1: FAIL_OK -S7.8.4_A6.2_T1: FAIL_OK -S7.8.4_A4.3_T3: FAIL_OK -S7.8.4_A7.2_T1: FAIL_OK -S7.8.4_A6.4_T1: FAIL_OK -S7.8.4_A7.2_T3: FAIL_OK -S7.8.4_A7.4_T1: FAIL_OK -S7.8.4_A4.3_T5: FAIL_OK -S7.8.4_A7.2_T5: FAIL_OK # Sputnik expects unicode escape sequences in RegExp flags to be interpreted. # The specification requires them to be passed uninterpreted to the RegExp diff --git a/test/test262/test262.status b/test/test262/test262.status index d405d211d1..aacb16c0a5 100644 --- a/test/test262/test262.status +++ b/test/test262/test262.status @@ -60,22 +60,6 @@ S15.1.2.2_A5.1_T1: FAIL_OK S15.8.2.16_A7: PASS || FAIL_OK S15.8.2.18_A7: PASS || FAIL_OK -# We are more lenient in which string character escapes we allow than -# the spec (7.8.4 p. 19) wants us to be. This is for compatibility. -S7.8.4_A6.1_T4: FAIL_OK -S7.8.4_A6.2_T1: FAIL_OK -S7.8.4_A6.2_T2: FAIL_OK -S7.8.4_A7.1_T4: FAIL_OK -S7.8.4_A7.2_T1: FAIL_OK -S7.8.4_A7.2_T2: FAIL_OK -S7.8.4_A7.2_T3: FAIL_OK -S7.8.4_A7.2_T4: FAIL_OK -S7.8.4_A7.2_T5: FAIL_OK -S7.8.4_A7.2_T6: FAIL_OK -Sbp_7.8.4_A6.1_T4: FAIL_OK -Sbp_7.8.4_A6.2_T1: FAIL_OK -Sbp_7.8.4_A6.2_T2: FAIL_OK - # Linux for ia32 (and therefore simulators) default to extended 80 bit floating # point formats, so these tests checking 64-bit FP precision fail. The other # platforms/arch's pass these tests.