From c05a4036b04f331196b2f84b7cbd03dee4e63058 Mon Sep 17 00:00:00 2001 From: arv Date: Thu, 18 Dec 2014 11:41:37 -0800 Subject: [PATCH] ES6 template literals should not use legacy octal strings Correctly handle SyntaxErrors in escape sequences. BUG=v8:3736 LOG=Y R=dslomov@chromium.org, caitpotter88@gmail.com Review URL: https://codereview.chromium.org/811113002 Cr-Commit-Position: refs/heads/master@{#25891} --- src/scanner.cc | 58 +++++++++++++++++++++---------- src/scanner.h | 3 +- test/mjsunit/harmony/templates.js | 24 ++++++++++++- 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 96459f3c66..7cd0895cb3 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -697,13 +697,13 @@ void Scanner::SeekForward(int pos) { } -template +template bool Scanner::ScanEscape() { uc32 c = c0_; Advance(); // Skip escaped newlines. - if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { + if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { // Allow CR+LF newlines in multiline string literals. if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); // Allow LF+CR newlines in multiline string literals. @@ -725,22 +725,44 @@ bool Scanner::ScanEscape() { if (c < 0) return false; break; } - case 'v' : c = '\v'; break; - case 'x' : { + case 'v': + c = '\v'; + break; + case 'x': { c = ScanHexNumber(2); if (c < 0) return false; break; } - case '0' : // fall through - case '1' : // fall through - case '2' : // fall through - case '3' : // fall through - case '4' : // fall through - case '5' : // fall through - case '6' : // fall through + case '0': + if (in_template_literal) { + // \ 0 DecimalDigit is never allowed in templates. + if (IsDecimalDigit(c0_)) { + Advance(); // Advance to include the problematic char. + return false; + } + + // The TV of TemplateCharacter :: \ EscapeSequence is the CV of + // EscapeSequence. + // The CV of EscapeSequence :: 0 is the code unit value 0. 
+ c = 0; + break; + } + // Fall through. + case '1': // fall through + case '2': // fall through + case '3': // fall through + case '4': // fall through + case '5': // fall through + case '6': // fall through case '7': - c = ScanOctalEscape(c, 2); - break; + if (!in_template_literal) { + c = ScanOctalEscape(c, 2); + break; + } + // Fall through + case '8': + case '9': + if (in_template_literal) return false; } // According to ECMA-262, section 7.8.4, characters not covered by the @@ -787,7 +809,7 @@ Token::Value Scanner::ScanString() { uc32 c = c0_; Advance(); if (c == '\\') { - if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; + if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; } else { AddLiteralChar(c); } @@ -818,6 +840,7 @@ Token::Value Scanner::ScanTemplateSpan() { LiteralScope literal(this); StartRawLiteral(); const bool capture_raw = true; + const bool in_template_literal = true; while (true) { uc32 c = c0_; @@ -844,11 +867,8 @@ Token::Value Scanner::ScanTemplateSpan() { AddRawLiteralChar('\n'); } } - } else if (c0_ == '0') { - Advance(); - AddLiteralChar('0'); - } else { - ScanEscape(); + } else if (!ScanEscape()) { + return Token::ILLEGAL; } } else if (c < 0) { // Unterminated template literal diff --git a/src/scanner.h b/src/scanner.h index d96ed579b0..6e668fd492 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -677,8 +677,9 @@ class Scanner { // Scans an escape-sequence which is part of a string and adds the // decoded character to the current literal. Returns true if a pattern // is scanned. - template + template bool ScanEscape(); + // Decodes a Unicode escape-sequence which is part of an identifier. // If the escape sequence cannot be decoded the result is kBadChar. 
uc32 ScanIdentifierUnicodeEscape(); diff --git a/test/mjsunit/harmony/templates.js b/test/mjsunit/harmony/templates.js index 86caf453a5..e2e89dc67f 100644 --- a/test/mjsunit/harmony/templates.js +++ b/test/mjsunit/harmony/templates.js @@ -253,7 +253,7 @@ var obj = { // The TRV of CharacterEscapeSequence :: NonEscapeCharacter is the CV of the // NonEscapeCharacter. calls = 0; - (function(s) { calls++; assertEquals("\u005Cx", s.raw[0]); })`\x`; + (function(s) { calls++; assertEquals("\u005Cz", s.raw[0]); })`\z`; assertEquals(1, calls); // The TRV of LineTerminatorSequence :: <LF> is the code unit value 0x000A. @@ -471,3 +471,25 @@ var obj = { // block }`jkl`; })(); + + +(function testLegacyOctal() { + assertEquals('\u0000', `\0`); + assertEquals('\u0000a', `\0a`); + for (var i = 0; i < 10; i++) { + var code = "`\\0" + i + "`"; + assertThrows(code, SyntaxError); + } + + assertEquals('\\0', String.raw`\0`); +})(); + + +(function testSyntaxErrorsNonEscapeCharacter() { + assertThrows("`\\x`", SyntaxError); + assertThrows("`\\u`", SyntaxError); + for (var i = 1; i < 10; i++) { + var code = "`\\" + i + "`"; + assertThrows(code, SyntaxError); + } +})();