[parser] Add use counter for U+2028 & U+2029

The context is the following proposal to make JSON a subset of JavaScript: https://github.com/tc39/proposal-json-superset There’s interest in performing a side investigation to answer the question of what would happen if we stopped treating U+2028 and U+2029 as `LineTerminator`s *entirely*. (Note that this is separate from the proposal, which just changes how these characters are handled in ECMAScript strings.) This is technically a breaking change, and IMHO it would be wonderful if we could get away with it, but no one really has any data on whether or not we could. Adding this use counter lets us get that data. BUG=v8:6827 Cq-Include-Trybots: master.tryserver.chromium.linux:linux_chromium_rel_ng Change-Id: Ia22e8db1634df4d3f965bec8e1cfa11cc7b5e9aa Reviewed-on: https://chromium-review.googlesource.com/693155 Commit-Queue: Mathias Bynens <mathias@chromium.org> Reviewed-by: Marja Hölttä <marja@chromium.org> Cr-Commit-Position: refs/heads/master@{#48260}
2017-09-29 15:52:15 -04:00 · 2017-09-29 15:52:15 -04:00 · d3c9812143
commit d3c9812143
parent 7283b57cf8
7 changed files with 97 additions and 45 deletions
--- a/include/v8.h
+++ b/include/v8.h
@ -7048,6 +7048,7 @@ class V8_EXPORT Isolate {
    kPromiseConstructorReturnedUndefined = 38,
    kConstructorNonUndefinedPrimitiveReturn = 39,
    kLabeledExpressionStatement = 40,
+    kLineOrParagraphSeparatorAsLineTerminator = 41,

    // If you add new values here, you'll also need to update Chromium's:
    // UseCounter.h, V8PerIsolateData.cpp, histograms.xml
--- a/src/parsing/parser.cc
+++ b/src/parsing/parser.cc
@ -464,7 +464,7 @@ Parser::Parser(ParseInfo* info)
    : ParserBase<Parser>(info->zone(), &scanner_, info->stack_limit(),
                         info->extension(), info->GetOrCreateAstValueFactory(),
                         info->runtime_call_stats(), true),
-      scanner_(info->unicode_cache()),
+      scanner_(info->unicode_cache(), use_counts_),
      reusable_preparser_(nullptr),
      mode_(PARSE_EAGERLY),  // Lazy mode must be set explicitly.
      source_range_map_(info->source_range_map()),
--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@ -173,14 +173,30 @@ bool Scanner::BookmarkScope::HasBeenApplied() {
  return bookmark_ == kBookmarkWasApplied;
 }

+// LineTerminator:       'JS_Line_Terminator' in point.properties
+// ES#sec-line-terminators lists exactly 4 code points:
+// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
+//
+// Returns true iff |c| is one of those four code points. As a side effect,
+// every LS/PS seen here bumps the kLineOrParagraphSeparatorAsLineTerminator
+// use counter, so that usage of these two code points as line terminators
+// can be measured.
+bool Scanner::IsLineTerminator(uc32 c) {
+  if (c == 0x000A || c == 0x000D) {
+    return true;
+  }
+  if (c == 0x2028 || c == 0x2029) {
+    // |use_counts_| is whatever pointer the scanner was constructed with and
+    // may be null (stand-alone scanners without a counter array). Counting
+    // is best-effort and must never crash the scan.
+    if (use_counts_ != nullptr) {
+      ++use_counts_[v8::Isolate::UseCounterFeature::
+                        kLineOrParagraphSeparatorAsLineTerminator];
+    }
+    return true;
+  }
+  return false;
+}
+
 // ----------------------------------------------------------------------------
 // Scanner

-Scanner::Scanner(UnicodeCache* unicode_cache)
+Scanner::Scanner(UnicodeCache* unicode_cache, int* use_counts)
    : unicode_cache_(unicode_cache),
      octal_pos_(Location::invalid()),
      octal_message_(MessageTemplate::kNone),
-      found_html_comment_(false) {}
+      found_html_comment_(false),
+      use_counts_(use_counts) {}  // Raw pointer; used by IsLineTerminator().

 void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) {
  DCHECK_NOT_NULL(source);
@ -439,7 +455,7 @@ Token::Value Scanner::SkipWhiteSpace() {

      // Advance as long as character is a WhiteSpace or LineTerminator.
      // Remember if the latter is the case.
-      if (unibrow::IsLineTerminator(c0_)) {
+      if (IsLineTerminator(c0_)) {
        has_line_terminator_before_next_ = true;
      } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
        break;
@ -496,7 +512,7 @@ Token::Value Scanner::SkipSingleLineComment() {
  // separately by the lexical grammar and becomes part of the
  // stream of input elements for the syntactic grammar (see
  // ECMA-262, section 7.4).
-  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
    Advance();
  }

@ -506,7 +522,7 @@ Token::Value Scanner::SkipSingleLineComment() {

 Token::Value Scanner::SkipSourceURLComment() {
  TryToParseSourceURLComment();
-  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
    Advance();
  }

@ -542,7 +558,7 @@ void Scanner::TryToParseSourceURLComment() {
  while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
    Advance();
  }
-  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
    // Disallowed characters.
    if (c0_ == '"' || c0_ == '\'') {
      value->Reset();
@ -555,7 +571,7 @@ void Scanner::TryToParseSourceURLComment() {
    Advance();
  }
  // Allow whitespace at the end.
-  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
    if (!unicode_cache_->IsWhiteSpace(c0_)) {
      value->Reset();
      break;
@ -572,7 +588,7 @@ Token::Value Scanner::SkipMultiLineComment() {
  while (c0_ != kEndOfInput) {
    uc32 ch = c0_;
    Advance();
-    if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
+    if (c0_ != kEndOfInput && IsLineTerminator(ch)) {
      // Following ECMA-262, section 7.4, a comment containing
      // a newline will make the comment count as a line-terminator.
      has_multiline_comment_before_next_ = true;
@ -968,8 +984,7 @@ bool Scanner::ScanEscape() {
  Advance<capture_raw>();

  // Skip escaped newlines.
-  if (!in_template_literal && c0_ != kEndOfInput &&
-      unibrow::IsLineTerminator(c)) {
+  if (!in_template_literal && c0_ != kEndOfInput && IsLineTerminator(c)) {
    // Allow escaped CR+LF newlines in multiline string literals.
    if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
    return true;
@ -1062,8 +1077,7 @@ Token::Value Scanner::ScanString() {
    AddLiteralChar(c);
  }

-  while (c0_ != quote && c0_ != kEndOfInput &&
-         !unibrow::IsLineTerminator(c0_)) {
+  while (c0_ != quote && c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
    uc32 c = c0_;
    Advance();
    if (c == '\\') {
@ -1119,7 +1133,7 @@ Token::Value Scanner::ScanTemplateSpan() {
      ReduceRawLiteralLength(2);
      break;
    } else if (c == '\\') {
-      if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
+      if (c0_ != kEndOfInput && IsLineTerminator(c0_)) {
        // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
        // code unit sequence.
        uc32 lastChar = c0_;
@ -1660,12 +1674,12 @@ bool Scanner::ScanRegExpPattern() {
  }

  while (c0_ != '/' || in_character_class) {
-    if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
+    if (c0_ == kEndOfInput || IsLineTerminator(c0_)) {
      return false;
    }
    if (c0_ == '\\') {  // Escape sequence.
      AddLiteralCharAdvance();
-      if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
+      if (c0_ == kEndOfInput || IsLineTerminator(c0_)) {
        return false;
      }
      AddLiteralCharAdvance();
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@ -207,7 +207,7 @@ class Scanner {
  static const int kNoOctalLocation = -1;
  static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;

-  explicit Scanner(UnicodeCache* scanner_contants);
+  explicit Scanner(UnicodeCache* scanner_contants, int* use_counts_);

  void Initialize(Utf16CharacterStream* source, bool is_module);

@ -735,6 +735,8 @@ class Scanner {

  bool is_module_;

+  bool IsLineTerminator(uc32 c);
+
  Token::Value ScanTemplateSpan();

  // Return the current source position.
@ -799,6 +801,8 @@ class Scanner {
  // Whether this scanner encountered an HTML comment.
  bool found_html_comment_;

+  int* use_counts_;
+
  MessageTemplate::Template scanner_error_;
  Location scanner_error_location_;
 };
--- a/src/unicode.h
+++ b/src/unicode.h
@ -198,7 +198,7 @@ struct V8_EXPORT_PRIVATE WhiteSpace {
 // ES#sec-line-terminators lists exactly 4 code points:
 // LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
 V8_INLINE bool IsLineTerminator(uchar c) {
-  return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
+  return c == 0x000A || c == 0x000D || c == 0x2028 || c == 0x2029;  // LF, CR, LS, PS
 }

 #ifndef V8_INTL_SUPPORT
--- a/test/cctest/parsing/test-scanner.cc
+++ b/test/cctest/parsing/test-scanner.cc
@ -29,6 +29,7 @@ struct ScannerTestHelper {
  std::unique_ptr<UnicodeCache> unicode_cache;
  std::unique_ptr<Utf16CharacterStream> stream;
  std::unique_ptr<Scanner> scanner;
+  int use_counts[v8::Isolate::kUseCounterFeatureCount];

  Scanner* operator->() const { return scanner.get(); }
  Scanner* get() const { return scanner.get(); }
@ -38,8 +39,11 @@ ScannerTestHelper make_scanner(const char* src) {
  ScannerTestHelper helper;
  helper.unicode_cache = std::unique_ptr<UnicodeCache>(new UnicodeCache);
  helper.stream = ScannerStream::ForTesting(src);
-  helper.scanner =
-      std::unique_ptr<Scanner>(new Scanner(helper.unicode_cache.get()));
+  for (int i = 0; i < v8::Isolate::kUseCounterFeatureCount; i++) {
+    helper.use_counts[i] = 0;
+  }
+  helper.scanner = std::unique_ptr<Scanner>(
+      new Scanner(helper.unicode_cache.get(), helper.use_counts));
  helper.scanner->Initialize(helper.stream.get(), false);
  return helper;
 }
--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@ -61,6 +61,17 @@ namespace v8 {
 namespace internal {
 namespace test_parsing {

+namespace {
+
+// Counter array that MockUseCounterCallback() writes into. Tests that check
+// use counters point this at their own array, indexed by
+// v8::Isolate::UseCounterFeature, before installing the callback.
+int* global_use_counts = nullptr;
+
+// Use-counter callback for tests: records one hit for |feature| in the array
+// currently referenced by |global_use_counts|. |isolate| is unused.
+void MockUseCounterCallback(v8::Isolate* isolate,
+                            v8::Isolate::UseCounterFeature feature) {
+  // Fail loudly instead of dereferencing null if the callback fires before a
+  // test has provided a counter array.
+  CHECK(global_use_counts != nullptr);
+  ++global_use_counts[feature];
+}
+
+}  // namespace
+
 TEST(ScanKeywords) {
  struct KeywordToken {
    const char* keyword;
@ -82,7 +93,7 @@ TEST(ScanKeywords) {
    CHECK(static_cast<int>(sizeof(buffer)) >= length);
    {
      auto stream = i::ScannerStream::ForTesting(keyword, length);
-      i::Scanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache, global_use_counts);
      scanner.Initialize(stream.get(), false);
      CHECK_EQ(key_token.token, scanner.Next());
      CHECK_EQ(i::Token::EOS, scanner.Next());
@ -90,7 +101,7 @@ TEST(ScanKeywords) {
    // Removing characters will make keyword matching fail.
    {
      auto stream = i::ScannerStream::ForTesting(keyword, length - 1);
-      i::Scanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache, global_use_counts);
      scanner.Initialize(stream.get(), false);
      CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
      CHECK_EQ(i::Token::EOS, scanner.Next());
@ -101,7 +112,7 @@ TEST(ScanKeywords) {
      i::MemMove(buffer, keyword, length);
      buffer[length] = chars_to_append[j];
      auto stream = i::ScannerStream::ForTesting(buffer, length + 1);
-      i::Scanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache, global_use_counts);
      scanner.Initialize(stream.get(), false);
      CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
      CHECK_EQ(i::Token::EOS, scanner.Next());
@ -111,7 +122,7 @@ TEST(ScanKeywords) {
      i::MemMove(buffer, keyword, length);
      buffer[length - 1] = '_';
      auto stream = i::ScannerStream::ForTesting(buffer, length);
-      i::Scanner scanner(&unicode_cache);
+      i::Scanner scanner(&unicode_cache, global_use_counts);
      scanner.Initialize(stream.get(), false);
      CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
      CHECK_EQ(i::Token::EOS, scanner.Next());
@ -177,7 +188,7 @@ TEST(ScanHTMLEndComments) {
  for (int i = 0; tests[i]; i++) {
    const char* source = tests[i];
    auto stream = i::ScannerStream::ForTesting(source);
-    i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
+    i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
    scanner.Initialize(stream.get(), false);
    i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    i::AstValueFactory ast_value_factory(
@ -196,7 +207,7 @@ TEST(ScanHTMLEndComments) {
  for (int i = 0; fail_tests[i]; i++) {
    const char* source = fail_tests[i];
    auto stream = i::ScannerStream::ForTesting(source);
-    i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
+    i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
    scanner.Initialize(stream.get(), false);
    i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    i::AstValueFactory ast_value_factory(
@ -221,7 +232,7 @@ TEST(ScanHtmlComments) {
  // Disallow HTML comments.
  {
    auto stream = i::ScannerStream::ForTesting(src);
-    i::Scanner scanner(&unicode_cache);
+    i::Scanner scanner(&unicode_cache, global_use_counts);
    scanner.Initialize(stream.get(), true);
    CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
    CHECK_EQ(i::Token::ILLEGAL, scanner.Next());
@ -230,7 +241,7 @@ TEST(ScanHtmlComments) {
  // Skip HTML comments:
  {
    auto stream = i::ScannerStream::ForTesting(src);
-    i::Scanner scanner(&unicode_cache);
+    i::Scanner scanner(&unicode_cache, global_use_counts);
    scanner.Initialize(stream.get(), false);
    CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
    CHECK_EQ(i::Token::EOS, scanner.Next());
@ -389,7 +400,7 @@ TEST(StandAlonePreParser) {
  uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
  for (int i = 0; programs[i]; i++) {
    auto stream = i::ScannerStream::ForTesting(programs[i]);
-    i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
+    i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
    scanner.Initialize(stream.get(), false);

    i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -425,7 +436,7 @@ TEST(StandAlonePreParserNoNatives) {
  uintptr_t stack_limit = isolate->stack_guard()->real_climit();
  for (int i = 0; programs[i]; i++) {
    auto stream = i::ScannerStream::ForTesting(programs[i]);
-    i::Scanner scanner(isolate->unicode_cache());
+    i::Scanner scanner(isolate->unicode_cache(), global_use_counts);
    scanner.Initialize(stream.get(), false);

    // Preparser defaults to disallowing natives syntax.
@ -495,7 +506,7 @@ TEST(RegressChromium62639) {
  // failed in debug mode, and sometimes crashed in release mode.

  auto stream = i::ScannerStream::ForTesting(program);
-  i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
+  i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
  scanner.Initialize(stream.get(), false);
  i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  i::AstValueFactory ast_value_factory(
@ -570,7 +581,7 @@ TEST(PreParseOverflow) {
  uintptr_t stack_limit = isolate->stack_guard()->real_climit();

  auto stream = i::ScannerStream::ForTesting(program.get(), kProgramSize);
-  i::Scanner scanner(isolate->unicode_cache());
+  i::Scanner scanner(isolate->unicode_cache(), global_use_counts);
  scanner.Initialize(stream.get(), false);

  i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -590,7 +601,7 @@ void TestStreamScanner(i::Utf16CharacterStream* stream,
                       i::Token::Value* expected_tokens,
                       int skip_pos = 0,  // Zero means not skipping.
                       int skip_to = 0) {
-  i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
+  i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
  scanner.Initialize(stream, false);

  int i = 0;
@ -668,7 +679,7 @@ TEST(StreamScanner) {
 void TestScanRegExp(const char* re_source, const char* expected) {
  auto stream = i::ScannerStream::ForTesting(re_source);
  i::HandleScope scope(CcTest::i_isolate());
-  i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
+  i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
  scanner.Initialize(stream.get(), false);

  i::Token::Value start = scanner.peek();
@ -1333,7 +1344,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
  // Preparse the data.
  i::PendingCompilationErrorHandler pending_error_handler;
  if (test_preparser) {
-    i::Scanner scanner(isolate->unicode_cache());
+    i::Scanner scanner(isolate->unicode_cache(), global_use_counts);
    std::unique_ptr<i::Utf16CharacterStream> stream(
        i::ScannerStream::For(source));
    i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -3963,16 +3974,6 @@ TEST(AsmModuleFlag) {
  CHECK(s->IsAsmModule() && s->AsDeclarationScope()->asm_module());
 }

-namespace {
-
-int* global_use_counts = NULL;
-
-void MockUseCounterCallback(v8::Isolate* isolate,
-                            v8::Isolate::UseCounterFeature feature) {
-  ++global_use_counts[feature];
-}
-
-}  // namespace

 TEST(UseAsmUseCount) {
  i::Isolate* isolate = CcTest::i_isolate();
@ -4032,6 +4033,34 @@ TEST(BothModesUseCount) {
  CHECK_LT(0, use_counts[v8::Isolate::kStrictMode]);
 }

+TEST(LineOrParagraphSeparatorAsLineTerminator) {
+  // Tests that both preparsing and parsing accept U+2028 LINE SEPARATOR and
+  // U+2029 PARAGRAPH SEPARATOR as LineTerminator symbols. "1<LS>2" and
+  // "1<PS>2" only parse when the separator acts as a line terminator, which
+  // lets automatic semicolon insertion split the two numeric literals into
+  // separate statements. A successful parse is therefore the expected result.
+  const char* context_data[][2] = {{"", ""}, {nullptr, nullptr}};
+  const char* statement_data[] = {"\x31\xE2\x80\xA8\x32",  // "1<U+2028>2"
+                                  "\x31\xE2\x80\xA9\x32",  // "1<U+2029>2"
+                                  nullptr};
+
+  RunParserSyncTest(context_data, statement_data, kSuccess);
+}
+
+TEST(LineOrParagraphSeparatorAsLineTerminatorUseCount) {
+  // Checks that scanning U+2028 / U+2029 as a line terminator is reported
+  // through the use-counter callback installed on the isolate.
+  i::Isolate* isolate = CcTest::i_isolate();
+  i::HandleScope scope(isolate);
+  LocalContext env;
+  const v8::Isolate::UseCounterFeature feature =
+      v8::Isolate::UseCounterFeature::kLineOrParagraphSeparatorAsLineTerminator;
+  int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
+  global_use_counts = use_counts;
+  CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
+
+  // A script without either separator leaves the counter untouched.
+  CompileRun("");
+  CHECK_EQ(0, use_counts[feature]);
+
+  // "// Foo<U+2028>": the line separator terminates the comment.
+  CompileRun("// Foo\xE2\x80\xA8");
+  CHECK_LT(0, use_counts[feature]);
+
+  // "// Foo<U+2029>": the paragraph separator adds to the same counter.
+  CompileRun("// Foo\xE2\x80\xA9");
+  CHECK_LT(1, use_counts[feature]);
+}

 TEST(ErrorsArrowFormalParameters) {
  const char* context_data[][2] = {