[parser] Add use counter for U+2028 & U+2029

The context is the following proposal to make JSON a subset of
JavaScript: https://github.com/tc39/proposal-json-superset

There’s interest in performing a side investigation to answer the
question of what would happen if we stopped treating U+2028 and U+2029
as `LineTerminator`s *entirely*. (Note that this is separate from the
proposal, which just changes how these characters are handled in
ECMAScript strings.) This is technically a breaking change, and IMHO it
would be wonderful if we could get away with it, but no one really has
any data on whether or not we could. Adding this use counter lets us get
that data.

BUG=v8:6827

Cq-Include-Trybots: master.tryserver.chromium.linux:linux_chromium_rel_ng
Change-Id: Ia22e8db1634df4d3f965bec8e1cfa11cc7b5e9aa
Reviewed-on: https://chromium-review.googlesource.com/693155
Commit-Queue: Mathias Bynens <mathias@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48260}
This commit is contained in:
Mathias Bynens 2017-09-29 15:52:15 -04:00 committed by Commit Bot
parent 7283b57cf8
commit d3c9812143
7 changed files with 97 additions and 45 deletions

View File

@ -7048,6 +7048,7 @@ class V8_EXPORT Isolate {
kPromiseConstructorReturnedUndefined = 38,
kConstructorNonUndefinedPrimitiveReturn = 39,
kLabeledExpressionStatement = 40,
kLineOrParagraphSeparatorAsLineTerminator = 41,
// If you add new values here, you'll also need to update Chromium's:
// UseCounter.h, V8PerIsolateData.cpp, histograms.xml

View File

@ -464,7 +464,7 @@ Parser::Parser(ParseInfo* info)
: ParserBase<Parser>(info->zone(), &scanner_, info->stack_limit(),
info->extension(), info->GetOrCreateAstValueFactory(),
info->runtime_call_stats(), true),
scanner_(info->unicode_cache()),
scanner_(info->unicode_cache(), use_counts_),
reusable_preparser_(nullptr),
mode_(PARSE_EAGERLY), // Lazy mode must be set explicitly.
source_range_map_(info->source_range_map()),

View File

@ -173,14 +173,30 @@ bool Scanner::BookmarkScope::HasBeenApplied() {
return bookmark_ == kBookmarkWasApplied;
}
// LineTerminator: 'JS_Line_Terminator' in point.properties
// ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
//
// Returns true if |c| is one of those four code points. As a side effect,
// every LS/PS hit bumps the kLineOrParagraphSeparatorAsLineTerminator entry
// of |use_counts_|.
bool Scanner::IsLineTerminator(uc32 c) {
  if (c == 0x000A || c == 0x000D) {
    return true;
  }
  if (c == 0x2028 || c == 0x2029) {
    // A Scanner may be constructed with a null counter array; the character
    // must still be classified correctly in that case, just without counting.
    if (use_counts_ != nullptr) {
      ++use_counts_[v8::Isolate::UseCounterFeature::
                        kLineOrParagraphSeparatorAsLineTerminator];
    }
    return true;
  }
  return false;
}
// ----------------------------------------------------------------------------
// Scanner
Scanner::Scanner(UnicodeCache* unicode_cache)
Scanner::Scanner(UnicodeCache* unicode_cache, int* use_counts)
: unicode_cache_(unicode_cache),
octal_pos_(Location::invalid()),
octal_message_(MessageTemplate::kNone),
found_html_comment_(false) {}
found_html_comment_(false),
use_counts_(use_counts) {}
void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) {
DCHECK_NOT_NULL(source);
@ -439,7 +455,7 @@ Token::Value Scanner::SkipWhiteSpace() {
// Advance as long as character is a WhiteSpace or LineTerminator.
// Remember if the latter is the case.
if (unibrow::IsLineTerminator(c0_)) {
if (IsLineTerminator(c0_)) {
has_line_terminator_before_next_ = true;
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break;
@ -496,7 +512,7 @@ Token::Value Scanner::SkipSingleLineComment() {
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar (see
// ECMA-262, section 7.4).
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
Advance();
}
@ -506,7 +522,7 @@ Token::Value Scanner::SkipSingleLineComment() {
Token::Value Scanner::SkipSourceURLComment() {
TryToParseSourceURLComment();
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
Advance();
}
@ -542,7 +558,7 @@ void Scanner::TryToParseSourceURLComment() {
while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
Advance();
}
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
// Disallowed characters.
if (c0_ == '"' || c0_ == '\'') {
value->Reset();
@ -555,7 +571,7 @@ void Scanner::TryToParseSourceURLComment() {
Advance();
}
// Allow whitespace at the end.
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
if (!unicode_cache_->IsWhiteSpace(c0_)) {
value->Reset();
break;
@ -572,7 +588,7 @@ Token::Value Scanner::SkipMultiLineComment() {
while (c0_ != kEndOfInput) {
uc32 ch = c0_;
Advance();
if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
if (c0_ != kEndOfInput && IsLineTerminator(ch)) {
// Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator.
has_multiline_comment_before_next_ = true;
@ -968,8 +984,7 @@ bool Scanner::ScanEscape() {
Advance<capture_raw>();
// Skip escaped newlines.
if (!in_template_literal && c0_ != kEndOfInput &&
unibrow::IsLineTerminator(c)) {
if (!in_template_literal && c0_ != kEndOfInput && IsLineTerminator(c)) {
// Allow escaped CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
return true;
@ -1062,8 +1077,7 @@ Token::Value Scanner::ScanString() {
AddLiteralChar(c);
}
while (c0_ != quote && c0_ != kEndOfInput &&
!unibrow::IsLineTerminator(c0_)) {
while (c0_ != quote && c0_ != kEndOfInput && !IsLineTerminator(c0_)) {
uc32 c = c0_;
Advance();
if (c == '\\') {
@ -1119,7 +1133,7 @@ Token::Value Scanner::ScanTemplateSpan() {
ReduceRawLiteralLength(2);
break;
} else if (c == '\\') {
if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
if (c0_ != kEndOfInput && IsLineTerminator(c0_)) {
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
// code unit sequence.
uc32 lastChar = c0_;
@ -1660,12 +1674,12 @@ bool Scanner::ScanRegExpPattern() {
}
while (c0_ != '/' || in_character_class) {
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
if (c0_ == kEndOfInput || IsLineTerminator(c0_)) {
return false;
}
if (c0_ == '\\') { // Escape sequence.
AddLiteralCharAdvance();
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
if (c0_ == kEndOfInput || IsLineTerminator(c0_)) {
return false;
}
AddLiteralCharAdvance();

View File

@ -207,7 +207,7 @@ class Scanner {
static const int kNoOctalLocation = -1;
static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
explicit Scanner(UnicodeCache* scanner_contants);
explicit Scanner(UnicodeCache* scanner_contants, int* use_counts_);
void Initialize(Utf16CharacterStream* source, bool is_module);
@ -735,6 +735,8 @@ class Scanner {
bool is_module_;
bool IsLineTerminator(uc32 c);
Token::Value ScanTemplateSpan();
// Return the current source position.
@ -799,6 +801,8 @@ class Scanner {
// Whether this scanner encountered an HTML comment.
bool found_html_comment_;
int* use_counts_;
MessageTemplate::Template scanner_error_;
Location scanner_error_location_;
};

View File

@ -198,7 +198,7 @@ struct V8_EXPORT_PRIVATE WhiteSpace {
// ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
V8_INLINE bool IsLineTerminator(uchar c) {
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
return c == 0x000A || c == 0x000D || c == 0x2028 || c == 0x2029;
}
#ifndef V8_INTL_SUPPORT

View File

@ -29,6 +29,7 @@ struct ScannerTestHelper {
std::unique_ptr<UnicodeCache> unicode_cache;
std::unique_ptr<Utf16CharacterStream> stream;
std::unique_ptr<Scanner> scanner;
int use_counts[v8::Isolate::kUseCounterFeatureCount];
Scanner* operator->() const { return scanner.get(); }
Scanner* get() const { return scanner.get(); }
@ -38,8 +39,11 @@ ScannerTestHelper make_scanner(const char* src) {
ScannerTestHelper helper;
helper.unicode_cache = std::unique_ptr<UnicodeCache>(new UnicodeCache);
helper.stream = ScannerStream::ForTesting(src);
helper.scanner =
std::unique_ptr<Scanner>(new Scanner(helper.unicode_cache.get()));
for (int i = 0; i < v8::Isolate::kUseCounterFeatureCount; i++) {
helper.use_counts[i] = 0;
}
helper.scanner = std::unique_ptr<Scanner>(
new Scanner(helper.unicode_cache.get(), helper.use_counts));
helper.scanner->Initialize(helper.stream.get(), false);
return helper;
}

View File

@ -61,6 +61,17 @@ namespace v8 {
namespace internal {
namespace test_parsing {
namespace {

// Counter array that MockUseCounterCallback writes into. It is indexed by
// v8::Isolate::UseCounterFeature and must be pointed at a live array before
// the callback can be invoked, because the callback does not check for null.
int* global_use_counts = nullptr;

// Use-counter callback for tests: increments the slot of |feature| in
// |global_use_counts|. |isolate| is not used.
void MockUseCounterCallback(v8::Isolate* isolate,
                            v8::Isolate::UseCounterFeature feature) {
  ++global_use_counts[feature];
}

}  // namespace
TEST(ScanKeywords) {
struct KeywordToken {
const char* keyword;
@ -82,7 +93,7 @@ TEST(ScanKeywords) {
CHECK(static_cast<int>(sizeof(buffer)) >= length);
{
auto stream = i::ScannerStream::ForTesting(keyword, length);
i::Scanner scanner(&unicode_cache);
i::Scanner scanner(&unicode_cache, global_use_counts);
scanner.Initialize(stream.get(), false);
CHECK_EQ(key_token.token, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
@ -90,7 +101,7 @@ TEST(ScanKeywords) {
// Removing characters will make keyword matching fail.
{
auto stream = i::ScannerStream::ForTesting(keyword, length - 1);
i::Scanner scanner(&unicode_cache);
i::Scanner scanner(&unicode_cache, global_use_counts);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
@ -101,7 +112,7 @@ TEST(ScanKeywords) {
i::MemMove(buffer, keyword, length);
buffer[length] = chars_to_append[j];
auto stream = i::ScannerStream::ForTesting(buffer, length + 1);
i::Scanner scanner(&unicode_cache);
i::Scanner scanner(&unicode_cache, global_use_counts);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
@ -111,7 +122,7 @@ TEST(ScanKeywords) {
i::MemMove(buffer, keyword, length);
buffer[length - 1] = '_';
auto stream = i::ScannerStream::ForTesting(buffer, length);
i::Scanner scanner(&unicode_cache);
i::Scanner scanner(&unicode_cache, global_use_counts);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
@ -177,7 +188,7 @@ TEST(ScanHTMLEndComments) {
for (int i = 0; tests[i]; i++) {
const char* source = tests[i];
auto stream = i::ScannerStream::ForTesting(source);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
@ -196,7 +207,7 @@ TEST(ScanHTMLEndComments) {
for (int i = 0; fail_tests[i]; i++) {
const char* source = fail_tests[i];
auto stream = i::ScannerStream::ForTesting(source);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
@ -221,7 +232,7 @@ TEST(ScanHtmlComments) {
// Disallow HTML comments.
{
auto stream = i::ScannerStream::ForTesting(src);
i::Scanner scanner(&unicode_cache);
i::Scanner scanner(&unicode_cache, global_use_counts);
scanner.Initialize(stream.get(), true);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::ILLEGAL, scanner.Next());
@ -230,7 +241,7 @@ TEST(ScanHtmlComments) {
// Skip HTML comments:
{
auto stream = i::ScannerStream::ForTesting(src);
i::Scanner scanner(&unicode_cache);
i::Scanner scanner(&unicode_cache, global_use_counts);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
@ -389,7 +400,7 @@ TEST(StandAlonePreParser) {
uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
for (int i = 0; programs[i]; i++) {
auto stream = i::ScannerStream::ForTesting(programs[i]);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -425,7 +436,7 @@ TEST(StandAlonePreParserNoNatives) {
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
for (int i = 0; programs[i]; i++) {
auto stream = i::ScannerStream::ForTesting(programs[i]);
i::Scanner scanner(isolate->unicode_cache());
i::Scanner scanner(isolate->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
// Preparser defaults to disallowing natives syntax.
@ -495,7 +506,7 @@ TEST(RegressChromium62639) {
// failed in debug mode, and sometimes crashed in release mode.
auto stream = i::ScannerStream::ForTesting(program);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
@ -570,7 +581,7 @@ TEST(PreParseOverflow) {
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
auto stream = i::ScannerStream::ForTesting(program.get(), kProgramSize);
i::Scanner scanner(isolate->unicode_cache());
i::Scanner scanner(isolate->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -590,7 +601,7 @@ void TestStreamScanner(i::Utf16CharacterStream* stream,
i::Token::Value* expected_tokens,
int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) {
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
scanner.Initialize(stream, false);
int i = 0;
@ -668,7 +679,7 @@ TEST(StreamScanner) {
void TestScanRegExp(const char* re_source, const char* expected) {
auto stream = i::ScannerStream::ForTesting(re_source);
i::HandleScope scope(CcTest::i_isolate());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), global_use_counts);
scanner.Initialize(stream.get(), false);
i::Token::Value start = scanner.peek();
@ -1333,7 +1344,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
// Preparse the data.
i::PendingCompilationErrorHandler pending_error_handler;
if (test_preparser) {
i::Scanner scanner(isolate->unicode_cache());
i::Scanner scanner(isolate->unicode_cache(), global_use_counts);
std::unique_ptr<i::Utf16CharacterStream> stream(
i::ScannerStream::For(source));
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -3963,16 +3974,6 @@ TEST(AsmModuleFlag) {
CHECK(s->IsAsmModule() && s->AsDeclarationScope()->asm_module());
}
namespace {
int* global_use_counts = NULL;
void MockUseCounterCallback(v8::Isolate* isolate,
v8::Isolate::UseCounterFeature feature) {
++global_use_counts[feature];
}
} // namespace
TEST(UseAsmUseCount) {
i::Isolate* isolate = CcTest::i_isolate();
@ -4032,6 +4033,34 @@ TEST(BothModesUseCount) {
CHECK_LT(0, use_counts[v8::Isolate::kStrictMode]);
}
TEST(LineOrParagraphSeparatorAsLineTerminator) {
// Tests that both preparsing and parsing accept U+2028 LINE SEPARATOR and
// U+2029 PARAGRAPH SEPARATOR as LineTerminator symbols.
// NOTE(review): the expectation passed to RunParserSyncTest below is kError,
// which reads as the opposite of "accept" -- confirm which of the two (this
// comment or the expected result) reflects the intended behavior.
// A single empty prefix/suffix pair: the statements are parsed as-is.
const char* context_data[][2] = {{"", ""}, {nullptr, nullptr}};
// Each source is the digit 1, the UTF-8 bytes of the separator, then the
// digit 2. The list is nullptr-terminated.
const char* statement_data[] = {"\x31\xE2\x80\xA8\x32", // "1<U+2028>2"
"\x31\xE2\x80\xA9\x32", // "1<U+2029>2"
nullptr};
RunParserSyncTest(context_data, statement_data, kError);
}
TEST(LineOrParagraphSeparatorAsLineTerminatorUseCount) {
  // Checks that scanning U+2028 / U+2029 as a line terminator is reported
  // through the isolate's use-counter callback.
  i::Isolate* const internal_isolate = CcTest::i_isolate();
  i::HandleScope handle_scope(internal_isolate);
  LocalContext context;

  // Route use-counter notifications into a zero-initialized local array.
  int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
  global_use_counts = use_counts;
  CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);

  // Slot of the feature under test.
  const int kSeparatorFeature =
      v8::Isolate::kLineOrParagraphSeparatorAsLineTerminator;

  // An empty script contains no separators, so nothing is counted.
  CompileRun("");
  CHECK_EQ(0, use_counts[kSeparatorFeature]);

  // A single-line comment terminated by U+2028 is counted.
  CompileRun("// Foo\xE2\x80\xA8");  // "// Foo<U+2028>"
  CHECK_LT(0, use_counts[kSeparatorFeature]);

  // A second script, terminated by U+2029, pushes the count above one.
  CompileRun("// Foo\xE2\x80\xA9");  // "// Foo<U+2029>"
  CHECK_LT(1, use_counts[kSeparatorFeature]);
}
TEST(ErrorsArrowFormalParameters) {
const char* context_data[][2] = {