From 2d40e2f44586a87d6e6b8e791172d3734cd3ae44 Mon Sep 17 00:00:00 2001
From: Toon Verwaest
Date: Wed, 1 Aug 2018 16:24:10 +0200
Subject: [PATCH] [scanner] Prepare CharacterStreams for specializing scanner and parser by character type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This templatizes CharacterStream by char type, and makes them subclass
ScannerStream. Methods that are widely used by tests are marked virtual on
ScannerStream and final on CharacterStream so the specialized scanner will
know what to call. ParseInfo passes around ScannerStream, but the scanner
requires the explicit CharacterStream. Since AdvanceUntil is templatized by
FunctionType, I couldn't mark that virtual; so instead I adjusted those tests
to operate directly on ucs2 (not utf8 since we'll drop that in the future).

In the end no functionality was changed. Some calls became virtual in tests.
This is mainly just preparation.

Change-Id: I0b4def65d3eb8fa5c806027c7e9123a590ebbdb5
Reviewed-on: https://chromium-review.googlesource.com/1156690
Commit-Queue: Toon Verwaest
Reviewed-by: Michael Starzinger
Reviewed-by: Marja Hölttä
Cr-Commit-Position: refs/heads/master@{#54848}
---
 src/asmjs/asm-js.cc | 6 +-
 src/asmjs/asm-parser.cc | 4 +-
 src/asmjs/asm-parser.h | 6 +-
 src/asmjs/asm-scanner.cc | 4 +-
 src/asmjs/asm-scanner.h | 7 +-
 .../unoptimized-compile-job.cc | 4 +-
 src/compiler.cc | 2 +-
 src/parsing/parse-info.cc | 2 +-
 src/parsing/parse-info.h | 11 +-
 src/parsing/parser.cc | 12 +-
 src/parsing/parsing.cc | 5 +-
 src/parsing/scanner-character-streams.cc | 23 +-
 src/parsing/scanner-character-streams.h | 182 ++++++++++++-
 src/parsing/scanner.cc | 2 +-
 src/parsing/scanner.h | 166 +-----------
 test/cctest/parsing/test-scanner-streams.cc | 253 +++++++++---------
 test/cctest/parsing/test-scanner.cc | 2 +-
 test/cctest/test-parsing.cc | 29 +-
 test/unittests/asmjs/asm-scanner-unittest.cc | 5 +-
 19 files changed, 372 insertions(+), 353 deletions(-)

diff --git 
a/src/asmjs/asm-js.cc b/src/asmjs/asm-js.cc index 604207bc0d..b71b92fbde 100644 --- a/src/asmjs/asm-js.cc +++ b/src/asmjs/asm-js.cc @@ -234,13 +234,13 @@ UnoptimizedCompilationJob::Status AsmJsCompilationJob::ExecuteJobImpl() { Zone* compile_zone = compilation_info()->zone(); Zone translate_zone(allocator_, ZONE_NAME); - Utf16CharacterStream* stream = parse_info()->character_stream(); + ScannerStream* stream = parse_info()->character_stream(); base::Optional allow_deref; if (stream->can_access_heap()) { allow_deref.emplace(); } - stream->Seek(compilation_info()->literal()->start_position()); - wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream); + wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream, + compilation_info()->literal()->start_position()); if (!parser.Run()) { if (!FLAG_suppress_asm_messages) { ReportCompilationFailure(parse_info(), parser.failure_location(), diff --git a/src/asmjs/asm-parser.cc b/src/asmjs/asm-parser.cc index fee309d9fb..1b185041df 100644 --- a/src/asmjs/asm-parser.cc +++ b/src/asmjs/asm-parser.cc @@ -69,9 +69,9 @@ namespace wasm { #define TOK(name) AsmJsScanner::kToken_##name AsmJsParser::AsmJsParser(Zone* zone, uintptr_t stack_limit, - Utf16CharacterStream* stream) + ScannerStream* stream, int start) : zone_(zone), - scanner_(stream), + scanner_(static_cast*>(stream), start), module_builder_(new (zone) WasmModuleBuilder(zone)), return_type_(nullptr), stack_limit_(stack_limit), diff --git a/src/asmjs/asm-parser.h b/src/asmjs/asm-parser.h index ac8a05a028..b88d49ab7a 100644 --- a/src/asmjs/asm-parser.h +++ b/src/asmjs/asm-parser.h @@ -16,7 +16,7 @@ namespace v8 { namespace internal { -class Utf16CharacterStream; +class ScannerStream; namespace wasm { @@ -49,8 +49,8 @@ class AsmJsParser { typedef EnumSet StdlibSet; - explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, - Utf16CharacterStream* stream); + explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, ScannerStream* stream, + int start); bool Run(); 
const char* failure_message() const { return failure_message_; } int failure_location() const { return failure_location_; } diff --git a/src/asmjs/asm-scanner.cc b/src/asmjs/asm-scanner.cc index c7144e3be6..06e4dfcf1f 100644 --- a/src/asmjs/asm-scanner.cc +++ b/src/asmjs/asm-scanner.cc @@ -7,6 +7,7 @@ #include "src/char-predicates-inl.h" #include "src/conversions.h" #include "src/flags.h" +#include "src/parsing/scanner-character-streams.h" #include "src/parsing/scanner.h" #include "src/unicode-cache.h" @@ -19,7 +20,7 @@ namespace { static const int kMaxIdentifierCount = 0xF000000; }; -AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream) +AsmJsScanner::AsmJsScanner(CharacterStream* stream, int start) : stream_(stream), token_(kUninitialized), preceding_token_(kUninitialized), @@ -33,6 +34,7 @@ AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream) double_value_(0.0), unsigned_value_(0), preceded_by_newline_(false) { + stream->Seek(start); #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; STDLIB_MATH_FUNCTION_LIST(V) STDLIB_ARRAY_TYPE_LIST(V) diff --git a/src/asmjs/asm-scanner.h b/src/asmjs/asm-scanner.h index 1f38f0fc66..d0cb6dd848 100644 --- a/src/asmjs/asm-scanner.h +++ b/src/asmjs/asm-scanner.h @@ -16,7 +16,8 @@ namespace v8 { namespace internal { -class Utf16CharacterStream; +template +class CharacterStream; // A custom scanner to extract the token stream needed to parse valid // asm.js: http://asmjs.org/spec/latest/ @@ -31,7 +32,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner { public: typedef int32_t token_t; - explicit AsmJsScanner(Utf16CharacterStream* stream); + AsmJsScanner(CharacterStream* stream, int start); // Get current token. token_t Token() const { return token_; } @@ -136,7 +137,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner { // clang-format on private: - Utf16CharacterStream* stream_; + CharacterStream* stream_; token_t token_; token_t preceding_token_; token_t next_token_; // Only set when in {rewind} state. 
diff --git a/src/compiler-dispatcher/unoptimized-compile-job.cc b/src/compiler-dispatcher/unoptimized-compile-job.cc index 2e8065ed11..afe97ff221 100644 --- a/src/compiler-dispatcher/unoptimized-compile-job.cc +++ b/src/compiler-dispatcher/unoptimized-compile-job.cc @@ -133,7 +133,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) { DCHECK(script->type() != Script::TYPE_NATIVE); Handle source(String::cast(script->source()), isolate); if (source->IsExternalTwoByteString() || source->IsExternalOneByteString()) { - std::unique_ptr stream(ScannerStream::For( + std::unique_ptr stream(ScannerStream::For( isolate, source, shared_->StartPosition(), shared_->EndPosition())); parse_info_->set_character_stream(std::move(stream)); } else { @@ -191,7 +191,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) { .ToHandleChecked(); } wrapper_ = isolate->global_handles()->Create(*wrapper); - std::unique_ptr stream( + std::unique_ptr stream( ScannerStream::For(isolate, wrapper_, shared_->StartPosition() - offset, shared_->EndPosition() - offset)); parse_info_->set_character_stream(std::move(stream)); diff --git a/src/compiler.cc b/src/compiler.cc index 8d36d76405..27fc346f7d 100644 --- a/src/compiler.cc +++ b/src/compiler.cc @@ -966,7 +966,7 @@ BackgroundCompileTask::BackgroundCompileTask(ScriptStreamingData* source, info->set_runtime_call_stats(nullptr); } info->set_toplevel(); - std::unique_ptr stream( + std::unique_ptr stream( ScannerStream::For(source->source_stream.get(), source->encoding, info->runtime_call_stats())); info->set_character_stream(std::move(stream)); diff --git a/src/parsing/parse-info.cc b/src/parsing/parse-info.cc index 987c5e642d..835cf0f22e 100644 --- a/src/parsing/parse-info.cc +++ b/src/parsing/parse-info.cc @@ -198,7 +198,7 @@ void ParseInfo::AllocateSourceRangeMap() { void ParseInfo::ResetCharacterStream() { character_stream_.reset(); } void ParseInfo::set_character_stream( - std::unique_ptr character_stream) { + 
std::unique_ptr character_stream) { DCHECK_NULL(character_stream_); character_stream_.swap(character_stream); } diff --git a/src/parsing/parse-info.h b/src/parsing/parse-info.h index 4abf3a1fb0..0d1b40b1e9 100644 --- a/src/parsing/parse-info.h +++ b/src/parsing/parse-info.h @@ -31,7 +31,7 @@ class RuntimeCallStats; class Logger; class SourceRangeMap; class UnicodeCache; -class Utf16CharacterStream; +class ScannerStream; class Zone; // A container for the inputs, configuration options, and outputs of parsing. @@ -97,11 +97,8 @@ class V8_EXPORT_PRIVATE ParseInfo { : NO_PARSE_RESTRICTION; } - Utf16CharacterStream* character_stream() const { - return character_stream_.get(); - } - void set_character_stream( - std::unique_ptr character_stream); + ScannerStream* character_stream() const { return character_stream_.get(); } + void set_character_stream(std::unique_ptr character_stream); void ResetCharacterStream(); v8::Extension* extension() const { return extension_; } @@ -274,7 +271,7 @@ class V8_EXPORT_PRIVATE ParseInfo { MaybeHandle maybe_outer_scope_info_; //----------- Inputs+Outputs of parsing and scope analysis ----------------- - std::unique_ptr character_stream_; + std::unique_ptr character_stream_; ConsumedPreParsedScopeData consumed_preparsed_scope_data_; std::shared_ptr ast_value_factory_; const class AstStringConstants* ast_string_constants_; diff --git a/src/parsing/parser.cc b/src/parsing/parser.cc index 05001019b4..325d722a5f 100644 --- a/src/parsing/parser.cc +++ b/src/parsing/parser.cc @@ -507,7 +507,9 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) { // Initialize parser state. 
DeserializeScopeChain(isolate, info, info->maybe_outer_scope_info()); - scanner_.Initialize(info->character_stream(), info->is_module()); + auto stream = + static_cast*>(info->character_stream()); + scanner_.Initialize(stream, info->is_module()); FunctionLiteral* result = DoParseProgram(isolate, info); MaybeResetCharacterStream(info, result); @@ -701,7 +703,9 @@ FunctionLiteral* Parser::ParseFunction(Isolate* isolate, ParseInfo* info, // Initialize parser state. Handle name(shared_info->Name(), isolate); info->set_function_name(ast_value_factory()->GetString(name)); - scanner_.Initialize(info->character_stream(), info->is_module()); + auto stream = + static_cast*>(info->character_stream()); + scanner_.Initialize(stream, info->is_module()); FunctionLiteral* result = DoParseFunction(isolate, info, info->function_name()); @@ -3435,7 +3439,9 @@ void Parser::ParseOnBackground(ParseInfo* info) { DCHECK_NULL(info->literal()); FunctionLiteral* result = nullptr; - scanner_.Initialize(info->character_stream(), info->is_module()); + auto stream = + static_cast*>(info->character_stream()); + scanner_.Initialize(stream, info->is_module()); DCHECK(info->maybe_outer_scope_info().is_null()); DCHECK(original_scope_); diff --git a/src/parsing/parsing.cc b/src/parsing/parsing.cc index 378023cbeb..510700b138 100644 --- a/src/parsing/parsing.cc +++ b/src/parsing/parsing.cc @@ -26,8 +26,7 @@ bool ParseProgram(ParseInfo* info, Isolate* isolate) { // Create a character stream for the parser. Handle source(String::cast(info->script()->source()), isolate); isolate->counters()->total_parse_size()->Increment(source->length()); - std::unique_ptr stream( - ScannerStream::For(isolate, source)); + std::unique_ptr stream(ScannerStream::For(isolate, source)); info->set_character_stream(std::move(stream)); Parser parser(info); @@ -61,7 +60,7 @@ bool ParseFunction(ParseInfo* info, Handle shared_info, // Create a character stream for the parser. 
Handle source(String::cast(info->script()->source()), isolate); isolate->counters()->total_parse_size()->Increment(source->length()); - std::unique_ptr stream( + std::unique_ptr stream( ScannerStream::For(isolate, source, shared_info->StartPosition(), shared_info->EndPosition())); info->set_character_stream(std::move(stream)); diff --git a/src/parsing/scanner-character-streams.cc b/src/parsing/scanner-character-streams.cc index dd7a028d57..06093ebddf 100644 --- a/src/parsing/scanner-character-streams.cc +++ b/src/parsing/scanner-character-streams.cc @@ -157,7 +157,7 @@ class ChunkedStream { // Chars are buffered if either the underlying stream isn't utf-16 or the // underlying utf-16 stream might move (is on-heap). template class ByteStream> -class BufferedCharacterStream : public Utf16CharacterStream { +class BufferedCharacterStream : public CharacterStream { public: template BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) { @@ -194,7 +194,7 @@ class BufferedCharacterStream : public Utf16CharacterStream { // Provides a unbuffered utf-16 view on the bytes from the underlying // ByteStream. template