[scanner] Go back to untemplatized scanning with buffering

This reverts the following 3 CLs:

Revert "[scanner] Templatize scan functions by encoding"
Revert "[asm] Remove invalid static cast of character stream"
Revert "[scanner] Prepare CharacterStreams for specializing scanner and parser by character type"

The original idea behind this work was to avoid copying, converting and
buffering characters to be scanned by specializing the scanner functions. The
additional benefit was for scanner functions to have a bigger window over the
input. Even though we can get a pretty nice speedup from having a larger
window, in practice this rarely helps. The cost is a larger binary.

Since we can't eagerly convert utf8 to utf16 due to memory overhead, we'd also
need to have a specialized version of the scanner just for utf8. That's pretty
complex, and likely won't be better than simply bulk converting and buffering
utf8 as utf16.

Change-Id: Ic3564683932a0097e3f9f51cd88f62c6ac879dcb
Reviewed-on: https://chromium-review.googlesource.com/1183190
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55258}
This commit is contained in:
Toon Verwaest 2018-08-21 11:56:29 +02:00 committed by Commit Bot
parent f30b43ed95
commit fcfd995aa1
20 changed files with 1067 additions and 870 deletions

View File

@ -235,13 +235,13 @@ UnoptimizedCompilationJob::Status AsmJsCompilationJob::ExecuteJobImpl() {
Zone* compile_zone = compilation_info()->zone();
Zone translate_zone(allocator_, ZONE_NAME);
ScannerStream* stream = parse_info()->character_stream();
Utf16CharacterStream* stream = parse_info()->character_stream();
base::Optional<AllowHandleDereference> allow_deref;
if (stream->can_access_heap()) {
allow_deref.emplace();
}
wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream,
compilation_info()->literal()->start_position());
stream->Seek(compilation_info()->literal()->start_position());
wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream);
if (!parser.Run()) {
if (!FLAG_suppress_asm_messages) {
ReportCompilationFailure(parse_info(), parser.failure_location(),

View File

@ -69,9 +69,9 @@ namespace wasm {
#define TOK(name) AsmJsScanner::kToken_##name
AsmJsParser::AsmJsParser(Zone* zone, uintptr_t stack_limit,
ScannerStream* stream, int start)
Utf16CharacterStream* stream)
: zone_(zone),
scanner_(stream, start),
scanner_(stream),
module_builder_(new (zone) WasmModuleBuilder(zone)),
return_type_(nullptr),
stack_limit_(stack_limit),

View File

@ -16,7 +16,7 @@
namespace v8 {
namespace internal {
class ScannerStream;
class Utf16CharacterStream;
namespace wasm {
@ -49,8 +49,8 @@ class AsmJsParser {
typedef EnumSet<StandardMember, uint64_t> StdlibSet;
explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, ScannerStream* stream,
int start);
explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
Utf16CharacterStream* stream);
bool Run();
const char* failure_message() const { return failure_message_; }
int failure_location() const { return failure_location_; }

View File

@ -7,7 +7,6 @@
#include "src/char-predicates-inl.h"
#include "src/conversions.h"
#include "src/flags.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/scanner.h"
#include "src/unicode-cache.h"
@ -20,11 +19,7 @@ namespace {
static const int kMaxIdentifierCount = 0xF000000;
};
#define SPECIALIZE(Call, ...) \
(stream_->is_two_byte() ? Call<uint16_t>(__VA_ARGS__) \
: Call<uint8_t>(__VA_ARGS__))
AsmJsScanner::AsmJsScanner(ScannerStream* stream, int start)
AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream)
: stream_(stream),
token_(kUninitialized),
preceding_token_(kUninitialized),
@ -38,7 +33,6 @@ AsmJsScanner::AsmJsScanner(ScannerStream* stream, int start)
double_value_(0.0),
unsigned_value_(0),
preceded_by_newline_(false) {
SPECIALIZE(DoSeek, start);
#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
STDLIB_MATH_FUNCTION_LIST(V)
STDLIB_ARRAY_TYPE_LIST(V)
@ -55,10 +49,7 @@ AsmJsScanner::AsmJsScanner(ScannerStream* stream, int start)
Next();
}
void AsmJsScanner::Next() { SPECIALIZE(Scan); }
template <typename Char>
void AsmJsScanner::Scan() {
void AsmJsScanner::Next() {
if (rewind_) {
preceding_token_ = token_;
preceding_position_ = position_;
@ -92,8 +83,8 @@ void AsmJsScanner::Scan() {
preceding_position_ = position_;
for (;;) {
position_ = Source<Char>()->pos();
uc32 ch = Advance<Char>();
position_ = stream_->pos();
uc32 ch = stream_->Advance();
switch (ch) {
case ' ':
case '\t':
@ -113,20 +104,20 @@ void AsmJsScanner::Scan() {
case '\'':
case '"':
ConsumeString<Char>(ch);
ConsumeString(ch);
return;
case '/':
ch = Advance<Char>();
ch = stream_->Advance();
if (ch == '/') {
ConsumeCPPComment<Char>();
ConsumeCPPComment();
} else if (ch == '*') {
if (!ConsumeCComment<Char>()) {
if (!ConsumeCComment()) {
token_ = kParseError;
return;
}
} else {
Back<Char>();
stream_->Back();
token_ = '/';
return;
}
@ -138,7 +129,7 @@ void AsmJsScanner::Scan() {
case '>':
case '=':
case '!':
ConsumeCompareOrShift<Char>(ch);
ConsumeCompareOrShift(ch);
return;
#define V(single_char_token) case single_char_token:
@ -150,9 +141,9 @@ void AsmJsScanner::Scan() {
default:
if (IsIdentifierStart(ch)) {
ConsumeIdentifier<Char>(ch);
ConsumeIdentifier(ch);
} else if (IsNumberStart(ch)) {
ConsumeNumber<Char>(ch);
ConsumeNumber(ch);
} else {
// TODO(bradnelson): Support unicode (probably via UnicodeCache).
token_ = kParseError;
@ -220,7 +211,7 @@ std::string AsmJsScanner::Name(token_t token) const {
#endif
void AsmJsScanner::Seek(size_t pos) {
SPECIALIZE(DoSeek, pos);
stream_->Seek(pos);
preceding_token_ = kUninitialized;
token_ = kUninitialized;
next_token_ = kUninitialized;
@ -231,16 +222,15 @@ void AsmJsScanner::Seek(size_t pos) {
Next();
}
template <typename Char>
void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
// Consume characters while still part of the identifier.
identifier_string_.clear();
while (IsIdentifierPart(ch)) {
identifier_string_ += ch;
ch = Advance<Char>();
ch = stream_->Advance();
}
// Go back one for next time.
Back<Char>();
stream_->Back();
// Decode what the identifier means.
if (preceding_token_ == '.') {
@ -280,14 +270,13 @@ void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
}
}
template <typename Char>
void AsmJsScanner::ConsumeNumber(uc32 ch) {
std::string number;
number = ch;
bool has_dot = ch == '.';
bool has_prefix = false;
for (;;) {
ch = Advance<Char>();
ch = stream_->Advance();
if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
(ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
ch == 'x' ||
@ -306,7 +295,7 @@ void AsmJsScanner::ConsumeNumber(uc32 ch) {
break;
}
}
Back<Char>();
stream_->Back();
// Special case the most common number.
if (number.size() == 1 && number[0] == '0') {
unsigned_value_ = 0;
@ -333,7 +322,7 @@ void AsmJsScanner::ConsumeNumber(uc32 ch) {
// problem.
if (number[0] == '.') {
for (size_t k = 1; k < number.size(); ++k) {
Back<Char>();
stream_->Back();
}
token_ = '.';
return;
@ -355,12 +344,11 @@ void AsmJsScanner::ConsumeNumber(uc32 ch) {
}
}
template <typename Char>
bool AsmJsScanner::ConsumeCComment() {
for (;;) {
uc32 ch = Advance<Char>();
uc32 ch = stream_->Advance();
while (ch == '*') {
ch = Advance<Char>();
ch = stream_->Advance();
if (ch == '/') {
return true;
}
@ -371,36 +359,33 @@ bool AsmJsScanner::ConsumeCComment() {
}
}
template <typename Char>
void AsmJsScanner::ConsumeCPPComment() {
for (;;) {
uc32 ch = Advance<Char>();
uc32 ch = stream_->Advance();
if (ch == '\n' || ch == kEndOfInput) {
return;
}
}
}
template <typename Char>
void AsmJsScanner::ConsumeString(uc32 quote) {
// Only string allowed is 'use asm' / "use asm".
const char* expected = "use asm";
for (; *expected != '\0'; ++expected) {
if (Advance<Char>() != *expected) {
if (stream_->Advance() != *expected) {
token_ = kParseError;
return;
}
}
if (Advance<Char>() != quote) {
if (stream_->Advance() != quote) {
token_ = kParseError;
return;
}
token_ = kToken_UseAsm;
}
template <typename Char>
void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
uc32 next_ch = Advance<Char>();
uc32 next_ch = stream_->Advance();
if (next_ch == '=') {
switch (ch) {
case '<':
@ -421,14 +406,14 @@ void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
} else if (ch == '<' && next_ch == '<') {
token_ = kToken_SHL;
} else if (ch == '>' && next_ch == '>') {
if (Advance<Char>() == '>') {
if (stream_->Advance() == '>') {
token_ = kToken_SHR;
} else {
token_ = kToken_SAR;
Back<Char>();
stream_->Back();
}
} else {
Back<Char>();
stream_->Back();
token_ = ch;
}
}
@ -443,7 +428,5 @@ bool AsmJsScanner::IsNumberStart(uc32 ch) {
return ch == '.' || IsDecimalDigit(ch);
}
#undef SPECIALIZE
} // namespace internal
} // namespace v8

View File

@ -16,9 +16,7 @@
namespace v8 {
namespace internal {
class ScannerStream;
template <typename Char>
class CharacterStream;
class Utf16CharacterStream;
// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
@ -33,7 +31,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
public:
typedef int32_t token_t;
AsmJsScanner(ScannerStream* stream, int start);
explicit AsmJsScanner(Utf16CharacterStream* stream);
// Get current token.
token_t Token() const { return token_; }
@ -138,7 +136,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
// clang-format on
private:
ScannerStream* const stream_;
Utf16CharacterStream* stream_;
token_t token_;
token_t preceding_token_;
token_t next_token_; // Only set when in {rewind} state.
@ -156,37 +154,12 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
uint32_t unsigned_value_;
bool preceded_by_newline_;
template <typename Char>
void Scan();
template <typename Char>
inline CharacterStream<Char>* Source() {
return static_cast<CharacterStream<Char>*>(stream_);
}
template <typename Char>
inline uc32 Advance() {
return Source<Char>()->Advance();
}
template <typename Char>
inline void Back() {
return Source<Char>()->Back();
}
template <typename Char>
void DoSeek(size_t pos) {
Source<Char>()->Seek(pos);
}
// Consume multiple characters.
template <typename Char>
void ConsumeIdentifier(uc32 ch);
template <typename Char>
void ConsumeNumber(uc32 ch);
template <typename Char>
bool ConsumeCComment();
template <typename Char>
void ConsumeCPPComment();
template <typename Char>
void ConsumeString(uc32 quote);
template <typename Char>
void ConsumeCompareOrShift(uc32 ch);
// Classify character categories.

View File

@ -133,7 +133,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
DCHECK(script->type() != Script::TYPE_NATIVE);
Handle<String> source(String::cast(script->source()), isolate);
if (source->IsExternalTwoByteString() || source->IsExternalOneByteString()) {
std::unique_ptr<ScannerStream> stream(ScannerStream::For(
std::unique_ptr<Utf16CharacterStream> stream(ScannerStream::For(
isolate, source, shared_->StartPosition(), shared_->EndPosition()));
parse_info_->set_character_stream(std::move(stream));
} else {
@ -191,7 +191,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
.ToHandleChecked();
}
wrapper_ = isolate->global_handles()->Create(*wrapper);
std::unique_ptr<ScannerStream> stream(
std::unique_ptr<Utf16CharacterStream> stream(
ScannerStream::For(isolate, wrapper_, shared_->StartPosition() - offset,
shared_->EndPosition() - offset));
parse_info_->set_character_stream(std::move(stream));

View File

@ -966,7 +966,7 @@ BackgroundCompileTask::BackgroundCompileTask(ScriptStreamingData* source,
info->set_runtime_call_stats(nullptr);
}
info->set_toplevel();
std::unique_ptr<ScannerStream> stream(
std::unique_ptr<Utf16CharacterStream> stream(
ScannerStream::For(source->source_stream.get(), source->encoding,
info->runtime_call_stats()));
info->set_character_stream(std::move(stream));

View File

@ -188,7 +188,7 @@ void ParseInfo::AllocateSourceRangeMap() {
void ParseInfo::ResetCharacterStream() { character_stream_.reset(); }
void ParseInfo::set_character_stream(
std::unique_ptr<ScannerStream> character_stream) {
std::unique_ptr<Utf16CharacterStream> character_stream) {
DCHECK_NULL(character_stream_);
character_stream_.swap(character_stream);
}

View File

@ -31,7 +31,7 @@ class RuntimeCallStats;
class Logger;
class SourceRangeMap;
class UnicodeCache;
class ScannerStream;
class Utf16CharacterStream;
class Zone;
// A container for the inputs, configuration options, and outputs of parsing.
@ -101,8 +101,11 @@ class V8_EXPORT_PRIVATE ParseInfo {
: NO_PARSE_RESTRICTION;
}
ScannerStream* character_stream() const { return character_stream_.get(); }
void set_character_stream(std::unique_ptr<ScannerStream> character_stream);
Utf16CharacterStream* character_stream() const {
return character_stream_.get();
}
void set_character_stream(
std::unique_ptr<Utf16CharacterStream> character_stream);
void ResetCharacterStream();
v8::Extension* extension() const { return extension_; }
@ -270,7 +273,7 @@ class V8_EXPORT_PRIVATE ParseInfo {
MaybeHandle<ScopeInfo> maybe_outer_scope_info_;
//----------- Inputs+Outputs of parsing and scope analysis -----------------
std::unique_ptr<ScannerStream> character_stream_;
std::unique_ptr<Utf16CharacterStream> character_stream_;
ConsumedPreParsedScopeData consumed_preparsed_scope_data_;
std::shared_ptr<AstValueFactory> ast_value_factory_;
const class AstStringConstants* ast_string_constants_;

View File

@ -412,8 +412,7 @@ Parser::Parser(ParseInfo* info)
info->runtime_call_stats(), info->logger(),
info->script().is_null() ? -1 : info->script()->id(),
info->is_module(), true),
scanner_(info->unicode_cache(), info->character_stream(),
info->is_module()),
scanner_(info->unicode_cache()),
reusable_preparser_(nullptr),
mode_(PARSE_EAGERLY), // Lazy mode must be set explicitly.
source_range_map_(info->source_range_map()),
@ -508,8 +507,7 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) {
// Initialize parser state.
DeserializeScopeChain(isolate, info, info->maybe_outer_scope_info());
scanner_.Initialize();
scanner_.Initialize(info->character_stream(), info->is_module());
FunctionLiteral* result = DoParseProgram(isolate, info);
MaybeResetCharacterStream(info, result);
@ -703,8 +701,7 @@ FunctionLiteral* Parser::ParseFunction(Isolate* isolate, ParseInfo* info,
// Initialize parser state.
Handle<String> name(shared_info->Name(), isolate);
info->set_function_name(ast_value_factory()->GetString(name));
scanner_.Initialize();
scanner_.Initialize(info->character_stream(), info->is_module());
FunctionLiteral* result =
DoParseFunction(isolate, info, info->function_name());
@ -3450,8 +3447,7 @@ void Parser::ParseOnBackground(ParseInfo* info) {
DCHECK_NULL(info->literal());
FunctionLiteral* result = nullptr;
scanner_.Initialize();
scanner_.Initialize(info->character_stream(), info->is_module());
DCHECK(info->maybe_outer_scope_info().is_null());
DCHECK(original_scope_);

View File

@ -26,7 +26,8 @@ bool ParseProgram(ParseInfo* info, Isolate* isolate) {
// Create a character stream for the parser.
Handle<String> source(String::cast(info->script()->source()), isolate);
isolate->counters()->total_parse_size()->Increment(source->length());
std::unique_ptr<ScannerStream> stream(ScannerStream::For(isolate, source));
std::unique_ptr<Utf16CharacterStream> stream(
ScannerStream::For(isolate, source));
info->set_character_stream(std::move(stream));
Parser parser(info);
@ -60,7 +61,7 @@ bool ParseFunction(ParseInfo* info, Handle<SharedFunctionInfo> shared_info,
// Create a character stream for the parser.
Handle<String> source(String::cast(info->script()->source()), isolate);
isolate->counters()->total_parse_size()->Increment(source->length());
std::unique_ptr<ScannerStream> stream(
std::unique_ptr<Utf16CharacterStream> stream(
ScannerStream::For(isolate, source, shared_info->StartPosition(),
shared_info->EndPosition()));
info->set_character_stream(std::move(stream));

View File

@ -250,45 +250,82 @@ class Utf8ChunkedStream : public ChunkedStream<uint16_t> {
bool seen_bom_ = false;
};
// Provides a unbuffered utf-16 view on the bytes from the underlying
// ByteStream.
template <typename Char, template <typename T> class ByteStream>
class UnbufferedCharacterStream : public CharacterStream<Char> {
// Provides a buffered utf-16 view on the bytes from the underlying ByteStream.
// Chars are buffered if either the underlying stream isn't utf-16 or the
// underlying utf-16 stream might move (is on-heap).
template <template <typename T> class ByteStream>
class BufferedCharacterStream : public Utf16CharacterStream {
public:
template <class... TArgs>
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
this->buffer_pos_ = pos;
BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
buffer_pos_ = pos;
}
protected:
bool ReadBlock() final {
size_t position = this->pos();
this->buffer_pos_ = position;
Range<Char> range = this->byte_stream_.GetDataAt(position);
this->buffer_start_ = range.start;
this->buffer_end_ = range.end;
this->buffer_cursor_ = range.start;
if (range.length() == 0) return false;
size_t position = pos();
buffer_pos_ = position;
buffer_start_ = &buffer_[0];
buffer_cursor_ = buffer_start_;
DCHECK(!range.unaligned_start());
DCHECK_LE(this->buffer_start_, this->buffer_end_);
Range<uint8_t> range = byte_stream_.GetDataAt(position);
if (range.length() == 0) {
buffer_end_ = buffer_start_;
return false;
}
size_t length = Min(kBufferSize, range.length());
i::CopyCharsUnsigned(buffer_, range.start, length);
buffer_end_ = &buffer_[length];
return true;
}
bool can_access_heap() final { return ByteStream<Char>::kCanAccessHeap; }
bool can_access_heap() final { return ByteStream<uint8_t>::kCanAccessHeap; }
ByteStream<Char> byte_stream_;
private:
static const size_t kBufferSize = 512;
uc16 buffer_[kBufferSize];
ByteStream<uint8_t> byte_stream_;
};
// Provides a unbuffered utf-16 view on the bytes from the underlying
// ByteStream.
template <template <typename T> class ByteStream>
class UnbufferedCharacterStream : public Utf16CharacterStream {
public:
template <class... TArgs>
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
buffer_pos_ = pos;
}
protected:
bool ReadBlock() final {
size_t position = pos();
buffer_pos_ = position;
Range<uint16_t> range = byte_stream_.GetDataAt(position);
buffer_start_ = range.start;
buffer_end_ = range.end;
buffer_cursor_ = buffer_start_;
if (range.length() == 0) return false;
DCHECK(!range.unaligned_start());
DCHECK_LE(buffer_start_, buffer_end_);
return true;
}
bool can_access_heap() final { return ByteStream<uint16_t>::kCanAccessHeap; }
ByteStream<uint16_t> byte_stream_;
};
// Provides an unbuffered utf-16 view on the bytes from the underlying
// ByteStream.
template <typename Char>
class RelocatingCharacterStream
: public UnbufferedCharacterStream<Char, OnHeapStream> {
: public UnbufferedCharacterStream<OnHeapStream> {
public:
template <class... TArgs>
RelocatingCharacterStream(Isolate* isolate, size_t pos, TArgs... args)
: UnbufferedCharacterStream<Char, OnHeapStream>(pos, args...),
: UnbufferedCharacterStream<OnHeapStream>(pos, args...),
isolate_(isolate) {
isolate->heap()->AddGCEpilogueCallback(UpdateBufferPointersCallback,
v8::kGCTypeAll, this);
@ -304,46 +341,353 @@ class RelocatingCharacterStream
v8::GCType type,
v8::GCCallbackFlags flags,
void* stream) {
reinterpret_cast<RelocatingCharacterStream<Char>*>(stream)
reinterpret_cast<RelocatingCharacterStream*>(stream)
->UpdateBufferPointers();
}
void UpdateBufferPointers() {
Range<Char> range = this->byte_stream_.GetDataAt(0);
if (range.start != this->buffer_start_) {
this->buffer_cursor_ =
(this->buffer_cursor_ - this->buffer_start_) + range.start;
this->buffer_start_ = range.start;
this->buffer_end_ = range.end;
Range<uint16_t> range = byte_stream_.GetDataAt(0);
if (range.start != buffer_start_) {
buffer_cursor_ = (buffer_cursor_ - buffer_start_) + range.start;
buffer_start_ = range.start;
buffer_end_ = range.end;
}
}
Isolate* isolate_;
};
// ----------------------------------------------------------------------------
// BufferedUtf16CharacterStreams
//
// A buffered character stream based on a random access character
// source (ReadBlock can be called with pos() pointing to any position,
// even positions before the current).
//
// TODO(verwaest): Remove together with Utf8 external streaming streams.
// Utf16CharacterStream that owns a fixed-size uc16 buffer. Subclasses only
// implement FillBuffer; ReadBlock is final and refills the buffer through it.
class BufferedUtf16CharacterStream : public Utf16CharacterStream {
public:
BufferedUtf16CharacterStream();
protected:
// Capacity of buffer_, in uc16 elements.
static const size_t kBufferSize = 512;
// Refills buffer_ for the current pos() by calling FillBuffer.
bool ReadBlock() final;
// FillBuffer should read up to kBufferSize characters at position and store
// them into buffer_[0..]. It returns the number of characters stored.
virtual size_t FillBuffer(size_t position) = 0;
// Fixed sized buffer that this class reads from.
// The base class' buffer_start_ should always point to buffer_.
uc16 buffer_[kBufferSize];
};
// Passes buffer_ for all three pointer arguments of the base constructor and
// 0 for the last one. NOTE(review): presumably these are buffer start, cursor,
// end and the stream position, i.e. the stream starts out with an empty
// buffer at position 0 -- confirm against Utf16CharacterStream's constructor.
BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
: Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
// Refills buffer_ with the characters found at the current stream position.
// Returns true if at least one character is available after the refill.
bool BufferedUtf16CharacterStream::ReadBlock() {
  DCHECK_EQ(buffer_start_, buffer_);

  // Capture the position before touching any of the buffer members.
  size_t block_start = pos();
  buffer_pos_ = block_start;
  buffer_cursor_ = buffer_;

  size_t filled = FillBuffer(block_start);
  buffer_end_ = buffer_ + filled;

  // Refilling must not have moved the logical position, nor overrun buffer_.
  DCHECK_EQ(pos(), block_start);
  DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
  return buffer_cursor_ < buffer_end_;
}
// ----------------------------------------------------------------------------
// Utf8ExternalStreamingStream - chunked streaming of Utf-8 data.
//
// This implementation is fairly complex, since data arrives in chunks which
// may 'cut' arbitrarily into utf-8 characters. Also, seeking to a given
// character position is tricky because the byte position cannot be derived
// from the character position.
//
// TODO(verwaest): Decode utf8 chunks into utf16 chunks on the blink side
// instead so we don't need to buffer.
class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
public:
// Starts at chunk 0, byte 0, char 0 with the utf-8 decoder in its accepting
// state. |source_stream| supplies the chunks; |stats| is handed to the
// runtime-call timer used while fetching them.
Utf8ExternalStreamingStream(
ScriptCompiler::ExternalSourceStream* source_stream,
RuntimeCallStats* stats)
: current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
source_stream_(source_stream),
stats_(stats) {}
// Frees the data of every chunk recorded in chunks_; the stream therefore
// owns the buffers it obtained from the source stream.
~Utf8ExternalStreamingStream() final {
for (size_t i = 0; i < chunks_.size(); i++) delete[] chunks_[i].data;
}
bool can_access_heap() final { return false; }
protected:
// Decodes utf-8 data at character |position| into buffer_ and returns the
// number of uc16 elements stored.
size_t FillBuffer(size_t position) final;
private:
// A position within the data stream. It stores:
// - The 'physical' position (# of bytes in the stream),
// - the 'logical' position (# of ucs-2 characters, also within the stream),
// - a possibly incomplete utf-8 char at the current 'physical' position.
struct StreamPosition {
size_t bytes;
size_t chars;
uint32_t incomplete_char;
unibrow::Utf8::State state;
};
// Position contains a StreamPosition and the index of the chunk the position
// points into. (The chunk_no could be derived from pos, but that'd be
// an expensive search through all chunks.)
struct Position {
size_t chunk_no;
StreamPosition pos;
};
// A chunk in the list of chunks, containing:
// - The chunk data (data pointer and length), and
// - the position at the first byte of the chunk.
struct Chunk {
const uint8_t* data;
size_t length;
StreamPosition start;
};
// Within the current chunk, skip forward from current_ towards position.
bool SkipToPosition(size_t position);
// Within the current chunk, fill the buffer_ (while it has capacity).
void FillBufferFromCurrentChunk();
// Fetch a new chunk (assuming current_ is at the end of the current data).
bool FetchChunk();
// Search through the chunks and set current_ to point to the given position.
// (This call is potentially expensive.)
void SearchPosition(size_t position);
// All chunks fetched so far, in stream order.
std::vector<Chunk> chunks_;
// Where decoding will continue: chunk index plus byte/char position.
Position current_;
ScriptCompiler::ExternalSourceStream* source_stream_;
RuntimeCallStats* stats_;
};
// Within the chunk at current_.chunk_no, decodes forward from current_ until
// the utf-16 character index |position| is reached or the chunk is exhausted.
// Updates current_ (bytes, chars, decoder state, and chunk_no if the whole
// chunk was consumed). Returns true iff current_ ends up exactly at
// |position|.
bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) {
  DCHECK_LE(current_.pos.chars, position);  // We can only skip forward.

  // Already there? Then return immediately.
  if (current_.pos.chars == position) return true;

  const Chunk& chunk = chunks_[current_.chunk_no];
  DCHECK(current_.pos.bytes >= chunk.start.bytes);

  // Resume from current_ (not from the chunk start), so that the decoder
  // state and the character count always belong to the byte offset |it|.
  unibrow::Utf8::State state = current_.pos.state;
  uint32_t incomplete_char = current_.pos.incomplete_char;
  size_t it = current_.pos.bytes - chunk.start.bytes;
  size_t chars = current_.pos.chars;

  while (it < chunk.length && chars < position) {
    unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
        chunk.data[it], &it, &state, &incomplete_char);
    if (t == kUtf8Bom && chunk.start.bytes + it == 3) {
      // BOM detected at beginning of the stream (it ends at absolute byte 3).
      // Don't count it; FillBufferFromCurrentChunk doesn't copy it either.
      // A U+FEFF anywhere else is an ordinary character and is counted.
    } else if (t != unibrow::Utf8::kIncomplete) {
      chars++;
      // Characters beyond the BMP occupy two utf-16 code units.
      if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
    }
  }

  // |it| is an offset within the chunk, so rebase it on the chunk start.
  current_.pos.bytes = chunk.start.bytes + it;
  current_.pos.chars = chars;
  current_.pos.incomplete_char = incomplete_char;
  current_.pos.state = state;
  // Move on to the next chunk if this one has been fully consumed.
  current_.chunk_no += (it == chunk.length);

  return current_.pos.chars == position;
}
// Decodes utf-8 bytes of the chunk at current_.chunk_no, starting at
// current_.pos, and appends the resulting utf-16 code units at buffer_end_.
// Decoding stops when the chunk is exhausted or fewer than two free slots
// remain in buffer_ (a character above the BMP is written as two surrogates).
// Updates current_ and buffer_end_. For the zero-length chunk it only flushes
// a pending partial character, if the decoder reports one.
void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
DCHECK_LT(current_.chunk_no, chunks_.size());
DCHECK_EQ(buffer_start_, buffer_cursor_);
DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
const Chunk& chunk = chunks_[current_.chunk_no];
// The buffer_ is writable, but buffer_*_ members are const. So we get a
// non-const pointer into buffer that points to the same char as buffer_end_.
uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
DCHECK_EQ(cursor, buffer_end_);
unibrow::Utf8::State state = current_.pos.state;
uint32_t incomplete_char = current_.pos.incomplete_char;
// If the current chunk is the last (empty) chunk we'll have to process
// any left-over, partial characters.
if (chunk.length == 0) {
unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
if (t != unibrow::Utf8::kBufferEmpty) {
DCHECK_EQ(t, unibrow::Utf8::kBadChar);
*cursor = static_cast<uc16>(t);
buffer_end_++;
current_.pos.chars++;
current_.pos.incomplete_char = 0;
current_.pos.state = state;
}
return;
}
// Byte offset of current_ within the chunk.
size_t it = current_.pos.bytes - chunk.start.bytes;
// "cursor + 1" keeps room for the two code units of a surrogate pair.
while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) {
unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
chunk.data[it], &it, &state, &incomplete_char);
if (V8_LIKELY(t < kUtf8Bom)) {
*(cursor++) = static_cast<uc16>(t); // By far the most frequent case.
} else if (t == unibrow::Utf8::kIncomplete) {
continue;
} else if (t == kUtf8Bom && current_.pos.bytes + it == 3) {
// BOM detected at beginning of the stream. Don't copy it.
} else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
*(cursor++) = static_cast<uc16>(t);
} else {
*(cursor++) = unibrow::Utf16::LeadSurrogate(t);
*(cursor++) = unibrow::Utf16::TrailSurrogate(t);
}
}
current_.pos.bytes = chunk.start.bytes + it;
// buffer_end_ still holds its value from before the loop here, so this adds
// the number of code units just written.
current_.pos.chars += (cursor - buffer_end_);
current_.pos.incomplete_char = incomplete_char;
current_.pos.state = state;
// Move on to the next chunk if this one has been fully consumed.
current_.chunk_no += (it == chunk.length);
buffer_end_ = cursor;
}
// Asks the source stream for its next piece of data and appends it to
// chunks_, recording current_.pos as the position of the chunk's first byte.
// Returns false if the appended chunk is empty.
bool Utf8ExternalStreamingStream::FetchChunk() {
  RuntimeCallTimerScope scope(stats_,
                              RuntimeCallCounterId::kGetMoreDataCallback);
  // Only fetch when positioned right behind the last known chunk, and never
  // after an empty chunk has already been recorded.
  DCHECK_EQ(current_.chunk_no, chunks_.size());
  DCHECK(chunks_.empty() || chunks_.back().length != 0);

  const uint8_t* data = nullptr;
  size_t byte_count = source_stream_->GetMoreData(&data);
  chunks_.push_back({data, byte_count, current_.pos});
  return byte_count != 0;
}
// Sets current_ to the utf-16 character index |position|, fetching further
// chunks from the source stream if the position lies beyond the data seen so
// far. If the data ends before |position|, current_ is left pointing at the
// terminating (zero-length) chunk instead.
void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
// If current_ already points to the right position, we're done.
//
// This is expected to be the common case, since we typically call
// FillBuffer right after the current buffer.
if (current_.pos.chars == position) return;
// No chunks. Fetch at least one, so we can assume !chunks_.empty() below.
if (chunks_.empty()) {
DCHECK_EQ(current_.chunk_no, 0u);
DCHECK_EQ(current_.pos.bytes, 0u);
DCHECK_EQ(current_.pos.chars, 0u);
FetchChunk();
}
// Search for the last chunk whose start position is less or equal to
// position.
size_t chunk_no = chunks_.size() - 1;
while (chunk_no > 0 && chunks_[chunk_no].start.chars > position) {
chunk_no--;
}
// Did we find the terminating (zero-length) chunk? Then we're seeking
// behind the end of the data, and position does not exist.
// Set current_ to point to the terminating chunk.
if (chunks_[chunk_no].length == 0) {
current_ = {chunk_no, chunks_[chunk_no].start};
return;
}
// Did we find the non-last chunk? Then our position must be within chunk_no.
if (chunk_no + 1 < chunks_.size()) {
// Fancy-pants optimization for ASCII chunks within a utf-8 stream.
// (Many web sites declare utf-8 encoding, but use only (or almost only) the
// ASCII subset for their JavaScript sources. We can exploit this, by
// checking whether the # bytes in a chunk are equal to the # chars, and if
// so avoid the expensive SkipToPosition.)
bool ascii_only_chunk =
chunks_[chunk_no].start.incomplete_char == 0 &&
(chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
(chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
if (ascii_only_chunk) {
// One byte per char, so the byte offset equals the char offset.
size_t skip = position - chunks_[chunk_no].start.chars;
current_ = {chunk_no,
{chunks_[chunk_no].start.bytes + skip,
chunks_[chunk_no].start.chars + skip, 0,
unibrow::Utf8::State::kAccept}};
} else {
current_ = {chunk_no, chunks_[chunk_no].start};
SkipToPosition(position);
}
// Since position was within the chunk, SkipToPosition should have found
// something.
DCHECK_EQ(position, current_.pos.chars);
return;
}
// What's left: We're in the last, non-terminating chunk. Our position
// may be in the chunk, but it may also be in 'future' chunks, which we'll
// have to obtain.
DCHECK_EQ(chunk_no, chunks_.size() - 1);
current_ = {chunk_no, chunks_[chunk_no].start};
bool have_more_data = true;
bool found = SkipToPosition(position);
while (have_more_data && !found) {
DCHECK_EQ(current_.chunk_no, chunks_.size());
have_more_data = FetchChunk();
found = have_more_data && SkipToPosition(position);
}
// We'll return with a position != the desired position only if we're out
// of data. In that case, we'll point to the terminating chunk.
DCHECK_EQ(found, current_.pos.chars == position);
DCHECK_EQ(have_more_data, chunks_.back().length != 0);
DCHECK_IMPLIES(!found, !have_more_data);
DCHECK_IMPLIES(!found, current_.chunk_no == chunks_.size() - 1);
}
// Decodes utf-8 data starting at character index |position| into buffer_ and
// returns how many uc16 elements were stored (0 once the data has run out).
size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
  // Start from an empty buffer.
  buffer_cursor_ = buffer_;
  buffer_end_ = buffer_;

  SearchPosition(position);

  // Sitting on the empty terminating chunk with no partial character pending
  // means there is nothing left to decode.
  const bool at_end_of_stream = current_.chunk_no != chunks_.size() &&
                                chunks_[current_.chunk_no].length == 0 &&
                                current_.pos.incomplete_char == 0;
  if (at_end_of_stream) return 0;

  // Keep decoding until at least one char has been produced or the data runs
  // out. A single chunk can't guarantee progress: the embedder may hand out
  // 1-byte blocks that end in the middle of a utf-8 char.
  bool exhausted = false;
  while (!exhausted && buffer_cursor_ == buffer_end_) {
    // At end of current data, but there might be more? Then fetch it.
    const bool need_new_chunk = (current_.chunk_no == chunks_.size());
    if (need_new_chunk) {
      exhausted = !FetchChunk();
    }
    FillBufferFromCurrentChunk();
  }

  DCHECK_EQ(current_.pos.chars - position,
            static_cast<size_t>(buffer_end_ - buffer_cursor_));
  return buffer_end_ - buffer_cursor_;
}
// ----------------------------------------------------------------------------
// ScannerStream: Create stream instances.
#define SPECIALIZE(Call, ...) \
(is_two_byte_ \
? static_cast<CharacterStream<uint16_t>*>(this)->Call(__VA_ARGS__) \
: static_cast<CharacterStream<uint8_t>*>(this)->Call(__VA_ARGS__))
uc32 ScannerStream::Advance() { return SPECIALIZE(Advance); }
void ScannerStream::Seek(size_t pos) { SPECIALIZE(Seek, pos); }
size_t ScannerStream::pos() { return SPECIALIZE(pos); }
void ScannerStream::Back() { SPECIALIZE(Back); }
#undef SPECIALIZE
ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data) {
Utf16CharacterStream* ScannerStream::For(Isolate* isolate,
Handle<String> data) {
return ScannerStream::For(isolate, data, 0, data->length());
}
ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos) {
DCHECK_GE(start_pos, 0);
DCHECK_LE(start_pos, end_pos);
@ -359,22 +703,21 @@ ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
data = String::Flatten(isolate, data);
}
if (data->IsExternalOneByteString()) {
return new UnbufferedCharacterStream<uint8_t, ExternalStringStream>(
return new BufferedCharacterStream<ExternalStringStream>(
static_cast<size_t>(start_pos),
ExternalOneByteString::cast(*data)->GetChars() + start_offset,
static_cast<size_t>(end_pos));
} else if (data->IsExternalTwoByteString()) {
return new UnbufferedCharacterStream<uint16_t, ExternalStringStream>(
return new UnbufferedCharacterStream<ExternalStringStream>(
static_cast<size_t>(start_pos),
ExternalTwoByteString::cast(*data)->GetChars() + start_offset,
static_cast<size_t>(end_pos));
} else if (data->IsSeqOneByteString()) {
return new RelocatingCharacterStream<uint8_t>(
isolate, static_cast<size_t>(start_pos),
Handle<SeqOneByteString>::cast(data), start_offset,
static_cast<size_t>(end_pos));
return new BufferedCharacterStream<OnHeapStream>(
static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
start_offset, static_cast<size_t>(end_pos));
} else if (data->IsSeqTwoByteString()) {
return new RelocatingCharacterStream<uint16_t>(
return new RelocatingCharacterStream(
isolate, static_cast<size_t>(start_pos),
Handle<SeqTwoByteString>::cast(data), start_offset,
static_cast<size_t>(end_pos));
@ -383,33 +726,32 @@ ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
}
}
std::unique_ptr<CharacterStream<uint8_t>> ScannerStream::ForTesting(
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
const char* data) {
return ScannerStream::ForTesting(data, strlen(data));
}
std::unique_ptr<CharacterStream<uint8_t>> ScannerStream::ForTesting(
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
const char* data, size_t length) {
return std::unique_ptr<CharacterStream<uint8_t>>(
new UnbufferedCharacterStream<uint8_t, ExternalStringStream>(
return std::unique_ptr<Utf16CharacterStream>(
new BufferedCharacterStream<ExternalStringStream>(
static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
static_cast<size_t>(length)));
}
ScannerStream* ScannerStream::For(
Utf16CharacterStream* ScannerStream::For(
ScriptCompiler::ExternalSourceStream* source_stream,
v8::ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats) {
switch (encoding) {
case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
return new UnbufferedCharacterStream<uint16_t, ChunkedStream>(
return new UnbufferedCharacterStream<ChunkedStream>(
static_cast<size_t>(0), source_stream, stats);
case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
return new UnbufferedCharacterStream<uint8_t, ChunkedStream>(
static_cast<size_t>(0), source_stream, stats);
return new BufferedCharacterStream<ChunkedStream>(static_cast<size_t>(0),
source_stream, stats);
case v8::ScriptCompiler::StreamedSource::UTF8:
return new UnbufferedCharacterStream<uint16_t, Utf8ChunkedStream>(
static_cast<size_t>(0), source_stream, stats);
return new Utf8ExternalStreamingStream(source_stream, stats);
}
UNREACHABLE();
}

View File

@ -5,8 +5,6 @@
#ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#include <algorithm>
#include "include/v8.h" // for v8::ScriptCompiler
#include "src/globals.h"
@ -15,178 +13,23 @@ namespace internal {
template <typename T>
class Handle;
template <typename Char>
class CharacterStream;
class Utf16CharacterStream;
class RuntimeCallStats;
class String;
class V8_EXPORT_PRIVATE ScannerStream {
public:
static const uc32 kEndOfInput = -1;
static ScannerStream* For(Isolate* isolate, Handle<String> data);
static ScannerStream* For(Isolate* isolate, Handle<String> data,
static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data);
static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos);
static ScannerStream* For(ScriptCompiler::ExternalSourceStream* source_stream,
static Utf16CharacterStream* For(
ScriptCompiler::ExternalSourceStream* source_stream,
ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats);
// For testing:
static std::unique_ptr<CharacterStream<uint8_t>> ForTesting(const char* data);
static std::unique_ptr<CharacterStream<uint8_t>> ForTesting(const char* data,
static std::unique_ptr<Utf16CharacterStream> ForTesting(const char* data);
static std::unique_ptr<Utf16CharacterStream> ForTesting(const char* data,
size_t length);
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
uc32 Advance();
void Seek(size_t pos);
size_t pos();
void Back();
void Back2();
virtual ~ScannerStream() {}
bool is_two_byte() const { return is_two_byte_; }
protected:
explicit ScannerStream(bool is_two_byte) : is_two_byte_(is_two_byte) {}
private:
const bool is_two_byte_;
};
template <typename Char>
class CharacterStream : public ScannerStream {
public:
// Returns and advances past the next UTF-16 code unit in the input
// stream. If there are no more code units it returns kEndOfInput.
inline uc32 Advance() {
uc32 result = Peek();
buffer_cursor_++;
return result;
}
inline uc32 Peek() {
if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
return static_cast<uc32>(*buffer_cursor_);
} else if (ReadBlockChecked()) {
return static_cast<uc32>(*buffer_cursor_);
} else {
return kEndOfInput;
}
}
// Returns and advances past the next UTF-16 code unit in the input stream
// that meets the checks requirement. If there are no more code units it
// returns kEndOfInput.
template <typename FunctionType>
V8_INLINE uc32 AdvanceUntil(FunctionType check) {
while (true) {
auto next_cursor_pos =
std::find_if(buffer_cursor_, buffer_end_, [&check](Char raw_c0) {
uc32 c0 = static_cast<uc32>(raw_c0);
return check(c0);
});
if (next_cursor_pos == buffer_end_) {
buffer_cursor_ = buffer_end_;
if (!ReadBlockChecked()) {
buffer_cursor_++;
return kEndOfInput;
}
} else {
buffer_cursor_ = next_cursor_pos + 1;
return static_cast<uc32>(*next_cursor_pos);
}
}
}
// Go back one by one character in the input stream.
// This undoes the most recent Advance().
inline void Back() {
// The common case - if the previous character is within
// buffer_start_ .. buffer_end_ will be handles locally.
// Otherwise, a new block is requested.
if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
buffer_cursor_--;
} else {
ReadBlockAt(pos() - 1);
}
}
inline size_t pos() const {
return buffer_pos_ + (buffer_cursor_ - buffer_start_);
}
inline void Seek(size_t pos) {
if (V8_LIKELY(pos >= buffer_pos_ &&
pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
} else {
ReadBlockAt(pos);
}
}
protected:
CharacterStream(const Char* buffer_start, const Char* buffer_cursor,
const Char* buffer_end, size_t buffer_pos)
: ScannerStream(sizeof(Char) == 2),
buffer_start_(buffer_start),
buffer_cursor_(buffer_cursor),
buffer_end_(buffer_end),
buffer_pos_(buffer_pos) {}
CharacterStream() : CharacterStream(nullptr, nullptr, nullptr, 0) {}
bool ReadBlockChecked() {
size_t position = pos();
USE(position);
bool success = ReadBlock();
// Post-conditions: 1, We should always be at the right position.
// 2, Cursor should be inside the buffer.
// 3, We should have more characters available iff success.
DCHECK_EQ(pos(), position);
DCHECK_LE(buffer_cursor_, buffer_end_);
DCHECK_LE(buffer_start_, buffer_cursor_);
DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
return success;
}
void ReadBlockAt(size_t new_pos) {
// The callers of this method (Back/Seek) should handle the easy
// case (seeking within the current buffer), and we should only get here
// if we actually require new data.
// (This is really an efficiency check, not a correctness invariant.)
DCHECK(new_pos < buffer_pos_ ||
new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
// Change pos() to point to new_pos.
buffer_pos_ = new_pos;
buffer_cursor_ = buffer_start_;
DCHECK_EQ(pos(), new_pos);
ReadBlockChecked();
}
// Read more data, and update buffer_*_ to point to it.
// Returns true if more data was available.
//
// ReadBlock() may modify any of the buffer_*_ members, but must sure that
// the result of pos() remains unaffected.
//
// Examples:
// - a stream could either fill a separate buffer. Then buffer_start_ and
// buffer_cursor_ would point to the beginning of the buffer, and
// buffer_pos would be the old pos().
// - a stream with existing buffer chunks would set buffer_start_ and
// buffer_end_ to cover the full chunk, and then buffer_cursor_ would
// point into the middle of the buffer, while buffer_pos_ would describe
// the start of the buffer.
virtual bool ReadBlock() = 0;
const Char* buffer_start_;
const Char* buffer_cursor_;
const Char* buffer_end_;
size_t buffer_pos_;
};
} // namespace internal

View File

@ -11,9 +11,8 @@
namespace v8 {
namespace internal {
template <typename Char>
V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
int start_position = SourcePos<Char>();
int start_position = source_pos();
while (true) {
// We won't skip behind the end of input.
@ -26,11 +25,11 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break;
}
Advance<Char>();
Advance();
}
// Return whether or not we skipped any characters.
if (SourcePos<Char>() == start_position) {
if (source_pos() == start_position) {
DCHECK_NE('0', c0_);
return Token::ILLEGAL;
}

File diff suppressed because it is too large Load Diff

View File

@ -7,12 +7,13 @@
#ifndef V8_PARSING_SCANNER_H_
#define V8_PARSING_SCANNER_H_
#include <algorithm>
#include "src/allocation.h"
#include "src/base/logging.h"
#include "src/char-predicates.h"
#include "src/globals.h"
#include "src/messages.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/token.h"
#include "src/unicode-decoder.h"
#include "src/unicode.h"
@ -29,6 +30,150 @@ class ExternalTwoByteString;
class ParserRecorder;
class UnicodeCache;
// ---------------------------------------------------------------------
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one part of a surrogate pair that make a single 21 bit code point.
class Utf16CharacterStream {
 public:
  static const uc32 kEndOfInput = -1;

  virtual ~Utf16CharacterStream() {}

  // Returns the UTF-16 code unit at the cursor without consuming it, asking
  // for a new block when the current window is used up. Returns kEndOfInput
  // when no more code units are available.
  inline uc32 Peek() {
    if (V8_LIKELY(buffer_cursor_ < buffer_end_) || ReadBlockChecked()) {
      return static_cast<uc32>(*buffer_cursor_);
    }
    return kEndOfInput;
  }

  // Returns the next UTF-16 code unit in the input stream and moves past it.
  // The cursor is incremented even when kEndOfInput is returned.
  inline uc32 Advance() {
    const uc32 code_unit = Peek();
    ++buffer_cursor_;
    return code_unit;
  }

  // Skips forward to the first UTF-16 code unit for which |check| returns
  // true, returns it, and leaves the cursor just behind it. Returns
  // kEndOfInput if the input ends before any code unit satisfies |check|.
  template <typename FunctionType>
  V8_INLINE uc32 AdvanceUntil(FunctionType check) {
    for (;;) {
      const uint16_t* match = std::find_if(
          buffer_cursor_, buffer_end_, [&check](uint16_t raw_unit) {
            uc32 unit = static_cast<uc32>(raw_unit);
            return check(unit);
          });
      if (match != buffer_end_) {
        buffer_cursor_ = match + 1;
        return static_cast<uc32>(*match);
      }
      // Nothing matched in the current window: consume it and fetch more.
      buffer_cursor_ = buffer_end_;
      if (!ReadBlockChecked()) {
        ++buffer_cursor_;
        return kEndOfInput;
      }
    }
  }

  // Steps back by one character in the input stream, undoing the most recent
  // Advance().
  inline void Back() {
    // Common case: the previous character is still inside the current window
    // and is handled locally. Otherwise a new block is requested.
    if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
      --buffer_cursor_;
      return;
    }
    ReadBlockAt(pos() - 1);
  }

  // Position of the cursor, counted in UTF-16 code units.
  inline size_t pos() const {
    return buffer_pos_ + (buffer_cursor_ - buffer_start_);
  }

  // Moves the cursor to |pos|, reusing the current window when it already
  // covers that position.
  inline void Seek(size_t pos) {
    const size_t window_length = buffer_end_ - buffer_start_;
    const bool inside_window =
        pos >= buffer_pos_ && pos < buffer_pos_ + window_length;
    if (V8_LIKELY(inside_window)) {
      buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
      return;
    }
    ReadBlockAt(pos);
  }

  // Returns true if the stream could access the V8 heap after construction.
  virtual bool can_access_heap() = 0;

 protected:
  Utf16CharacterStream(const uint16_t* buffer_start,
                       const uint16_t* buffer_cursor,
                       const uint16_t* buffer_end, size_t buffer_pos)
      : buffer_start_(buffer_start),
        buffer_cursor_(buffer_cursor),
        buffer_end_(buffer_end),
        buffer_pos_(buffer_pos) {}
  Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}

  // Calls ReadBlock() and, in debug builds, verifies its post-conditions.
  bool ReadBlockChecked() {
    const size_t pos_before = pos();
    USE(pos_before);
    const bool got_data = ReadBlock();

    // Post-conditions: 1, We should always be at the right position.
    //                  2, Cursor should be inside the buffer.
    //                  3, We should have more characters available iff
    //                     ReadBlock() reported success.
    DCHECK_EQ(pos(), pos_before);
    DCHECK_LE(buffer_cursor_, buffer_end_);
    DCHECK_LE(buffer_start_, buffer_cursor_);
    DCHECK_EQ(got_data, buffer_cursor_ < buffer_end_);
    return got_data;
  }

  // Repositions the stream at |new_pos| and reads the block found there.
  void ReadBlockAt(size_t new_pos) {
    // The callers of this method (Back/Seek) should handle the easy case
    // (seeking within the current buffer), so we should only get here if new
    // data is actually required.
    // (This is really an efficiency check, not a correctness invariant.)
    DCHECK(!(new_pos >= buffer_pos_ &&
             new_pos < buffer_pos_ + (buffer_end_ - buffer_start_)));

    // Make pos() report new_pos.
    buffer_cursor_ = buffer_start_;
    buffer_pos_ = new_pos;
    DCHECK_EQ(pos(), new_pos);
    ReadBlockChecked();
  }

  // Read more data, and update buffer_*_ to point to it.
  // Returns true if more data was available.
  //
  // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
  // the result of pos() remains unaffected.
  //
  // Examples:
  // - a stream could fill a separate buffer. Then buffer_start_ and
  //   buffer_cursor_ would point to the beginning of the buffer, and
  //   buffer_pos_ would be the old pos().
  // - a stream with existing buffer chunks would set buffer_start_ and
  //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
  //   point into the middle of the buffer, while buffer_pos_ would describe
  //   the start of the buffer.
  virtual bool ReadBlock() = 0;

  const uint16_t* buffer_start_;
  const uint16_t* buffer_cursor_;
  const uint16_t* buffer_end_;
  size_t buffer_pos_;
};
// ----------------------------------------------------------------------------
// JavaScript Scanner.
@ -76,10 +221,11 @@ class Scanner {
// -1 is outside of the range of any real source code.
static const int kNoOctalLocation = -1;
static const uc32 kEndOfInput = ScannerStream::kEndOfInput;
static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
Scanner(UnicodeCache* scanner_contants, ScannerStream* source,
bool is_module);
explicit Scanner(UnicodeCache* scanner_contants);
void Initialize(Utf16CharacterStream* source, bool is_module);
// Returns the next token and advances input.
Token::Value Next();
@ -209,18 +355,18 @@ class Scanner {
return has_line_terminator_after_next_;
}
#define SPECIALIZE(Call) \
(source_->is_two_byte() ? Call<uint16_t>() : Call<uint8_t>())
// Scans the input as a regular expression pattern, next token must be /(=).
// Returns true if a pattern is scanned.
bool ScanRegExpPattern() { return SPECIALIZE(ScanRegExpPattern); }
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
Maybe<RegExp::Flags> ScanRegExpFlags() { return SPECIALIZE(ScanRegExpFlags); }
Maybe<RegExp::Flags> ScanRegExpFlags();
// Scans the input as a template literal
Token::Value ScanTemplateStart() { return SPECIALIZE(ScanTemplateStart); }
Token::Value ScanTemplateStart();
Token::Value ScanTemplateContinuation() {
return SPECIALIZE(ScanTemplateContinuation);
DCHECK_EQ(next_.token, Token::RBRACE);
next_.location.beg_pos = source_pos() - 1; // We already consumed }
return ScanTemplateSpan();
}
Handle<String> SourceUrl(Isolate* isolate) const;
@ -243,34 +389,6 @@ class Scanner {
allow_harmony_numeric_separator_ = allow;
}
// Call this after setting source_ to the input.
void Initialize() {
  // Reset the three token descriptors (current, next, next-next) so that
  // none of them refers to a literal. Grouped field by field.
  current_.token = Token::UNINITIALIZED;
  next_.token = Token::UNINITIALIZED;
  next_next_.token = Token::UNINITIALIZED;

  current_.contextual_token = Token::UNINITIALIZED;
  next_.contextual_token = Token::UNINITIALIZED;
  next_next_.contextual_token = Token::UNINITIALIZED;

  current_.literal_chars = nullptr;
  next_.literal_chars = nullptr;
  next_next_.literal_chars = nullptr;

  current_.raw_literal_chars = nullptr;
  next_.raw_literal_chars = nullptr;
  next_next_.raw_literal_chars = nullptr;

  current_.invalid_template_escape_message = MessageTemplate::kNone;
  next_.invalid_template_escape_message = MessageTemplate::kNone;
  next_next_.invalid_template_escape_message = MessageTemplate::kNone;

  // Clear scanner-wide state.
  scanner_error_ = MessageTemplate::kNone;
  found_html_comment_ = false;

  // Set c0_ (one character ahead).
  STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
  SPECIALIZE(Advance);
  // Scan the first token.
  SPECIALIZE(Scan);
}
private:
// Scoped helper for saving & restoring scanner error state.
// This is used for tagged template literals, in which normally forbidden
@ -405,10 +523,33 @@ class Scanner {
const int kMaxAscii = 127;
// Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
template <typename Char, bool capture_raw>
template <bool capture_raw>
uc32 ScanOctalEscape(uc32 c, int length);
#undef SPECIALIZE
// Call this after setting source_ to the input.
void Init() {
  // Set c0_ (one character ahead) before touching any other state.
  STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
  Advance();

  // Reset the three token descriptors (current, next, next-next) so that
  // none of them refers to a literal. Grouped field by field.
  current_.token = Token::UNINITIALIZED;
  next_.token = Token::UNINITIALIZED;
  next_next_.token = Token::UNINITIALIZED;

  current_.contextual_token = Token::UNINITIALIZED;
  next_.contextual_token = Token::UNINITIALIZED;
  next_next_.contextual_token = Token::UNINITIALIZED;

  current_.literal_chars = nullptr;
  next_.literal_chars = nullptr;
  next_next_.literal_chars = nullptr;

  current_.raw_literal_chars = nullptr;
  next_.raw_literal_chars = nullptr;
  next_next_.raw_literal_chars = nullptr;

  current_.invalid_template_escape_message = MessageTemplate::kNone;
  next_.invalid_template_escape_message = MessageTemplate::kNone;
  next_next_.invalid_template_escape_message = MessageTemplate::kNone;

  // Clear scanner-wide state.
  scanner_error_ = MessageTemplate::kNone;
  found_html_comment_ = false;
}
void ReportScannerError(const Location& location,
MessageTemplate::Template error) {
@ -470,77 +611,61 @@ class Scanner {
next_.raw_literal_chars = nullptr;
}
template <typename Char>
inline void AddLiteralCharAdvance() {
AddLiteralChar(c0_);
Advance<Char>();
}
template <typename Char>
CharacterStream<Char>* Source() {
return static_cast<CharacterStream<Char>*>(source_);
}
template <typename Char>
void Seek(size_t pos) {
Source<Char>()->Seek(pos);
Advance();
}
// Low-level scanning support.
template <typename Char, bool capture_raw = false>
template <bool capture_raw = false>
void Advance() {
if (capture_raw) AddRawLiteralChar(c0_);
c0_ = Source<Char>()->Advance();
if (capture_raw) {
AddRawLiteralChar(c0_);
}
c0_ = source_->Advance();
}
template <typename Char, typename FunctionType>
template <typename FunctionType>
V8_INLINE void AdvanceUntil(FunctionType check) {
c0_ = Source<Char>()->AdvanceUntil(check);
c0_ = source_->AdvanceUntil(check);
}
template <typename Char>
bool CombineSurrogatePair() {
if (sizeof(Char) == 1) return false;
DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = Source<Char>()->Advance();
uc32 c1 = source_->Advance();
DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
if (unibrow::Utf16::IsTrailSurrogate(c1)) {
c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
return true;
}
Source<Char>()->Back();
source_->Back();
}
return false;
}
template <typename Char>
void PushBack(uc32 ch) {
DCHECK_LE(c0_, static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode));
source_->Back();
c0_ = ch;
}
template <typename Char>
uc32 Peek() {
return Source<Char>()->Peek();
}
uc32 Peek() const { return source_->Peek(); }
template <typename Char>
inline Token::Value Select(Token::Value tok) {
Advance<Char>();
Advance();
return tok;
}
template <typename Char>
inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
Advance<Char>();
Advance();
if (c0_ == next) {
Advance<Char>();
Advance();
return then;
}
} else {
return else_;
}
}
// Returns the literal string, if any, for the current token (the
// token last returned by Next()). The string is 0-terminated.
// Literal strings are collected for identifiers, strings, numbers as well
@ -596,111 +721,65 @@ class Scanner {
return current_.raw_literal_chars->is_one_byte();
}
template <typename Char, bool capture_raw, bool unicode = false>
template <bool capture_raw, bool unicode = false>
uc32 ScanHexNumber(int expected_length);
// Scan a number of any length but not bigger than max_value. For example, the
// number can be 000000001, so it's very long in characters but its value is
// small.
template <typename Char, bool capture_raw>
template <bool capture_raw>
uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
// Scans a single JavaScript token.
template <typename Char>
void Scan();
template <typename Char>
V8_INLINE Token::Value SkipWhiteSpace();
template <typename Char>
Token::Value SkipSingleHTMLComment();
template <typename Char>
Token::Value SkipSingleLineComment();
template <typename Char>
Token::Value SkipSourceURLComment();
template <typename Char>
void TryToParseSourceURLComment();
template <typename Char>
Token::Value SkipMultiLineComment();
// Scans a possible HTML comment -- begins with '<!'.
template <typename Char>
Token::Value ScanHtmlComment();
template <typename Char>
bool ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
bool is_check_first_digit);
template <typename Char>
bool ScanDecimalDigits();
// Optimized function to scan decimal number as Smi.
template <typename Char>
bool ScanDecimalAsSmi(uint64_t* value);
template <typename Char>
bool ScanDecimalAsSmiWithNumericSeparators(uint64_t* value);
template <typename Char>
bool ScanHexDigits();
template <typename Char>
bool ScanBinaryDigits();
template <typename Char>
bool ScanSignedInteger();
template <typename Char>
bool ScanOctalDigits();
template <typename Char>
bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind);
template <typename Char>
Token::Value ScanNumber(bool seen_period);
template <typename Char>
inline Token::Value ScanIdentifierOrKeyword() {
LiteralScope literal(this);
return ScanIdentifierOrKeywordInner<Char>(&literal);
}
template <typename Char>
Token::Value ScanIdentifierOrKeyword();
Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
template <typename Char>
Token::Value ScanString();
template <typename Char>
Token::Value ScanPrivateName();
// Scans an escape-sequence which is part of a string and adds the
// decoded character to the current literal. Returns true if a pattern
// is scanned.
template <typename Char, bool capture_raw>
template <bool capture_raw>
bool ScanEscape();
// Decodes a Unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
template <typename Char>
uc32 ScanIdentifierUnicodeEscape();
// Helper for the above functions.
template <typename Char, bool capture_raw>
template <bool capture_raw>
uc32 ScanUnicodeEscape();
template <typename Char>
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
template <typename Char>
Maybe<RegExp::Flags> ScanRegExpFlags();
// Scans the input as a template literal
template <typename Char>
Token::Value ScanTemplateStart();
template <typename Char>
Token::Value ScanTemplateContinuation() {
DCHECK_EQ(next_.token, Token::RBRACE);
next_.location.beg_pos = SourcePos<Char>() - 1; // We already consumed }
return ScanTemplateSpan<Char>();
}
bool is_module_;
template <typename Char>
Token::Value ScanTemplateSpan();
// Return the current source position.
template <typename Char>
int SourcePos() {
return static_cast<int>(Source<Char>()->pos()) -
kCharacterLookaheadBufferSize;
int source_pos() {
return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
}
static bool LiteralContainsEscapes(const TokenDesc& token) {
@ -738,8 +817,8 @@ class Scanner {
TokenDesc next_; // desc for next token (one token look-ahead)
TokenDesc next_next_; // desc for the token after next (after PeakAhead())
// Input stream. Must be initialized to a ScannerStream.
ScannerStream* const source_;
// Input stream. Must be initialized to an Utf16CharacterStream.
Utf16CharacterStream* source_;
// Last-seen positions of potentially problematic tokens.
Location octal_pos_;

View File

@ -12,29 +12,30 @@ namespace {
// Implement ExternalSourceStream based on const char**.
// This will take each string as one chunk. The last chunk must be empty.
template <typename Char = char>
class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
public:
explicit ChunkSource(const Char** chunks) : current_(0) {
explicit ChunkSource(const char** chunks) : current_(0) {
do {
chunks_.push_back({*chunks, string_length(*chunks)});
chunks_.push_back(
{reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)});
chunks++;
} while (chunks_.back().len > 0);
}
explicit ChunkSource(const Char* chunks) : current_(0) {
explicit ChunkSource(const char* chunks) : current_(0) {
do {
size_t length = string_length(chunks);
chunks_.push_back({chunks, length});
chunks += length + 1;
chunks_.push_back(
{reinterpret_cast<const uint8_t*>(chunks), strlen(chunks)});
chunks += strlen(chunks) + 1;
} while (chunks_.back().len > 0);
}
ChunkSource(const Char* data, size_t len, bool extra_chunky) : current_(0) {
ChunkSource(const uint8_t* data, size_t char_size, size_t len,
bool extra_chunky)
: current_(0) {
// If extra_chunky, we'll use increasingly large chunk sizes. If not, we'll
// have a single chunk of full length. Make sure that chunks are always
// aligned to char-size though.
size_t chunk_size = extra_chunky ? 1 : len;
for (size_t i = 0; i < len; i += chunk_size, chunk_size += 1) {
size_t chunk_size = extra_chunky ? char_size : len;
for (size_t i = 0; i < len; i += chunk_size, chunk_size += char_size) {
chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
}
chunks_.push_back({nullptr, 0});
@ -45,23 +46,15 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
size_t GetMoreData(const uint8_t** src) override {
DCHECK_LT(current_, chunks_.size());
Chunk& next = chunks_[current_++];
const uint8_t* chunk = reinterpret_cast<const uint8_t*>(next.ptr);
size_t bytelength = next.len * sizeof(Char);
uint8_t* copy = new uint8_t[bytelength];
i::MemMove(copy, chunk, bytelength);
*src = copy;
return bytelength;
uint8_t* chunk = new uint8_t[next.len];
i::MemMove(chunk, next.ptr, next.len);
*src = chunk;
return next.len;
}
private:
static size_t string_length(const Char* string) {
size_t length = 0;
while (*(string++) != 0) length++;
return length;
}
struct Chunk {
const Char* ptr;
const uint8_t* ptr;
size_t len;
};
std::vector<Chunk> chunks_;
@ -112,15 +105,16 @@ const uint16_t unicode_ucs2[] = {97, 98, 99, 228, 10784, 55357,
TEST(Utf8StreamAsciiOnly) {
const char* chunks[] = {"abc", "def", "ghi", ""};
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(i::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
// Read the data without dying.
v8::internal::uc32 c;
do {
c = stream->Advance();
} while (c != i::ScannerStream::kEndOfInput);
} while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
}
TEST(Utf8StreamBOM) {
@ -129,15 +123,16 @@ TEST(Utf8StreamBOM) {
strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));
const char* chunks[] = {data, "\0"};
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
// Read the data without tripping over the BOM.
for (size_t i = 0; unicode_ucs2[i]; i++) {
CHECK_EQ(unicode_ucs2[i], stream->Advance());
}
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());
// Make sure seek works.
stream->Seek(0);
@ -147,7 +142,7 @@ TEST(Utf8StreamBOM) {
CHECK_EQ(unicode_ucs2[5], stream->Advance());
// Try again, but make sure we have to seek 'backwards'.
while (i::ScannerStream::kEndOfInput != stream->Advance()) {
while (v8::internal::Utf16CharacterStream::kEndOfInput != stream->Advance()) {
// Do nothing. We merely advance the stream to the end of its input.
}
stream->Seek(5);
@ -162,8 +157,9 @@ TEST(Utf8SplitBOM) {
{
const char* chunks[] = {partial_bom, data, "\0"};
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
// Read the data without tripping over the BOM.
@ -177,8 +173,9 @@ TEST(Utf8SplitBOM) {
char bom_byte_2[] = "\xbb";
{
const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
// Read the data without tripping over the BOM.
@ -191,36 +188,35 @@ TEST(Utf8SplitBOM) {
TEST(Utf8SplitMultiBOM) {
// Construct chunks with a split BOM followed by another split BOM.
const char* chunks = "\xef\xbb\0\xbf\xef\xbb\0\xbf\0\0";
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<i::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
// Read the data, ensuring we get exactly one of the two BOMs back.
CHECK_EQ(0xFEFF, stream->Advance());
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
}
TEST(Ucs2AdvanceUntil) {
TEST(Utf8AdvanceUntil) {
// Test utf-8 advancing until a certain char.
const char line_term = '\n';
const size_t kLen = arraysize(unicode_ucs2) - 1;
uint16_t data[kLen + 1];
memcpy(data, unicode_ucs2, kLen * 2);
const size_t kLen = arraysize(unicode_utf8);
char data[kLen + 1];
strncpy(data, unicode_utf8, kLen);
data[kLen - 1] = line_term;
data[kLen] = '\0';
{
uint16_t end[] = {0};
const uint16_t* chunks[] = {data, end};
ChunkSource<uint16_t> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
const char* chunks[] = {data, "\0"};
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
int32_t res = static_cast<i::CharacterStream<uint16_t>*>(stream.get())
->AdvanceUntil([](int32_t c0) {
return unibrow::IsLineTerminator(c0);
});
int32_t res = stream->AdvanceUntil(
[](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
CHECK_EQ(line_term, res);
}
}
@ -232,71 +228,68 @@ TEST(AdvanceMatchAdvanceUntil) {
{
const char* chunks[] = {data, "\0"};
ChunkSource<char> chunk_source_a(chunks);
ChunkSource chunk_source_a(chunks);
std::unique_ptr<v8::internal::ScannerStream> stream_advance(
std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance(
v8::internal::ScannerStream::For(
&chunk_source_a, v8::ScriptCompiler::StreamedSource::ONE_BYTE,
&chunk_source_a, v8::ScriptCompiler::StreamedSource::UTF8,
nullptr));
ChunkSource<char> chunk_source_au(chunks);
std::unique_ptr<v8::internal::ScannerStream> stream_advance_until(
ChunkSource chunk_source_au(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance_until(
v8::internal::ScannerStream::For(
&chunk_source_au, v8::ScriptCompiler::StreamedSource::ONE_BYTE,
&chunk_source_au, v8::ScriptCompiler::StreamedSource::UTF8,
nullptr));
int32_t au_c0 =
static_cast<i::CharacterStream<uint8_t>*>(stream_advance_until.get())
->AdvanceUntil(
[](int32_t c0) { return unibrow::IsLineTerminator(c0); });
int32_t au_c0_ = stream_advance_until->AdvanceUntil(
[](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
int32_t a_c0 = '0';
while (!unibrow::IsLineTerminator(a_c0)) {
a_c0 = stream_advance->Advance();
int32_t a_c0_ = '0';
while (!unibrow::IsLineTerminator(a_c0_)) {
a_c0_ = stream_advance->Advance();
}
// Check that both advance methods produce the same output.
CHECK_EQ(a_c0, au_c0);
CHECK_EQ(a_c0_, au_c0_);
// Check if both set the cursor to the correct position by advancing both
// streams by one character.
a_c0 = stream_advance->Advance();
au_c0 = stream_advance_until->Advance();
CHECK_EQ(a_c0, au_c0);
a_c0_ = stream_advance->Advance();
au_c0_ = stream_advance_until->Advance();
CHECK_EQ(a_c0_, au_c0_);
}
}
TEST(Ucs2AdvanceUntilOverChunkBoundaries) {
TEST(Utf8AdvanceUntilOverChunkBoundaries) {
// Test utf-8 advancing until a certain char, crossing chunk boundaries.
// Split the test string at each byte and pass it to the stream. This way,
// we'll have a split at each possible boundary.
const size_t kLen = arraysize(unicode_ucs2) - 1;
uint16_t buffer[kLen + 4];
for (size_t i = 1; i < kLen; i++) {
size_t len = strlen(unicode_utf8);
char buffer[arraysize(unicode_utf8) + 4];
for (size_t i = 1; i < len; i++) {
// Copy source string into buffer, splitting it at i.
// Then add three chunks, 0..i-1, i..strlen-1, empty.
memcpy(buffer, unicode_ucs2, i * 2);
memcpy(buffer + i + 1, unicode_ucs2 + i, (kLen - i) * 2);
strncpy(buffer, unicode_utf8, i);
strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
buffer[i] = '\0';
buffer[kLen + 1] = '\n';
buffer[kLen + 2] = '\0';
buffer[kLen + 3] = '\0';
const uint16_t* chunks[] = {buffer, buffer + i + 1, buffer + kLen + 2};
buffer[len + 1] = '\n';
buffer[len + 2] = '\0';
buffer[len + 3] = '\0';
const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
ChunkSource<uint16_t> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(i::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
int32_t res = static_cast<i::CharacterStream<uint16_t>*>(stream.get())
->AdvanceUntil([](int32_t c0) {
return unibrow::IsLineTerminator(c0);
});
CHECK_EQ(buffer[kLen + 1], res);
int32_t res = stream->AdvanceUntil(
[](int32_t c0_) { return unibrow::IsLineTerminator(c0_); });
CHECK_EQ(buffer[len + 1], res);
}
}
TEST(Ucs2ChunkBoundaries) {
TEST(Utf8ChunkBoundaries) {
// Test utf-8 parsing at chunk boundaries.
// Split the test string at each byte and pass it to the stream. This way,
@ -313,14 +306,16 @@ TEST(Ucs2ChunkBoundaries) {
buffer[len + 2] = '\0';
const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; unicode_ucs2[i]; i++) {
CHECK_EQ(unicode_ucs2[i], stream->Advance());
}
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
stream->Advance());
}
}
@ -340,20 +335,22 @@ TEST(Utf8SingleByteChunks) {
const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,
buffer + len + 3};
ChunkSource<char> chunk_source(chunks);
std::unique_ptr<i::ScannerStream> stream(v8::internal::ScannerStream::For(
ChunkSource chunk_source(chunks);
std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
v8::internal::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t j = 0; unicode_ucs2[j]; j++) {
CHECK_EQ(unicode_ucs2[j], stream->Advance());
}
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
stream->Advance());
}
}
#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
void TestCharacterStream(const char* reference, i::ScannerStream* stream,
void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream,
unsigned length, unsigned start, unsigned end) {
// Read streams one char at a time
unsigned i;
@ -362,7 +359,7 @@ void TestCharacterStream(const char* reference, i::ScannerStream* stream,
CHECK_EQU(reference[i], stream->Advance());
}
CHECK_EQU(end, stream->pos());
CHECK_EQU(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQU(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
CHECK_EQU(end + 1, stream->pos());
stream->Back();
@ -423,7 +420,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
TestExternalResource resource(uc16_buffer.get(), length);
i::Handle<i::String> uc16_string(
factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked());
std::unique_ptr<i::ScannerStream> uc16_stream(
std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
i::ScannerStream::For(isolate, uc16_string, start, end));
TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end);
@ -443,7 +440,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
i::Handle<i::String> ext_one_byte_string(
factory->NewExternalStringFromOneByte(&one_byte_resource)
.ToHandleChecked());
std::unique_ptr<i::ScannerStream> one_byte_stream(
std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
i::ScannerStream::For(isolate, ext_one_byte_string, start, end));
TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start,
end);
@ -455,7 +452,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
// 1-byte generic i::String
{
std::unique_ptr<i::ScannerStream> string_stream(
std::unique_ptr<i::Utf16CharacterStream> string_stream(
i::ScannerStream::For(isolate, one_byte_string, start, end));
TestCharacterStream(one_byte_source, string_stream.get(), length, start,
end);
@ -465,7 +462,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
{
i::Handle<i::String> two_byte_string =
factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
std::unique_ptr<i::ScannerStream> two_byte_string_stream(
std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
i::ScannerStream::For(isolate, two_byte_string, start, end));
TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length,
start, end);
@ -477,18 +474,18 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
// 1-byte streaming stream, single + many chunks.
{
const char* data = reinterpret_cast<const char*>(one_byte_vector.begin());
const char* data_end = reinterpret_cast<const char*>(one_byte_vector.end());
const uint8_t* data = one_byte_vector.begin();
const uint8_t* data_end = one_byte_vector.end();
ChunkSource<char> single_chunk(data, data_end - data, false);
std::unique_ptr<i::ScannerStream> one_byte_streaming_stream(
ChunkSource single_chunk(data, 1, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
i::ScannerStream::For(&single_chunk,
v8::ScriptCompiler::StreamedSource::ONE_BYTE,
nullptr));
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
length, start, end);
ChunkSource<char> many_chunks(data, data_end - data, true);
ChunkSource many_chunks(data, 1, data_end - data, true);
one_byte_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr));
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
@ -497,17 +494,16 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
// UTF-8 streaming stream, single + many chunks.
{
const char* data = reinterpret_cast<const char*>(one_byte_vector.begin());
const char* data_end = reinterpret_cast<const char*>(one_byte_vector.end());
ChunkSource<char> chunks(data, data_end - data, false);
std::unique_ptr<i::ScannerStream> utf8_streaming_stream(
const uint8_t* data = one_byte_vector.begin();
const uint8_t* data_end = one_byte_vector.end();
ChunkSource chunks(data, 1, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8,
nullptr));
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
start, end);
ChunkSource<char> many_chunks(data, data_end - data, true);
ChunkSource many_chunks(data, 1, data_end - data, true);
utf8_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
@ -516,18 +512,18 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
// 2-byte streaming stream, single + many chunks.
{
const uint16_t* data =
reinterpret_cast<const uint16_t*>(two_byte_vector.begin());
const uint16_t* data_end =
reinterpret_cast<const uint16_t*>(two_byte_vector.end());
ChunkSource<uint16_t> chunks(data, data_end - data, false);
std::unique_ptr<i::ScannerStream> two_byte_streaming_stream(
const uint8_t* data =
reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
const uint8_t* data_end =
reinterpret_cast<const uint8_t*>(two_byte_vector.end());
ChunkSource chunks(data, 2, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
i::ScannerStream::For(
&chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
length, start, end);
ChunkSource<uint16_t> many_chunks(data, data_end - data, true);
ChunkSource many_chunks(data, 2, data_end - data, true);
two_byte_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
@ -558,7 +554,7 @@ TEST(CharacterStreams) {
// Regression test for crbug.com/651333. Read invalid utf-8.
TEST(Regress651333) {
const char bytes[] =
const uint8_t bytes[] =
"A\xf1"
"ad"; // Anad, with n == n-with-tilde.
const uint16_t unicode[] = {65, 65533, 97, 100};
@ -569,13 +565,13 @@ TEST(Regress651333) {
// Read len bytes from bytes, and compare against the expected unicode
// characters. Expect kBadChar ( == Unicode replacement char == code point
// 65533) instead of the incorrectly coded Latin1 char.
ChunkSource<char> chunks(bytes, len, false);
std::unique_ptr<i::ScannerStream> stream(i::ScannerStream::For(
ChunkSource chunks(bytes, 1, len, false);
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
&chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; i < len; i++) {
CHECK_EQ(unicode[i], stream->Advance());
}
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
}
}
@ -583,18 +579,18 @@ void TestChunkStreamAgainstReference(
const char* cases[],
const std::vector<std::vector<uint16_t>>& unicode_expected) {
for (size_t c = 0; c < unicode_expected.size(); ++c) {
ChunkSource<char> chunk_source(cases[c]);
std::unique_ptr<i::ScannerStream> stream(i::ScannerStream::For(
ChunkSource chunk_source(cases[c]);
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
&chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; i < unicode_expected[c].size(); i++) {
CHECK_EQ(unicode_expected[c][i], stream->Advance());
}
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
stream->Seek(0);
for (size_t i = 0; i < unicode_expected[c].size(); i++) {
CHECK_EQ(unicode_expected[c][i], stream->Advance());
}
CHECK_EQ(i::ScannerStream::kEndOfInput, stream->Advance());
CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());
}
}
@ -686,7 +682,7 @@ TEST(RelocatingCharacterStream) {
i_isolate->factory()
->NewStringFromTwoByte(two_byte_vector, i::NOT_TENURED)
.ToHandleChecked();
std::unique_ptr<i::ScannerStream> two_byte_string_stream(
std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
i::ScannerStream::For(i_isolate, two_byte_string, 0, length));
CHECK_EQ('a', two_byte_string_stream->Advance());
CHECK_EQ('b', two_byte_string_stream->Advance());

View File

@ -27,7 +27,7 @@ struct ScannerTestHelper {
scanner(std::move(other.scanner)) {}
std::unique_ptr<UnicodeCache> unicode_cache;
std::unique_ptr<CharacterStream<uint8_t>> stream;
std::unique_ptr<Utf16CharacterStream> stream;
std::unique_ptr<Scanner> scanner;
Scanner* operator->() const { return scanner.get(); }
@ -38,9 +38,9 @@ ScannerTestHelper make_scanner(const char* src) {
ScannerTestHelper helper;
helper.unicode_cache = std::unique_ptr<UnicodeCache>(new UnicodeCache);
helper.stream = ScannerStream::ForTesting(src);
helper.scanner = std::unique_ptr<Scanner>(
new Scanner(helper.unicode_cache.get(), helper.stream.get(), false));
helper.scanner->Initialize();
helper.scanner =
std::unique_ptr<Scanner>(new Scanner(helper.unicode_cache.get()));
helper.scanner->Initialize(helper.stream.get(), false);
return helper;
}

View File

@ -92,16 +92,16 @@ TEST(ScanKeywords) {
CHECK(static_cast<int>(sizeof(buffer)) >= length);
{
auto stream = i::ScannerStream::ForTesting(keyword, length);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
CHECK_EQ(key_token.token, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
// Removing characters will make keyword matching fail.
{
auto stream = i::ScannerStream::ForTesting(keyword, length - 1);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
@ -111,8 +111,8 @@ TEST(ScanKeywords) {
i::MemMove(buffer, keyword, length);
buffer[length] = chars_to_append[j];
auto stream = i::ScannerStream::ForTesting(buffer, length + 1);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
@ -121,8 +121,8 @@ TEST(ScanKeywords) {
i::MemMove(buffer, keyword, length);
buffer[length - 1] = '_';
auto stream = i::ScannerStream::ForTesting(buffer, length);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
@ -188,8 +188,8 @@ TEST(ScanHTMLEndComments) {
for (int i = 0; tests[i]; i++) {
const char* source = tests[i];
auto stream = i::ScannerStream::ForTesting(source);
i::Scanner scanner(i_isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(i_isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Zone zone(i_isolate->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(&zone,
i_isolate->ast_string_constants(),
@ -207,8 +207,8 @@ TEST(ScanHTMLEndComments) {
for (int i = 0; fail_tests[i]; i++) {
const char* source = fail_tests[i];
auto stream = i::ScannerStream::ForTesting(source);
i::Scanner scanner(i_isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(i_isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Zone zone(i_isolate->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(&zone,
i_isolate->ast_string_constants(),
@ -232,8 +232,8 @@ TEST(ScanHtmlComments) {
// Disallow HTML comments.
{
auto stream = i::ScannerStream::ForTesting(src);
i::Scanner scanner(&unicode_cache, stream.get(), true);
scanner.Initialize();
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), true);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::ILLEGAL, scanner.Next());
}
@ -241,8 +241,8 @@ TEST(ScanHtmlComments) {
// Skip HTML comments:
{
auto stream = i::ScannerStream::ForTesting(src);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
@ -280,8 +280,8 @@ TEST(StandAlonePreParser) {
uintptr_t stack_limit = i_isolate->stack_guard()->real_climit();
for (int i = 0; programs[i]; i++) {
auto stream = i::ScannerStream::ForTesting(programs[i]);
i::Scanner scanner(i_isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(i_isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Zone zone(i_isolate->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(&zone,
@ -313,8 +313,8 @@ TEST(StandAlonePreParserNoNatives) {
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
for (int i = 0; programs[i]; i++) {
auto stream = i::ScannerStream::ForTesting(programs[i]);
i::Scanner scanner(isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
// Preparser defaults to disallowing natives syntax.
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
@ -348,8 +348,8 @@ TEST(RegressChromium62639) {
// failed in debug mode, and sometimes crashed in release mode.
auto stream = i::ScannerStream::ForTesting(program);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
&zone, CcTest::i_isolate()->ast_string_constants(),
@ -381,8 +381,8 @@ TEST(PreParseOverflow) {
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
auto stream = i::ScannerStream::ForTesting(program.get(), kProgramSize);
i::Scanner scanner(isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
@ -396,12 +396,12 @@ TEST(PreParseOverflow) {
CHECK_EQ(i::PreParser::kPreParseStackOverflow, result);
}
void TestStreamScanner(i::ScannerStream* stream,
void TestStreamScanner(i::Utf16CharacterStream* stream,
i::Token::Value* expected_tokens,
int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) {
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), stream, false);
scanner.Initialize();
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
scanner.Initialize(stream, false);
int i = 0;
do {
@ -419,7 +419,8 @@ void TestStreamScanner(i::ScannerStream* stream,
TEST(StreamScanner) {
v8::V8::Initialize();
const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
auto stream1(i::ScannerStream::ForTesting(str1));
std::unique_ptr<i::Utf16CharacterStream> stream1(
i::ScannerStream::ForTesting(str1));
i::Token::Value expectations1[] = {
i::Token::LBRACE,
i::Token::IDENTIFIER,
@ -437,7 +438,8 @@ TEST(StreamScanner) {
TestStreamScanner(stream1.get(), expectations1, 0, 0);
const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
auto stream2(i::ScannerStream::ForTesting(str2));
std::unique_ptr<i::Utf16CharacterStream> stream2(
i::ScannerStream::ForTesting(str2));
i::Token::Value expectations2[] = {
i::Token::CASE,
i::Token::DEFAULT,
@ -467,7 +469,8 @@ TEST(StreamScanner) {
for (int i = 0; i <= 4; i++) {
expectations3[6 - i] = i::Token::ILLEGAL;
expectations3[5 - i] = i::Token::EOS;
auto stream3(i::ScannerStream::ForTesting(str3));
std::unique_ptr<i::Utf16CharacterStream> stream3(
i::ScannerStream::ForTesting(str3));
TestStreamScanner(stream3.get(), expectations3, 1, 1 + i);
}
}
@ -475,8 +478,8 @@ TEST(StreamScanner) {
void TestScanRegExp(const char* re_source, const char* expected) {
auto stream = i::ScannerStream::ForTesting(re_source);
i::HandleScope scope(CcTest::i_isolate());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Token::Value start = scanner.peek();
CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
@ -1141,6 +1144,8 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) {
parser->set_allow_natives(flags.Contains(kAllowNatives));
parser->set_allow_harmony_public_fields(
flags.Contains(kAllowHarmonyPublicFields));
parser->set_allow_harmony_private_fields(
flags.Contains(kAllowHarmonyPrivateFields));
parser->set_allow_harmony_static_fields(
flags.Contains(kAllowHarmonyStaticFields));
parser->set_allow_harmony_dynamic_import(
@ -1149,9 +1154,6 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) {
flags.Contains(kAllowHarmonyImportMeta));
parser->set_allow_harmony_do_expressions(
flags.Contains(kAllowHarmonyDoExpressions));
parser->set_allow_harmony_private_fields(
flags.Contains(kAllowHarmonyPrivateFields));
parser->set_allow_harmony_numeric_separator(
flags.Contains(kAllowHarmonyNumericSeparator));
}
@ -1169,9 +1171,9 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
// Preparse the data.
i::PendingCompilationErrorHandler pending_error_handler;
if (test_preparser) {
std::unique_ptr<i::ScannerStream> stream(
i::Scanner scanner(isolate->unicode_cache());
std::unique_ptr<i::Utf16CharacterStream> stream(
i::ScannerStream::For(isolate, source));
i::Scanner scanner(isolate->unicode_cache(), stream.get(), is_module);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
&zone, CcTest::i_isolate()->ast_string_constants(),
@ -1181,8 +1183,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
isolate->counters()->runtime_call_stats(),
isolate->logger(), -1, is_module);
SetParserFlags(&preparser, flags);
scanner.Initialize();
// Make sure we can scan the first token with the correct flags.
scanner.Initialize(stream.get(), is_module);
i::PreParser::PreParseResult result = preparser.PreParseProgram();
CHECK_EQ(i::PreParser::kPreParseSuccess, result);
}

View File

@ -17,7 +17,7 @@ class AsmJsScannerTest : public ::testing::Test {
protected:
void SetupScanner(const char* source) {
stream = ScannerStream::ForTesting(source);
scanner.reset(new AsmJsScanner(stream.get(), 0));
scanner.reset(new AsmJsScanner(stream.get()));
}
void Skip(AsmJsScanner::token_t t) {
@ -41,7 +41,7 @@ class AsmJsScannerTest : public ::testing::Test {
CHECK_EQ(scanner->Token(), AsmJsScanner::kParseError);
}
std::unique_ptr<ScannerStream> stream;
std::unique_ptr<Utf16CharacterStream> stream;
std::unique_ptr<AsmJsScanner> scanner;
};