Optimized scanner to avoid virtual calls for every character read.
Review URL: http://codereview.chromium.org/5545006 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5935 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
670aea0218
commit
b55add7e20
14
src/api.cc
14
src/api.cc
@ -1165,14 +1165,22 @@ void ObjectTemplate::SetInternalFieldCount(int value) {
|
||||
|
||||
|
||||
ScriptData* ScriptData::PreCompile(const char* input, int length) {
|
||||
unibrow::Utf8InputBuffer<> buf(input, length);
|
||||
return i::ParserApi::PreParse(i::Handle<i::String>(), &buf, NULL);
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
reinterpret_cast<const unsigned char*>(input), length);
|
||||
return i::ParserApi::PreParse(&stream, NULL);
|
||||
}
|
||||
|
||||
|
||||
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
|
||||
i::Handle<i::String> str = Utils::OpenHandle(*source);
|
||||
return i::ParserApi::PreParse(str, NULL, NULL);
|
||||
if (str->IsExternalTwoByteString()) {
|
||||
i::ExternalTwoByteStringUC16CharacterStream stream(
|
||||
i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
|
||||
return i::ParserApi::PreParse(&stream, NULL);
|
||||
} else {
|
||||
i::GenericStringUC16CharacterStream stream(str, 0, str->length());
|
||||
return i::ParserApi::PreParse(&stream, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -231,6 +231,8 @@ static inline void CheckNonEqualsHelper(const char* file,
|
||||
|
||||
#define CHECK_GT(a, b) CHECK((a) > (b))
|
||||
#define CHECK_GE(a, b) CHECK((a) >= (b))
|
||||
#define CHECK_LT(a, b) CHECK((a) < (b))
|
||||
#define CHECK_LE(a, b) CHECK((a) <= (b))
|
||||
|
||||
|
||||
// This is inspired by the static assertion facility in boost. This
|
||||
|
@ -461,7 +461,14 @@ Handle<SharedFunctionInfo> Compiler::Compile(Handle<String> source,
|
||||
ScriptDataImpl* pre_data = input_pre_data;
|
||||
if (pre_data == NULL
|
||||
&& source_length >= FLAG_min_preparse_length) {
|
||||
pre_data = ParserApi::PartialPreParse(source, NULL, extension);
|
||||
if (source->IsExternalTwoByteString()) {
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source), 0, source->length());
|
||||
pre_data = ParserApi::PartialPreParse(&stream, extension);
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source, 0, source->length());
|
||||
pre_data = ParserApi::PartialPreParse(&stream, extension);
|
||||
}
|
||||
}
|
||||
|
||||
// Create a script object describing the script to be compiled.
|
||||
|
119
src/parser.cc
119
src/parser.cc
@ -609,7 +609,25 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
|
||||
|
||||
// Initialize parser state.
|
||||
source->TryFlatten();
|
||||
scanner_.Initialize(source);
|
||||
if (source->IsExternalTwoByteString()) {
|
||||
// Notice that the stream is destroyed at the end of the branch block.
|
||||
// The last line of the blocks can't be moved outside, even though they're
|
||||
// identical calls.
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source), 0, source->length());
|
||||
scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals);
|
||||
return DoParseProgram(source, in_global_context, &zone_scope);
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source, 0, source->length());
|
||||
scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals);
|
||||
return DoParseProgram(source, in_global_context, &zone_scope);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FunctionLiteral* Parser::DoParseProgram(Handle<String> source,
|
||||
bool in_global_context,
|
||||
ZoneScope* zone_scope) {
|
||||
ASSERT(target_stack_ == NULL);
|
||||
if (pre_data_ != NULL) pre_data_->Initialize();
|
||||
|
||||
@ -655,25 +673,45 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
|
||||
|
||||
// If there was a syntax error we have to get rid of the AST
|
||||
// and it is not safe to do so before the scope has been deleted.
|
||||
if (result == NULL) zone_scope.DeleteOnExit();
|
||||
if (result == NULL) zone_scope->DeleteOnExit();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) {
|
||||
CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
|
||||
HistogramTimerScope timer(&Counters::parse_lazy);
|
||||
Handle<String> source(String::cast(script_->source()));
|
||||
Counters::total_parse_size.Increment(source->length());
|
||||
|
||||
// Initialize parser state.
|
||||
source->TryFlatten();
|
||||
if (source->IsExternalTwoByteString()) {
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source),
|
||||
info->start_position(),
|
||||
info->end_position());
|
||||
FunctionLiteral* result = ParseLazy(info, &stream, &zone_scope);
|
||||
return result;
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source,
|
||||
info->start_position(),
|
||||
info->end_position());
|
||||
FunctionLiteral* result = ParseLazy(info, &stream, &zone_scope);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info,
|
||||
UC16CharacterStream* source,
|
||||
ZoneScope* zone_scope) {
|
||||
scanner_.Initialize(source, JavaScriptScanner::kAllLiterals);
|
||||
ASSERT(target_stack_ == NULL);
|
||||
|
||||
Handle<String> name(String::cast(info->name()));
|
||||
fni_ = new FuncNameInferrer();
|
||||
fni_->PushEnclosingName(name);
|
||||
|
||||
// Initialize parser state.
|
||||
source->TryFlatten();
|
||||
scanner_.Initialize(source, info->start_position(), info->end_position());
|
||||
ASSERT(target_stack_ == NULL);
|
||||
mode_ = PARSE_EAGERLY;
|
||||
|
||||
// Place holder for the result.
|
||||
@ -705,7 +743,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) {
|
||||
// not safe to do before scope has been deleted.
|
||||
if (result == NULL) {
|
||||
Top::StackOverflow();
|
||||
zone_scope.DeleteOnExit();
|
||||
zone_scope->DeleteOnExit();
|
||||
} else {
|
||||
Handle<String> inferred_name(info->inferred_name());
|
||||
result->set_inferred_name(inferred_name);
|
||||
@ -719,12 +757,12 @@ Handle<String> Parser::GetSymbol(bool* ok) {
|
||||
if (pre_data() != NULL) {
|
||||
symbol_id = pre_data()->GetSymbolIdentifier();
|
||||
}
|
||||
return LookupSymbol(symbol_id, scanner_.literal());
|
||||
return LookupSymbol(symbol_id, scanner().literal());
|
||||
}
|
||||
|
||||
|
||||
void Parser::ReportMessage(const char* type, Vector<const char*> args) {
|
||||
Scanner::Location source_location = scanner_.location();
|
||||
Scanner::Location source_location = scanner().location();
|
||||
ReportMessageAt(source_location, type, args);
|
||||
}
|
||||
|
||||
@ -1641,7 +1679,7 @@ Statement* Parser::ParseContinueStatement(bool* ok) {
|
||||
Expect(Token::CONTINUE, CHECK_OK);
|
||||
Handle<String> label = Handle<String>::null();
|
||||
Token::Value tok = peek();
|
||||
if (!scanner_.has_line_terminator_before_next() &&
|
||||
if (!scanner().has_line_terminator_before_next() &&
|
||||
tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) {
|
||||
label = ParseIdentifier(CHECK_OK);
|
||||
}
|
||||
@ -1667,7 +1705,7 @@ Statement* Parser::ParseBreakStatement(ZoneStringList* labels, bool* ok) {
|
||||
Expect(Token::BREAK, CHECK_OK);
|
||||
Handle<String> label;
|
||||
Token::Value tok = peek();
|
||||
if (!scanner_.has_line_terminator_before_next() &&
|
||||
if (!scanner().has_line_terminator_before_next() &&
|
||||
tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) {
|
||||
label = ParseIdentifier(CHECK_OK);
|
||||
}
|
||||
@ -1712,7 +1750,7 @@ Statement* Parser::ParseReturnStatement(bool* ok) {
|
||||
}
|
||||
|
||||
Token::Value tok = peek();
|
||||
if (scanner_.has_line_terminator_before_next() ||
|
||||
if (scanner().has_line_terminator_before_next() ||
|
||||
tok == Token::SEMICOLON ||
|
||||
tok == Token::RBRACE ||
|
||||
tok == Token::EOS) {
|
||||
@ -1844,7 +1882,7 @@ Statement* Parser::ParseThrowStatement(bool* ok) {
|
||||
|
||||
Expect(Token::THROW, CHECK_OK);
|
||||
int pos = scanner().location().beg_pos;
|
||||
if (scanner_.has_line_terminator_before_next()) {
|
||||
if (scanner().has_line_terminator_before_next()) {
|
||||
ReportMessage("newline_after_throw", Vector<const char*>::empty());
|
||||
*ok = false;
|
||||
return NULL;
|
||||
@ -2408,7 +2446,8 @@ Expression* Parser::ParsePostfixExpression(bool* ok) {
|
||||
// LeftHandSideExpression ('++' | '--')?
|
||||
|
||||
Expression* expression = ParseLeftHandSideExpression(CHECK_OK);
|
||||
if (!scanner_.has_line_terminator_before_next() && Token::IsCountOp(peek())) {
|
||||
if (!scanner().has_line_terminator_before_next() &&
|
||||
Token::IsCountOp(peek())) {
|
||||
// Signal a reference error if the expression is an invalid
|
||||
// left-hand side expression. We could report this as a syntax
|
||||
// error here but for compatibility with JSC we choose to report the
|
||||
@ -2677,7 +2716,7 @@ Expression* Parser::ParsePrimaryExpression(bool* ok) {
|
||||
case Token::NUMBER: {
|
||||
Consume(Token::NUMBER);
|
||||
double value =
|
||||
StringToDouble(scanner_.literal(), ALLOW_HEX | ALLOW_OCTALS);
|
||||
StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS);
|
||||
result = NewNumberLiteral(value);
|
||||
break;
|
||||
}
|
||||
@ -3028,7 +3067,7 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
|
||||
case Token::NUMBER: {
|
||||
Consume(Token::NUMBER);
|
||||
double value =
|
||||
StringToDouble(scanner_.literal(), ALLOW_HEX | ALLOW_OCTALS);
|
||||
StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS);
|
||||
key = NewNumberLiteral(value);
|
||||
break;
|
||||
}
|
||||
@ -3089,7 +3128,7 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
|
||||
|
||||
|
||||
Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) {
|
||||
if (!scanner_.ScanRegExpPattern(seen_equal)) {
|
||||
if (!scanner().ScanRegExpPattern(seen_equal)) {
|
||||
Next();
|
||||
ReportMessage("unterminated_regexp", Vector<const char*>::empty());
|
||||
*ok = false;
|
||||
@ -3099,10 +3138,10 @@ Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) {
|
||||
int literal_index = temp_scope_->NextMaterializedLiteralIndex();
|
||||
|
||||
Handle<String> js_pattern =
|
||||
Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED);
|
||||
scanner_.ScanRegExpFlags();
|
||||
Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
|
||||
scanner().ScanRegExpFlags();
|
||||
Handle<String> js_flags =
|
||||
Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED);
|
||||
Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
|
||||
Next();
|
||||
|
||||
return new RegExpLiteral(js_pattern, js_flags, literal_index);
|
||||
@ -3158,7 +3197,7 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
|
||||
// FormalParameterList ::
|
||||
// '(' (Identifier)*[','] ')'
|
||||
Expect(Token::LPAREN, CHECK_OK);
|
||||
int start_pos = scanner_.location().beg_pos;
|
||||
int start_pos = scanner().location().beg_pos;
|
||||
bool done = (peek() == Token::RPAREN);
|
||||
while (!done) {
|
||||
Handle<String> param_name = ParseIdentifier(CHECK_OK);
|
||||
@ -3195,7 +3234,7 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
|
||||
bool is_lazily_compiled =
|
||||
mode() == PARSE_LAZILY && top_scope_->HasTrivialOuterContext();
|
||||
|
||||
int function_block_pos = scanner_.location().beg_pos;
|
||||
int function_block_pos = scanner().location().beg_pos;
|
||||
int materialized_literal_count;
|
||||
int expected_property_count;
|
||||
int end_pos;
|
||||
@ -3212,7 +3251,8 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
|
||||
ReportInvalidPreparseData(name, CHECK_OK);
|
||||
}
|
||||
Counters::total_preparse_skipped.Increment(end_pos - function_block_pos);
|
||||
scanner_.SeekForward(end_pos);
|
||||
// Seek to position just before terminal '}'.
|
||||
scanner().SeekForward(end_pos - 1);
|
||||
materialized_literal_count = entry.literal_count();
|
||||
expected_property_count = entry.property_count();
|
||||
only_simple_this_property_assignments = false;
|
||||
@ -3228,7 +3268,7 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
|
||||
this_property_assignments = temp_scope.this_property_assignments();
|
||||
|
||||
Expect(Token::RBRACE, CHECK_OK);
|
||||
end_pos = scanner_.location().end_pos;
|
||||
end_pos = scanner().location().end_pos;
|
||||
}
|
||||
|
||||
FunctionLiteral* function_literal =
|
||||
@ -3332,7 +3372,7 @@ void Parser::ExpectSemicolon(bool* ok) {
|
||||
Next();
|
||||
return;
|
||||
}
|
||||
if (scanner_.has_line_terminator_before_next() ||
|
||||
if (scanner().has_line_terminator_before_next() ||
|
||||
tok == Token::RBRACE ||
|
||||
tok == Token::EOS) {
|
||||
return;
|
||||
@ -3383,8 +3423,8 @@ Handle<String> Parser::ParseIdentifierOrGetOrSet(bool* is_get,
|
||||
bool* ok) {
|
||||
Expect(Token::IDENTIFIER, ok);
|
||||
if (!*ok) return Handle<String>();
|
||||
if (scanner_.literal_length() == 3) {
|
||||
const char* token = scanner_.literal_string();
|
||||
if (scanner().literal_length() == 3) {
|
||||
const char* token = scanner().literal_string();
|
||||
*is_get = strcmp(token, "get") == 0;
|
||||
*is_set = !*is_get && strcmp(token, "set") == 0;
|
||||
}
|
||||
@ -3503,8 +3543,8 @@ Expression* Parser::NewThrowError(Handle<String> constructor,
|
||||
// ----------------------------------------------------------------------------
|
||||
// JSON
|
||||
|
||||
Handle<Object> JsonParser::ParseJson(Handle<String> source) {
|
||||
source->TryFlatten();
|
||||
Handle<Object> JsonParser::ParseJson(Handle<String> script,
|
||||
UC16CharacterStream* source) {
|
||||
scanner_.Initialize(source);
|
||||
stack_overflow_ = false;
|
||||
Handle<Object> result = ParseJsonValue();
|
||||
@ -3540,7 +3580,7 @@ Handle<Object> JsonParser::ParseJson(Handle<String> source) {
|
||||
}
|
||||
|
||||
Scanner::Location source_location = scanner_.location();
|
||||
MessageLocation location(Factory::NewScript(source),
|
||||
MessageLocation location(Factory::NewScript(script),
|
||||
source_location.beg_pos,
|
||||
source_location.end_pos);
|
||||
int argc = (name_opt == NULL) ? 0 : 1;
|
||||
@ -4555,13 +4595,12 @@ int ScriptDataImpl::ReadNumber(byte** source) {
|
||||
|
||||
|
||||
// Create a Scanner for the preparser to use as input, and preparse the source.
|
||||
static ScriptDataImpl* DoPreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
static ScriptDataImpl* DoPreParse(UC16CharacterStream* source,
|
||||
bool allow_lazy,
|
||||
ParserRecorder* recorder,
|
||||
int literal_flags) {
|
||||
V8JavaScriptScanner scanner;
|
||||
scanner.Initialize(source, stream, literal_flags);
|
||||
scanner.Initialize(source, literal_flags);
|
||||
intptr_t stack_limit = StackGuard::real_climit();
|
||||
if (!preparser::PreParser::PreParseProgram(&scanner,
|
||||
recorder,
|
||||
@ -4580,8 +4619,7 @@ static ScriptDataImpl* DoPreParse(Handle<String> source,
|
||||
|
||||
// Preparse, but only collect data that is immediately useful,
|
||||
// even if the preparser data is only used once.
|
||||
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
ScriptDataImpl* ParserApi::PartialPreParse(UC16CharacterStream* source,
|
||||
v8::Extension* extension) {
|
||||
bool allow_lazy = FLAG_lazy && (extension == NULL);
|
||||
if (!allow_lazy) {
|
||||
@ -4590,22 +4628,19 @@ ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
||||
return NULL;
|
||||
}
|
||||
PartialParserRecorder recorder;
|
||||
|
||||
return DoPreParse(source, stream, allow_lazy, &recorder,
|
||||
return DoPreParse(source, allow_lazy, &recorder,
|
||||
JavaScriptScanner::kNoLiterals);
|
||||
}
|
||||
|
||||
|
||||
ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
ScriptDataImpl* ParserApi::PreParse(UC16CharacterStream* source,
|
||||
v8::Extension* extension) {
|
||||
Handle<Script> no_script;
|
||||
bool allow_lazy = FLAG_lazy && (extension == NULL);
|
||||
CompleteParserRecorder recorder;
|
||||
int kPreParseLiteralsFlags =
|
||||
JavaScriptScanner::kLiteralString | JavaScriptScanner::kLiteralIdentifier;
|
||||
return DoPreParse(source, stream, allow_lazy,
|
||||
&recorder, kPreParseLiteralsFlags);
|
||||
return DoPreParse(source, allow_lazy, &recorder, kPreParseLiteralsFlags);
|
||||
}
|
||||
|
||||
|
||||
|
33
src/parser.h
33
src/parser.h
@ -169,14 +169,12 @@ class ParserApi {
|
||||
static bool Parse(CompilationInfo* info);
|
||||
|
||||
// Generic preparser generating full preparse data.
|
||||
static ScriptDataImpl* PreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
static ScriptDataImpl* PreParse(UC16CharacterStream* source,
|
||||
v8::Extension* extension);
|
||||
|
||||
// Preparser that only does the preprocessing that pays off when the
|
||||
// result is used immediately afterwards.
|
||||
static ScriptDataImpl* PartialPreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
static ScriptDataImpl* PartialPreParse(UC16CharacterStream* source,
|
||||
v8::Extension* extension);
|
||||
};
|
||||
|
||||
@ -435,18 +433,26 @@ class Parser {
|
||||
Vector<const char*> args);
|
||||
|
||||
protected:
|
||||
FunctionLiteral* ParseLazy(Handle<SharedFunctionInfo> info,
|
||||
UC16CharacterStream* source,
|
||||
ZoneScope* zone_scope);
|
||||
enum Mode {
|
||||
PARSE_LAZILY,
|
||||
PARSE_EAGERLY
|
||||
};
|
||||
|
||||
// Called by ParseProgram after setting up the scanner.
|
||||
FunctionLiteral* DoParseProgram(Handle<String> source,
|
||||
bool in_global_context,
|
||||
ZoneScope* zone_scope);
|
||||
|
||||
// Report syntax error
|
||||
void ReportUnexpectedToken(Token::Value token);
|
||||
void ReportInvalidPreparseData(Handle<String> name, bool* ok);
|
||||
void ReportMessage(const char* message, Vector<const char*> args);
|
||||
|
||||
bool inside_with() const { return with_nesting_level_ > 0; }
|
||||
Scanner& scanner() { return scanner_; }
|
||||
V8JavaScriptScanner& scanner() { return scanner_; }
|
||||
Mode mode() const { return mode_; }
|
||||
ScriptDataImpl* pre_data() const { return pre_data_; }
|
||||
|
||||
@ -548,7 +554,7 @@ class Parser {
|
||||
|
||||
INLINE(Token::Value peek()) {
|
||||
if (stack_overflow_) return Token::ILLEGAL;
|
||||
return scanner_.peek();
|
||||
return scanner().peek();
|
||||
}
|
||||
|
||||
INLINE(Token::Value Next()) {
|
||||
@ -560,9 +566,11 @@ class Parser {
|
||||
}
|
||||
if (StackLimitCheck().HasOverflowed()) {
|
||||
// Any further calls to Next or peek will return the illegal token.
|
||||
// The current call must return the next token, which might already
|
||||
// have been peek'ed.
|
||||
stack_overflow_ = true;
|
||||
}
|
||||
return scanner_.Next();
|
||||
return scanner().Next();
|
||||
}
|
||||
|
||||
INLINE(void Consume(Token::Value token));
|
||||
@ -702,7 +710,14 @@ class JsonParser BASE_EMBEDDED {
|
||||
// Parse JSON input as a single JSON value.
|
||||
// Returns null handle and sets exception if parsing failed.
|
||||
static Handle<Object> Parse(Handle<String> source) {
|
||||
return JsonParser().ParseJson(source);
|
||||
if (source->IsExternalTwoByteString()) {
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source), 0, source->length());
|
||||
return JsonParser().ParseJson(source, &stream);
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source, 0, source->length());
|
||||
return JsonParser().ParseJson(source, &stream);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
@ -710,7 +725,7 @@ class JsonParser BASE_EMBEDDED {
|
||||
~JsonParser() { }
|
||||
|
||||
// Parse a string containing a single JSON value.
|
||||
Handle<Object> ParseJson(Handle<String>);
|
||||
Handle<Object> ParseJson(Handle<String> script, UC16CharacterStream* source);
|
||||
// Parse a single JSON value from input (grammar production JSONValue).
|
||||
// A JSON value is either a (double-quoted) string literal, a number literal,
|
||||
// one of "true", "false", or "null", or an object or array literal.
|
||||
|
@ -39,39 +39,82 @@ namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// UTF16Buffer based on a v8::UnicodeInputStream.
|
||||
class InputStreamUTF16Buffer : public UTF16Buffer {
|
||||
class InputStreamUTF16Buffer : public UC16CharacterStream {
|
||||
public:
|
||||
explicit InputStreamUTF16Buffer(UnicodeInputStream* stream)
|
||||
: UTF16Buffer(),
|
||||
stream_(stream) { }
|
||||
explicit InputStreamUTF16Buffer(v8::UnicodeInputStream* stream)
|
||||
: UC16CharacterStream(),
|
||||
stream_(stream),
|
||||
pushback_active_(false) {
|
||||
buffer_cursor_ = buffer_end_ = buffer_ + kPushBackSize;
|
||||
}
|
||||
|
||||
virtual ~InputStreamUTF16Buffer() { }
|
||||
|
||||
virtual void PushBack(uc32 ch) {
|
||||
virtual void PushBack(uc16 ch) {
|
||||
ASSERT(pos_ > 0);
|
||||
if (buffer_cursor_ > buffer_) {
|
||||
// While we can stay within the buffer, just do so.
|
||||
*--buffer_cursor_ = ch;
|
||||
pos_--;
|
||||
return;
|
||||
}
|
||||
if (!pushback_active_) {
|
||||
// Push back the entire buffer to the stream and let the
|
||||
// stream handle pushbacks from now.
|
||||
// We leave buffer_cursor_ == buffer_end_, so the next read
|
||||
// will fill the buffer from the current position.
|
||||
// This should happen exceedingly rarely.
|
||||
while (buffer_end_ > buffer_) {
|
||||
stream_->PushBack(*--buffer_end_);
|
||||
}
|
||||
buffer_cursor_ = buffer_end_;
|
||||
pushback_active_ = true;
|
||||
}
|
||||
stream_->PushBack(ch);
|
||||
pos_--;
|
||||
}
|
||||
|
||||
virtual uc32 Advance() {
|
||||
uc32 result = stream_->Next();
|
||||
if (result >= 0) pos_++;
|
||||
return result;
|
||||
protected:
|
||||
virtual bool ReadBlock() {
|
||||
// Copy the top of the buffer into the pushback area.
|
||||
pushback_active_ = false;
|
||||
int32_t value;
|
||||
uc16* buffer_start = buffer_ + kPushBackSize;
|
||||
buffer_cursor_ = buffer_end_ = buffer_start;
|
||||
while ((value = stream_->Next()) >= 0) {
|
||||
if (value > static_cast<int32_t>(unibrow::Utf8::kMaxThreeByteChar)) {
|
||||
value = unibrow::Utf8::kBadChar;
|
||||
}
|
||||
// buffer_end_ is a pointer to const, but buffer_ itself is writable.
|
||||
buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value);
|
||||
if (buffer_end_ == buffer_ + kPushBackSize + kBufferSize) break;
|
||||
}
|
||||
return buffer_end_ > buffer_start;
|
||||
}
|
||||
|
||||
virtual void SeekForward(int pos) {
|
||||
virtual unsigned SlowSeekForward(unsigned pos) {
|
||||
// Seeking in the input is not used by preparsing.
|
||||
// It's only used by the real parser based on preparser data.
|
||||
UNIMPLEMENTED();
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
static const unsigned kBufferSize = 512;
|
||||
static const unsigned kPushBackSize = 16;
|
||||
v8::UnicodeInputStream* const stream_;
|
||||
// Buffer holding first kPushBackSize characters of pushback buffer,
|
||||
// then kBufferSize chars of read-ahead.
|
||||
// The pushback buffer is only used if pushing back characters past
|
||||
// the start of a block.
|
||||
uc16 buffer_[kBufferSize + kPushBackSize];
|
||||
bool pushback_active_;
|
||||
};
|
||||
|
||||
|
||||
class StandAloneJavaScriptScanner : public JavaScriptScanner {
|
||||
public:
|
||||
void Initialize(UTF16Buffer* source) {
|
||||
void Initialize(UC16CharacterStream* source) {
|
||||
source_ = source;
|
||||
literal_flags_ = kLiteralString | kLiteralIdentifier;
|
||||
Init();
|
||||
|
@ -1078,6 +1078,7 @@ PreParser::Expression PreParser::ParseFunctionLiteral(bool* ok) {
|
||||
|
||||
Expect(i::Token::RBRACE, CHECK_OK);
|
||||
|
||||
// Position right after terminal '}'.
|
||||
int end_pos = scanner_->location().end_pos;
|
||||
log_->LogFunction(function_block_pos, end_pos,
|
||||
function_scope.materialized_literal_count(),
|
||||
|
@ -34,12 +34,6 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// UTF16Buffer
|
||||
|
||||
UTF16Buffer::UTF16Buffer()
|
||||
: pos_(0), end_(kNoEndPosition) { }
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// LiteralCollector
|
||||
|
||||
@ -92,7 +86,7 @@ bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) {
|
||||
// ----------------------------------------------------------------------------
|
||||
// Scanner
|
||||
|
||||
Scanner::Scanner() : source_(NULL) {}
|
||||
Scanner::Scanner() { }
|
||||
|
||||
|
||||
uc32 Scanner::ScanHexEscape(uc32 c, int length) {
|
||||
@ -142,8 +136,7 @@ uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
|
||||
// ----------------------------------------------------------------------------
|
||||
// JavaScriptScanner
|
||||
|
||||
JavaScriptScanner::JavaScriptScanner()
|
||||
: has_line_terminator_before_next_(false) {}
|
||||
JavaScriptScanner::JavaScriptScanner() : Scanner() {}
|
||||
|
||||
|
||||
Token::Value JavaScriptScanner::Next() {
|
||||
@ -503,13 +496,23 @@ void JavaScriptScanner::Scan() {
|
||||
|
||||
|
||||
void JavaScriptScanner::SeekForward(int pos) {
|
||||
source_->SeekForward(pos - 1);
|
||||
Advance();
|
||||
// This function is only called to seek to the location
|
||||
// of the end of a function (at the "}" token). It doesn't matter
|
||||
// whether there was a line terminator in the part we skip.
|
||||
has_line_terminator_before_next_ = false;
|
||||
// After this call, we will have the token at the given position as
|
||||
// the "next" token. The "current" token will be invalid.
|
||||
if (pos == next_.location.beg_pos) return;
|
||||
int current_pos = source_pos();
|
||||
ASSERT_EQ(next_.location.end_pos, current_pos);
|
||||
// Positions inside the lookahead token aren't supported.
|
||||
ASSERT(pos >= current_pos);
|
||||
if (pos != current_pos) {
|
||||
source_->SeekForward(pos - source_->pos());
|
||||
Advance();
|
||||
// This function is only called to seek to the location
|
||||
// of the end of a function (at the "}" token). It doesn't matter
|
||||
// whether there was a line terminator in the part we skip.
|
||||
has_line_terminator_before_next_ = false;
|
||||
}
|
||||
Scan();
|
||||
ASSERT_EQ(Token::RBRACE, next_.token);
|
||||
}
|
||||
|
||||
|
||||
|
@ -52,31 +52,75 @@ inline int HexValue(uc32 c) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// UTF16Buffer - scanner input source with pushback.
|
||||
|
||||
class UTF16Buffer {
|
||||
// ---------------------------------------------------------------------
|
||||
// Buffered stream of characters, using an internal UC16 buffer.
|
||||
|
||||
class UC16CharacterStream {
|
||||
public:
|
||||
UTF16Buffer();
|
||||
virtual ~UTF16Buffer() {}
|
||||
UC16CharacterStream() : pos_(0) { }
|
||||
virtual ~UC16CharacterStream() { }
|
||||
|
||||
virtual void PushBack(uc32 ch) = 0;
|
||||
// Returns a value < 0 when the buffer end is reached.
|
||||
virtual uc32 Advance() = 0;
|
||||
virtual void SeekForward(int pos) = 0;
|
||||
// Returns and advances past the next UC16 character in the input
|
||||
// stream. If there are no more characters, it returns a negative
|
||||
// value.
|
||||
inline int32_t Advance() {
|
||||
if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
|
||||
pos_++;
|
||||
return *(buffer_cursor_++);
|
||||
}
|
||||
// Note: currently the following increment is necessary to avoid a
|
||||
// parser problem! The scanner treats the final kEndOfInput as
|
||||
// a character with a position, and does math relative to that
|
||||
// position.
|
||||
pos_++;
|
||||
|
||||
int pos() const { return pos_; }
|
||||
return kEndOfInput;
|
||||
}
|
||||
|
||||
static const int kNoEndPosition = 1;
|
||||
// Return the current position in the character stream.
|
||||
// Starts at zero.
|
||||
inline unsigned pos() const { return pos_; }
|
||||
|
||||
// Skips forward past the next character_count UC16 characters
|
||||
// in the input, or until the end of input if that comes sooner.
|
||||
// Returns the number of characters actually skipped. If less
|
||||
// than character_count, the end of the input was reached first.
|
||||
inline unsigned SeekForward(unsigned character_count) {
|
||||
unsigned buffered_chars =
|
||||
static_cast<unsigned>(buffer_end_ - buffer_cursor_);
|
||||
if (character_count <= buffered_chars) {
|
||||
buffer_cursor_ += character_count;
|
||||
pos_ += character_count;
|
||||
return character_count;
|
||||
}
|
||||
return SlowSeekForward(character_count);
|
||||
}
|
||||
|
||||
// Pushes back the most recently read UC16 character, i.e.,
|
||||
// the value returned by the most recent call to Advance.
|
||||
// Must not be used right after calling SeekForward.
|
||||
virtual void PushBack(uc16 character) = 0;
|
||||
|
||||
protected:
|
||||
// Initial value of end_ before the input stream is initialized.
|
||||
static const int32_t kEndOfInput = -1;
|
||||
|
||||
int pos_; // Current position in the buffer.
|
||||
int end_; // Position where scanning should stop (EOF).
|
||||
// Ensures that the buffer_cursor_ points to the character at
|
||||
// position pos_ of the input, if possible. If the position
|
||||
// is at or after the end of the input, return false. If there
|
||||
// are more characters available, return true.
|
||||
virtual bool ReadBlock() = 0;
|
||||
virtual unsigned SlowSeekForward(unsigned character_count) = 0;
|
||||
|
||||
const uc16* buffer_cursor_;
|
||||
const uc16* buffer_end_;
|
||||
unsigned pos_;
|
||||
};
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Constants used by scanners.
|
||||
|
||||
class ScannerConstants : AllStatic {
|
||||
public:
|
||||
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
|
||||
@ -277,7 +321,7 @@ class Scanner {
|
||||
// Low-level scanning support.
|
||||
void Advance() { c0_ = source_->Advance(); }
|
||||
void PushBack(uc32 ch) {
|
||||
source_->PushBack(ch);
|
||||
source_->PushBack(c0_);
|
||||
c0_ = ch;
|
||||
}
|
||||
|
||||
@ -307,8 +351,8 @@ class Scanner {
|
||||
TokenDesc current_; // desc for current token (as returned by Next())
|
||||
TokenDesc next_; // desc for next token (one token look-ahead)
|
||||
|
||||
// Input stream. Must be initialized to an UTF16Buffer.
|
||||
UTF16Buffer* source_;
|
||||
// Input stream. Must be initialized to an UC16CharacterStream.
|
||||
UC16CharacterStream* source_;
|
||||
|
||||
// Buffer to hold literal values (identifiers, strings, numbers)
|
||||
// using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
|
||||
|
367
src/scanner.cc
367
src/scanner.cc
@ -36,63 +36,265 @@ namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// UTF16Buffer
|
||||
// BufferedUC16CharacterStreams
|
||||
|
||||
// CharacterStreamUTF16Buffer
|
||||
CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
|
||||
: pushback_buffer_(0), last_(0), stream_(NULL) { }
|
||||
BufferedUC16CharacterStream::BufferedUC16CharacterStream()
|
||||
: UC16CharacterStream(),
|
||||
pushback_limit_(NULL) {
|
||||
// Initialize buffer as being empty. First read will fill the buffer.
|
||||
buffer_cursor_ = buffer_;
|
||||
buffer_end_ = buffer_;
|
||||
}
|
||||
|
||||
BufferedUC16CharacterStream::~BufferedUC16CharacterStream() { }
|
||||
|
||||
void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
|
||||
unibrow::CharacterStream* input,
|
||||
int start_position,
|
||||
int end_position) {
|
||||
stream_ = input;
|
||||
if (start_position > 0) {
|
||||
SeekForward(start_position);
|
||||
void BufferedUC16CharacterStream::PushBack(uc16 character) {
|
||||
if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) {
|
||||
// buffer_ is writable, buffer_cursor_ is const pointer.
|
||||
buffer_[--buffer_cursor_ - buffer_] = character;
|
||||
pos_--;
|
||||
return;
|
||||
}
|
||||
end_ = end_position != kNoEndPosition ? end_position : kMaxInt;
|
||||
SlowPushBack(character);
|
||||
}
|
||||
|
||||
|
||||
void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
|
||||
pushback_buffer()->Add(last_);
|
||||
last_ = ch;
|
||||
// Stores |character| in the pushback area kept at the tail of buffer_ and
// moves the stream position one step back.
//
// While in pushback mode the tail of the buffer holds pushed-back characters
// and the head (from the buffer start up to pushback_limit_) holds valid data
// that comes just after the pushback. pushback_limit_ is reset to NULL once
// the pushback has grown all the way back to the start of the buffer.
void BufferedUC16CharacterStream::SlowPushBack(uc16 character) {
  const bool entering_pushback_mode = (pushback_limit_ == NULL);
  if (entering_pushback_mode) {
    // Remember where the buffered data ended, then start writing pushback
    // from the very end of the buffer.
    pushback_limit_ = buffer_end_;
    buffer_end_ = buffer_ + kBufferSize;
    buffer_cursor_ = buffer_end_;
  }
  ASSERT(pushback_limit_ > buffer_);
  ASSERT(pos_ > 0);
  pos_--;

  // buffer_cursor_ points to const data, so write through the array itself.
  buffer_cursor_--;
  buffer_[buffer_cursor_ - buffer_] = character;

  if (buffer_cursor_ == buffer_) {
    // Pushed all the way back to the buffer start: leave pushback mode.
    pushback_limit_ = NULL;
  } else if (buffer_cursor_ < pushback_limit_) {
    // The pushback area now overlaps the old data; shrink the valid prefix.
    pushback_limit_ = buffer_cursor_;
  }
}
|
||||
|
||||
|
||||
uc32 CharacterStreamUTF16Buffer::Advance() {
|
||||
ASSERT(end_ != kNoEndPosition);
|
||||
ASSERT(end_ >= 0);
|
||||
// NOTE: It is of importance to Persian / Farsi resources that we do
|
||||
// *not* strip format control characters in the scanner; see
|
||||
//
|
||||
// https://bugzilla.mozilla.org/show_bug.cgi?id=274152
|
||||
//
|
||||
// So, even though ECMA-262, section 7.1, page 11, dictates that we
|
||||
// must remove Unicode format-control characters, we do not. This is
|
||||
// in line with how IE and SpiderMonkey handles it.
|
||||
if (!pushback_buffer()->is_empty()) {
|
||||
pos_++;
|
||||
return last_ = pushback_buffer()->RemoveLast();
|
||||
} else if (stream_->has_more() && pos_ < end_) {
|
||||
pos_++;
|
||||
uc32 next = stream_->GetNext();
|
||||
return last_ = next;
|
||||
} else {
|
||||
// Note: currently the following increment is necessary to avoid a
|
||||
// test-parser problem!
|
||||
pos_++;
|
||||
return last_ = static_cast<uc32>(-1);
|
||||
bool BufferedUC16CharacterStream::ReadBlock() {
|
||||
if (pushback_limit_ != NULL) {
|
||||
buffer_cursor_ = buffer_;
|
||||
buffer_end_ = pushback_limit_;
|
||||
pushback_limit_ = NULL;
|
||||
ASSERT(buffer_cursor_ != buffer_end_);
|
||||
return true;
|
||||
}
|
||||
unsigned length = FillBuffer(pos_, kBufferSize);
|
||||
buffer_cursor_ = buffer_;
|
||||
buffer_end_ = buffer_ + length;
|
||||
return length > 0;
|
||||
}
|
||||
|
||||
|
||||
unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) {
|
||||
// Leave pushback mode (i.e., ignore that there might be valid data
|
||||
// in the buffer before the pushback_limit_ point).
|
||||
pushback_limit_ = NULL;
|
||||
return BufferSeekForward(delta);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// GenericStringUC16CharacterStream
|
||||
|
||||
|
||||
// Creates a stream over |data| that starts reading at character index
// |start_position| and treats |end_position| as the end of the input.
// The buffer starts out empty; characters are copied in on the first read.
GenericStringUC16CharacterStream::GenericStringUC16CharacterStream(
    Handle<String> data,
    unsigned start_position,
    unsigned end_position)
    : string_(data),
      // Previously left uninitialized although the class declares it.
      start_position_(start_position),
      length_(end_position) {
  ASSERT(end_position >= start_position);
  buffer_cursor_ = buffer_;
  buffer_end_ = buffer_;
  pos_ = start_position;
}


GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { }
|
||||
|
||||
|
||||
// Moves the stream position |delta| characters forward, clamped to length_,
// refills the buffer from the new position and returns the distance moved.
unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) {
  const unsigned origin = pos_;
  const unsigned requested = origin + delta;
  pos_ = Min(requested, length_);
  ReadBlock();
  return pos_ - origin;
}
|
||||
|
||||
|
||||
unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos,
|
||||
unsigned length) {
|
||||
if (from_pos >= length_) return 0;
|
||||
if (from_pos + length > length_) {
|
||||
length = length_ - from_pos;
|
||||
}
|
||||
String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Utf8ToUC16CharacterStream
|
||||
// Creates a stream decoding the |length| bytes of UTF-8 data at |data|.
// Both the byte cursor and the character position it corresponds to start
// at zero, and the first block is read immediately.
Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data,
                                                     unsigned length)
    : BufferedUC16CharacterStream(),
      raw_data_(data), raw_data_length_(length),
      raw_data_pos_(0), raw_character_position_(0) {
  ReadBlock();
}


Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() {
}
|
||||
|
||||
|
||||
// Moves the stream |delta| characters forward (or as far as the raw data
// allows), refills the buffer there and returns the distance actually moved.
unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) {
  const unsigned origin = pos_;
  SetRawPosition(origin + delta);
  // SetRawPosition stops early at the end of the data, so adopt the
  // character position it actually reached.
  pos_ = raw_character_position_;
  ReadBlock();
  return pos_ - origin;
}
|
||||
|
||||
|
||||
unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
|
||||
unsigned length) {
|
||||
static const unibrow::uchar kMaxUC16Character = 0xffff;
|
||||
SetRawPosition(char_position);
|
||||
if (raw_character_position_ != char_position) {
|
||||
// char_position was not a valid position in the stream (hit the end
|
||||
// while spooling to it).
|
||||
return 0u;
|
||||
}
|
||||
unsigned i = 0;
|
||||
while (i < length) {
|
||||
if (raw_data_pos_ == raw_data_length_) break;
|
||||
unibrow::uchar c = raw_data_[raw_data_pos_];
|
||||
if (c <= unibrow::Utf8::kMaxOneByteChar) {
|
||||
raw_data_pos_++;
|
||||
} else {
|
||||
c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
|
||||
raw_data_length_ - raw_data_pos_,
|
||||
&raw_data_pos_);
|
||||
// Don't allow characters outside of the BMP.
|
||||
if (c > kMaxUC16Character) {
|
||||
c = unibrow::Utf8::kBadChar;
|
||||
}
|
||||
}
|
||||
buffer_[i++] = static_cast<uc16>(c);
|
||||
}
|
||||
raw_character_position_ = char_position + i;
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
// Mask selecting the top two bits of a UTF-8 byte.
static const byte kUtf8MultiByteMask = 0xC0;
|
||||
// Masked value (top bits 11) of the first byte of a multi-byte encoding.
static const byte kUtf8MultiByteCharStart = 0xC0;
|
||||
// Masked value (top bits 10) of a later byte of a multi-byte encoding.
static const byte kUtf8MultiByteCharFollower = 0x80;
|
||||
|
||||
|
||||
#ifdef DEBUG
// Debug-only helper: true if |first_byte| has both of its top two bits set,
// i.e. it is the first byte of a multi-byte UTF-8 encoding.
static bool IsUtf8MultiCharacterStart(byte first_byte) {
  const int top_bits = first_byte & kUtf8MultiByteMask;
  return top_bits == kUtf8MultiByteCharStart;
}
#endif
|
||||
|
||||
|
||||
// True if the top two bits of |later_byte| are 10, i.e. it is a non-initial
// byte of a multi-byte UTF-8 encoding.
static bool IsUtf8MultiCharacterFollower(byte later_byte) {
  const int top_bits = later_byte & kUtf8MultiByteMask;
  return top_bits == kUtf8MultiByteCharFollower;
}
|
||||
|
||||
|
||||
// Move the cursor back to point at the preceding UTF-8 character start
|
||||
// in the buffer.
|
||||
static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
|
||||
byte character = buffer[--*cursor];
|
||||
if (character > unibrow::Utf8::kMaxOneByteChar) {
|
||||
ASSERT(IsUtf8MultiCharacterFollower(character));
|
||||
// Last byte of a multi-byte character encoding. Step backwards until
|
||||
// pointing to the first byte of the encoding, recognized by having the
|
||||
// top two bits set.
|
||||
while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }
|
||||
ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor]));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void CharacterStreamUTF16Buffer::SeekForward(int pos) {
|
||||
pos_ = pos;
|
||||
ASSERT(pushback_buffer()->is_empty());
|
||||
stream_->Seek(pos);
|
||||
// Move the cursor forward to point at the next following UTF-8 character start
|
||||
// in the buffer.
|
||||
static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
|
||||
byte character = buffer[(*cursor)++];
|
||||
if (character > unibrow::Utf8::kMaxOneByteChar) {
|
||||
// First character of a multi-byte character encoding.
|
||||
// The number of most-significant one-bits determines the length of the
|
||||
// encoding:
|
||||
// 110..... - (0xCx, 0xDx) one additional byte (minimum).
|
||||
// 1110.... - (0xEx) two additional bytes.
|
||||
// 11110... - (0xFx) three additional bytes (maximum).
|
||||
ASSERT(IsUtf8MultiCharacterStart(character));
|
||||
// Additional bytes is:
|
||||
// 1 if value in range 0xC0 .. 0xDF.
|
||||
// 2 if value in range 0xE0 .. 0xEF.
|
||||
// 3 if value in range 0xF0 .. 0xF7.
|
||||
// Encode that in a single value.
|
||||
unsigned additional_bytes =
|
||||
((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
|
||||
*cursor += additional_bytes;
|
||||
ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) {
|
||||
if (raw_character_position_ > target_position) {
|
||||
// Spool backwards in utf8 buffer.
|
||||
do {
|
||||
Utf8CharacterBack(raw_data_, &raw_data_pos_);
|
||||
raw_character_position_--;
|
||||
} while (raw_character_position_ > target_position);
|
||||
return;
|
||||
}
|
||||
// Spool forwards in the utf8 buffer.
|
||||
while (raw_character_position_ < target_position) {
|
||||
if (raw_data_pos_ == raw_data_length_) return;
|
||||
Utf8CharacterForward(raw_data_, &raw_data_pos_);
|
||||
raw_character_position_++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// ExternalTwoByteStringUC16CharacterStream
|
||||
|
||||
ExternalTwoByteStringUC16CharacterStream::
    ~ExternalTwoByteStringUC16CharacterStream() { }


// Creates a stream that reads directly from the two-byte character data of
// |data|, covering the characters from |start_position| up to (but not
// including) |end_position|. No copying takes place: the cursor and end
// pointers point straight into the string's own character array.
ExternalTwoByteStringUC16CharacterStream
    ::ExternalTwoByteStringUC16CharacterStream(
        Handle<ExternalTwoByteString> data,
        int start_position,
        int end_position)
    : UC16CharacterStream(),
      source_(data),
      raw_data_(data->GetTwoByteData(start_position)) {
  // Each assignment is its own statement (the original joined the first two
  // with a stray comma operator).
  buffer_cursor_ = raw_data_;
  buffer_end_ = raw_data_ + (end_position - start_position);
  pos_ = start_position;
}
|
||||
|
||||
|
||||
@ -115,15 +317,19 @@ void Scanner::LiteralScope::Complete() {
|
||||
complete_ = true;
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// V8JavaScriptScanner
|
||||
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
V8JavaScriptScanner::V8JavaScriptScanner() : JavaScriptScanner() { }
|
||||
|
||||
|
||||
void V8JavaScriptScanner::Initialize(UC16CharacterStream* source,
|
||||
int literal_flags) {
|
||||
source_ = stream_initializer_.Init(source, NULL, 0, source->length());
|
||||
source_ = source;
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
// Need to capture identifiers in order to recognize "get" and "set"
|
||||
// in object literals.
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
Init();
|
||||
// Skip initial whitespace allowing HTML comment ends just like
|
||||
// after a newline and scan first token.
|
||||
@ -133,79 +339,14 @@ void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
}
|
||||
|
||||
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
int literal_flags) {
|
||||
source_ = stream_initializer_.Init(source, stream,
|
||||
0, UTF16Buffer::kNoEndPosition);
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
Init();
|
||||
// Skip initial whitespace allowing HTML comment ends just like
|
||||
// after a newline and scan first token.
|
||||
has_line_terminator_before_next_ = true;
|
||||
SkipWhiteSpace();
|
||||
Scan();
|
||||
}
|
||||
|
||||
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
int start_position,
|
||||
int end_position,
|
||||
int literal_flags) {
|
||||
source_ = stream_initializer_.Init(source, NULL,
|
||||
start_position, end_position);
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
Init();
|
||||
// Skip initial whitespace allowing HTML comment ends just like
|
||||
// after a newline and scan first token.
|
||||
has_line_terminator_before_next_ = true;
|
||||
SkipWhiteSpace();
|
||||
Scan();
|
||||
}
|
||||
|
||||
|
||||
UTF16Buffer* StreamInitializer::Init(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
int start_position,
|
||||
int end_position) {
|
||||
// Either initialize the scanner from a character stream or from a
|
||||
// string.
|
||||
ASSERT(source.is_null() || stream == NULL);
|
||||
|
||||
// Initialize the source buffer.
|
||||
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
|
||||
two_byte_string_buffer_.Initialize(
|
||||
Handle<ExternalTwoByteString>::cast(source),
|
||||
start_position,
|
||||
end_position);
|
||||
return &two_byte_string_buffer_;
|
||||
} else if (!source.is_null() && StringShape(*source).IsExternalAscii()) {
|
||||
ascii_string_buffer_.Initialize(
|
||||
Handle<ExternalAsciiString>::cast(source),
|
||||
start_position,
|
||||
end_position);
|
||||
return &ascii_string_buffer_;
|
||||
} else {
|
||||
if (!source.is_null()) {
|
||||
safe_string_input_buffer_.Reset(source.location());
|
||||
stream = &safe_string_input_buffer_;
|
||||
}
|
||||
char_stream_buffer_.Initialize(source,
|
||||
stream,
|
||||
start_position,
|
||||
end_position);
|
||||
return &char_stream_buffer_;
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// JsonScanner
|
||||
|
||||
JsonScanner::JsonScanner() {}
|
||||
JsonScanner::JsonScanner() : Scanner() { }
|
||||
|
||||
|
||||
void JsonScanner::Initialize(Handle<String> source) {
|
||||
source_ = stream_initializer_.Init(source, NULL, 0, source->length());
|
||||
void JsonScanner::Initialize(UC16CharacterStream* source) {
|
||||
source_ = source;
|
||||
Init();
|
||||
// Skip initial whitespace.
|
||||
SkipJsonWhiteSpace();
|
||||
|
199
src/scanner.h
199
src/scanner.h
@ -35,67 +35,97 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// UTF16 buffer to read characters from a character stream.
|
||||
class CharacterStreamUTF16Buffer: public UTF16Buffer {
|
||||
// A buffered character stream based on a random access character
|
||||
// source (ReadBlock can be called with pos_ pointing to any position,
|
||||
// even positions before the current).
|
||||
class BufferedUC16CharacterStream: public UC16CharacterStream {
|
||||
public:
|
||||
CharacterStreamUTF16Buffer();
|
||||
virtual ~CharacterStreamUTF16Buffer() {}
|
||||
void Initialize(Handle<String> data,
|
||||
unibrow::CharacterStream* stream,
|
||||
int start_position,
|
||||
int end_position);
|
||||
virtual void PushBack(uc32 ch);
|
||||
virtual uc32 Advance();
|
||||
virtual void SeekForward(int pos);
|
||||
BufferedUC16CharacterStream();
|
||||
virtual ~BufferedUC16CharacterStream();
|
||||
|
||||
private:
|
||||
List<uc32> pushback_buffer_;
|
||||
uc32 last_;
|
||||
unibrow::CharacterStream* stream_;
|
||||
virtual void PushBack(uc16 character);
|
||||
|
||||
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
|
||||
protected:
|
||||
static const unsigned kBufferSize = 512;
|
||||
static const unsigned kPushBackStepSize = 16;
|
||||
|
||||
virtual unsigned SlowSeekForward(unsigned delta);
|
||||
virtual bool ReadBlock();
|
||||
virtual void SlowPushBack(uc16 character);
|
||||
|
||||
virtual unsigned BufferSeekForward(unsigned delta) = 0;
|
||||
virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;
|
||||
|
||||
const uc16* pushback_limit_;
|
||||
uc16 buffer_[kBufferSize];
|
||||
};
|
||||
|
||||
|
||||
// Generic string stream.
// Buffered UC16 character stream reading from a Handle<String>; the
// implementation copies characters into the inherited buffer with
// String::WriteToFlat.
|
||||
class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
|
||||
public:
|
||||
// start_position becomes the initial stream position and end_position is
// stored as length_, the index at which reading stops.
GenericStringUC16CharacterStream(Handle<String> data,
|
||||
unsigned start_position,
|
||||
unsigned end_position);
|
||||
virtual ~GenericStringUC16CharacterStream();
|
||||
|
||||
protected:
|
||||
// Advances the position by delta, clamped to length_, and returns the
// distance actually moved.
virtual unsigned BufferSeekForward(unsigned delta);
|
||||
// Copies up to length characters starting at position into the buffer and
// returns the number copied (zero at or past length_).
virtual unsigned FillBuffer(unsigned position, unsigned length);
|
||||
|
||||
// The string the characters are read from.
Handle<String> string_;
|
||||
// NOTE(review): no code visible in this change reads this member; confirm
// whether it is still needed.
unsigned start_position_;
|
||||
// End index of the readable range (the constructor's end_position).
unsigned length_;
|
||||
};
|
||||
|
||||
|
||||
// UC16 stream based on a literal UTF-8 string.
// Characters are decoded from the raw bytes into the inherited buffer; code
// points above 0xffff are replaced by unibrow::Utf8::kBadChar.
|
||||
class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
|
||||
public:
|
||||
// data points at length bytes of UTF-8; the constructor reads the first
// block immediately.
Utf8ToUC16CharacterStream(const byte* data, unsigned length);
|
||||
virtual ~Utf8ToUC16CharacterStream();
|
||||
|
||||
protected:
|
||||
// Seeks delta characters forward, stopping at the end of the data, and
// returns the number of characters actually skipped.
virtual unsigned BufferSeekForward(unsigned delta);
|
||||
// Decodes up to length characters starting at character index
// char_position; returns zero if that index lies beyond the data.
virtual unsigned FillBuffer(unsigned char_position, unsigned length);
|
||||
// Moves raw_data_pos_ (and raw_character_position_) to the encoding of the
// character at char_position, spooling backwards or forwards as needed.
void SetRawPosition(unsigned char_position);
|
||||
|
||||
// Start of the UTF-8 encoded input.
const byte* raw_data_;
|
||||
unsigned raw_data_length_; // Measured in bytes, not characters.
|
||||
// Byte index into raw_data_ of the next character to decode.
unsigned raw_data_pos_;
|
||||
// The character position of the character at raw_data[raw_data_pos_].
|
||||
// Not necessarily the same as pos_.
|
||||
unsigned raw_character_position_;
|
||||
};
|
||||
|
||||
|
||||
// UTF16 buffer to read characters from an external string.
|
||||
template <typename StringType, typename CharType>
|
||||
class ExternalStringUTF16Buffer: public UTF16Buffer {
|
||||
class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
|
||||
public:
|
||||
ExternalStringUTF16Buffer();
|
||||
virtual ~ExternalStringUTF16Buffer() {}
|
||||
void Initialize(Handle<StringType> data,
|
||||
int start_position,
|
||||
int end_position);
|
||||
virtual void PushBack(uc32 ch);
|
||||
virtual uc32 Advance();
|
||||
virtual void SeekForward(int pos);
|
||||
ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
|
||||
int start_position,
|
||||
int end_position);
|
||||
virtual ~ExternalTwoByteStringUC16CharacterStream();
|
||||
|
||||
private:
|
||||
const CharType* raw_data_; // Pointer to the actual array of characters.
|
||||
virtual void PushBack(uc16 character) {
|
||||
ASSERT(buffer_cursor_ > raw_data_);
|
||||
buffer_cursor_--;
|
||||
pos_--;
|
||||
}
|
||||
protected:
|
||||
virtual unsigned SlowSeekForward(unsigned delta) {
|
||||
// Fast case always handles seeking.
|
||||
return 0;
|
||||
}
|
||||
virtual bool ReadBlock() {
|
||||
// Entire string is read at start.
|
||||
return false;
|
||||
}
|
||||
Handle<ExternalTwoByteString> source_;
|
||||
const uc16* raw_data_; // Pointer to the actual array of characters.
|
||||
};
|
||||
|
||||
|
||||
// Initializes a UTF16Buffer as input stream, using one of a number
|
||||
// of strategies depending on the available character sources.
|
||||
class StreamInitializer {
|
||||
public:
|
||||
UTF16Buffer* Init(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
int start_position,
|
||||
int end_position);
|
||||
private:
|
||||
// Different UTF16 buffers used to pull characters from. Based on input one of
|
||||
// these will be initialized as the actual data source.
|
||||
CharacterStreamUTF16Buffer char_stream_buffer_;
|
||||
ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
|
||||
two_byte_string_buffer_;
|
||||
ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
|
||||
|
||||
// Used to convert the source string into a character stream when a stream
|
||||
// is not passed to the scanner.
|
||||
SafeStringInputBuffer safe_string_input_buffer_;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// V8JavaScriptScanner
|
||||
// JavaScript scanner getting its input from either a V8 String or a unicode
|
||||
@ -103,19 +133,9 @@ class StreamInitializer {
|
||||
|
||||
class V8JavaScriptScanner : public JavaScriptScanner {
|
||||
public:
|
||||
V8JavaScriptScanner() {}
|
||||
|
||||
// Initialize the Scanner to scan source.
|
||||
void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
|
||||
void Initialize(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
V8JavaScriptScanner();
|
||||
void Initialize(UC16CharacterStream* source,
|
||||
int literal_flags = kAllLiterals);
|
||||
void Initialize(Handle<String> source,
|
||||
int start_position, int end_position,
|
||||
int literal_flags = kAllLiterals);
|
||||
|
||||
protected:
|
||||
StreamInitializer stream_initializer_;
|
||||
};
|
||||
|
||||
|
||||
@ -123,8 +143,7 @@ class JsonScanner : public Scanner {
|
||||
public:
|
||||
JsonScanner();
|
||||
|
||||
// Initialize the Scanner to scan source.
|
||||
void Initialize(Handle<String> source);
|
||||
void Initialize(UC16CharacterStream* source);
|
||||
|
||||
// Returns the next token.
|
||||
Token::Value Next();
|
||||
@ -138,7 +157,7 @@ class JsonScanner : public Scanner {
|
||||
// Recognizes all of the single-character tokens directly, or calls a function
|
||||
// to scan a number, string or identifier literal.
|
||||
// The only allowed whitespace characters between tokens are tab,
|
||||
// carrige-return, newline and space.
|
||||
// carriage-return, newline and space.
|
||||
void ScanJson();
|
||||
|
||||
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
|
||||
@ -159,60 +178,8 @@ class JsonScanner : public Scanner {
|
||||
// are the only valid JSON identifiers (productions JSONBooleanLiteral,
|
||||
// JSONNullLiteral).
|
||||
Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
|
||||
|
||||
StreamInitializer stream_initializer_;
|
||||
};
|
||||
|
||||
|
||||
// ExternalStringUTF16Buffer
|
||||
template <typename StringType, typename CharType>
|
||||
ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
|
||||
: raw_data_(NULL) { }
|
||||
|
||||
|
||||
template <typename StringType, typename CharType>
|
||||
void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
|
||||
Handle<StringType> data,
|
||||
int start_position,
|
||||
int end_position) {
|
||||
ASSERT(!data.is_null());
|
||||
raw_data_ = data->resource()->data();
|
||||
|
||||
ASSERT(end_position <= data->length());
|
||||
if (start_position > 0) {
|
||||
SeekForward(start_position);
|
||||
}
|
||||
end_ =
|
||||
end_position != kNoEndPosition ? end_position : data->length();
|
||||
}
|
||||
|
||||
|
||||
template <typename StringType, typename CharType>
|
||||
uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
|
||||
if (pos_ < end_) {
|
||||
return raw_data_[pos_++];
|
||||
} else {
|
||||
// note: currently the following increment is necessary to avoid a
|
||||
// test-parser problem!
|
||||
pos_++;
|
||||
return static_cast<uc32>(-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename StringType, typename CharType>
|
||||
void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
|
||||
pos_--;
|
||||
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
|
||||
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
|
||||
}
|
||||
|
||||
|
||||
template <typename StringType, typename CharType>
|
||||
void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
|
||||
pos_ = pos;
|
||||
}
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_SCANNER_H_
|
||||
|
@ -260,10 +260,12 @@ TEST(StandAlonePreParser) {
|
||||
uintptr_t stack_limit = i::StackGuard::real_climit();
|
||||
for (int i = 0; programs[i]; i++) {
|
||||
const char* program = programs[i];
|
||||
unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::CompleteParserRecorder log;
|
||||
i::V8JavaScriptScanner scanner;
|
||||
scanner.Initialize(i::Handle<i::String>::null(), &stream);
|
||||
scanner.Initialize(&stream);
|
||||
|
||||
v8::preparser::PreParser::PreParseResult result =
|
||||
v8::preparser::PreParser::PreParseProgram(&scanner,
|
||||
@ -289,9 +291,10 @@ TEST(RegressChromium62639) {
|
||||
// and then used the invalid currently scanned literal. This always
|
||||
// failed in debug mode, and sometimes crashed in release mode.
|
||||
|
||||
unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
|
||||
i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::ScriptDataImpl* data =
|
||||
i::ParserApi::PreParse(i::Handle<i::String>::null(), &stream, NULL);
|
||||
i::ParserApi::PreParse(&stream, NULL);
|
||||
CHECK(data->HasError());
|
||||
delete data;
|
||||
}
|
||||
@ -310,10 +313,10 @@ TEST(Regress928) {
|
||||
"try { } catch (e) { var foo = function () { /* first */ } }"
|
||||
"var bar = function () { /* second */ }";
|
||||
|
||||
unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
|
||||
i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::ScriptDataImpl* data =
|
||||
i::ParserApi::PartialPreParse(i::Handle<i::String>::null(),
|
||||
&stream, NULL);
|
||||
i::ParserApi::PartialPreParse(&stream, NULL);
|
||||
CHECK(!data->HasError());
|
||||
|
||||
data->Initialize();
|
||||
@ -347,10 +350,12 @@ TEST(PreParseOverflow) {
|
||||
|
||||
uintptr_t stack_limit = i::StackGuard::real_climit();
|
||||
|
||||
unibrow::Utf8InputBuffer<256> stream(*program, strlen(*program));
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(*program),
|
||||
static_cast<unsigned>(kProgramSize));
|
||||
i::CompleteParserRecorder log;
|
||||
i::V8JavaScriptScanner scanner;
|
||||
scanner.Initialize(i::Handle<i::String>::null(), &stream);
|
||||
scanner.Initialize(&stream);
|
||||
|
||||
|
||||
v8::preparser::PreParser::PreParseResult result =
|
||||
@ -360,3 +365,283 @@ TEST(PreParseOverflow) {
|
||||
stack_limit);
|
||||
CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
|
||||
}
|
||||
|
||||
|
||||
class TestExternalResource: public v8::String::ExternalStringResource {
|
||||
public:
|
||||
explicit TestExternalResource(uint16_t* data, int length)
|
||||
: data_(data), length_(static_cast<size_t>(length)) { }
|
||||
|
||||
~TestExternalResource() { }
|
||||
|
||||
const uint16_t* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t length() const {
|
||||
return length_;
|
||||
}
|
||||
private:
|
||||
uint16_t* data_;
|
||||
size_t length_;
|
||||
};
|
||||
|
||||
|
||||
// CHECK_EQ applied after converting both operands to int, so that counters
// and stream positions of differing integer types can be compared.
#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
|
||||
|
||||
void TestCharacterStream(const char* ascii_source,
|
||||
unsigned length,
|
||||
unsigned start = 0,
|
||||
unsigned end = 0) {
|
||||
if (end == 0) end = length;
|
||||
unsigned sub_length = end - start;
|
||||
i::HandleScope test_scope;
|
||||
i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
|
||||
}
|
||||
i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
|
||||
i::Handle<i::String> ascii_string(
|
||||
i::Factory::NewStringFromAscii(ascii_vector));
|
||||
TestExternalResource resource(*uc16_buffer, length);
|
||||
i::Handle<i::String> uc16_string(
|
||||
i::Factory::NewExternalStringFromTwoByte(&resource));
|
||||
|
||||
i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
|
||||
i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
|
||||
i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
|
||||
i::Utf8ToUC16CharacterStream utf8_stream(
|
||||
reinterpret_cast<const i::byte*>(ascii_source), end);
|
||||
utf8_stream.SeekForward(start);
|
||||
|
||||
unsigned i = start;
|
||||
while (i < end) {
|
||||
// Read streams one char at a time
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
int32_t c0 = ascii_source[i];
|
||||
int32_t c1 = uc16_stream.Advance();
|
||||
int32_t c2 = string_stream.Advance();
|
||||
int32_t c3 = utf8_stream.Advance();
|
||||
i++;
|
||||
CHECK_EQ(c0, c1);
|
||||
CHECK_EQ(c0, c2);
|
||||
CHECK_EQ(c0, c3);
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
}
|
||||
while (i > start + sub_length / 4) {
|
||||
// Pushback, re-read, pushback again.
|
||||
int32_t c0 = ascii_source[i - 1];
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
uc16_stream.PushBack(c0);
|
||||
string_stream.PushBack(c0);
|
||||
utf8_stream.PushBack(c0);
|
||||
i--;
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
int32_t c1 = uc16_stream.Advance();
|
||||
int32_t c2 = string_stream.Advance();
|
||||
int32_t c3 = utf8_stream.Advance();
|
||||
i++;
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
CHECK_EQ(c0, c1);
|
||||
CHECK_EQ(c0, c2);
|
||||
CHECK_EQ(c0, c3);
|
||||
uc16_stream.PushBack(c0);
|
||||
string_stream.PushBack(c0);
|
||||
utf8_stream.PushBack(c0);
|
||||
i--;
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
}
|
||||
unsigned halfway = start + sub_length / 2;
|
||||
uc16_stream.SeekForward(halfway - i);
|
||||
string_stream.SeekForward(halfway - i);
|
||||
utf8_stream.SeekForward(halfway - i);
|
||||
i = halfway;
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
|
||||
while (i < end) {
|
||||
// Read streams one char at a time
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
int32_t c0 = ascii_source[i];
|
||||
int32_t c1 = uc16_stream.Advance();
|
||||
int32_t c2 = string_stream.Advance();
|
||||
int32_t c3 = utf8_stream.Advance();
|
||||
i++;
|
||||
CHECK_EQ(c0, c1);
|
||||
CHECK_EQ(c0, c2);
|
||||
CHECK_EQ(c0, c3);
|
||||
CHECK_EQU(i, uc16_stream.pos());
|
||||
CHECK_EQU(i, string_stream.pos());
|
||||
CHECK_EQU(i, utf8_stream.pos());
|
||||
}
|
||||
|
||||
int32_t c1 = uc16_stream.Advance();
|
||||
int32_t c2 = string_stream.Advance();
|
||||
int32_t c3 = utf8_stream.Advance();
|
||||
CHECK_LT(c1, 0);
|
||||
CHECK_LT(c2, 0);
|
||||
CHECK_LT(c3, 0);
|
||||
}
|
||||
|
||||
|
||||
// Runs TestCharacterStream over a short string with control characters, a
// large string (whole and a sub-range), a single NUL and the empty string.
TEST(CharacterStreams) {
  static const unsigned kBigStringSize = 4096;

  v8::HandleScope handles;
  v8::Persistent<v8::Context> context = v8::Context::New();
  v8::Context::Scope context_scope(context);

  TestCharacterStream("abc\0\n\r\x7f", 7);

  // Large input cycling through every 7-bit value.
  char buffer[kBigStringSize + 1];
  for (unsigned index = 0; index < kBigStringSize; index++) {
    buffer[index] = static_cast<char>(index & 0x7f);
  }
  TestCharacterStream(buffer, kBigStringSize);

  TestCharacterStream(buffer, kBigStringSize, 576, 3298);

  TestCharacterStream("\0", 1);
  TestCharacterStream("", 0);
}
|
||||
|
||||
|
||||
// Encodes every character value from 0 up to the largest three-byte UTF-8
// character into one buffer, then checks that Utf8ToUC16CharacterStream
// reads them back in order, supports pushing all of them back, and reports
// consistent positions when seeking forward in steps.
TEST(Utf8CharacterStream) {
  static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
  static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);

  // Total encoded size: one, two and three bytes per character for the
  // respective ranges.
  static const int kAllUtf8CharsSize =
      (unibrow::Utf8::kMaxOneByteChar + 1) +
      (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
      (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
  static const unsigned kAllUtf8CharsSizeU =
      static_cast<unsigned>(kAllUtf8CharsSize);

  char buffer[kAllUtf8CharsSizeU];
  unsigned cursor = 0;
  for (int value = 0; value <= kMaxUC16Char; value++) {
    cursor += unibrow::Utf8::Encode(buffer + cursor, value);
  }
  ASSERT(cursor == kAllUtf8CharsSizeU);

  i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
                                      kAllUtf8CharsSizeU);

  // Read every character forwards.
  for (int value = 0; value <= kMaxUC16Char; value++) {
    CHECK_EQU(value, stream.pos());
    int32_t read = stream.Advance();
    CHECK_EQ(value, read);
    CHECK_EQU(value + 1, stream.pos());
  }

  // Push every character back again, last one first.
  for (int value = kMaxUC16Char; value >= 0; value--) {
    CHECK_EQU(value + 1, stream.pos());
    stream.PushBack(value);
    CHECK_EQU(value, stream.pos());
  }

  // Alternate between seeking forward and reading a single character.
  int expected_pos = 0;
  while (stream.pos() < kMaxUC16CharU) {
    CHECK_EQU(expected_pos, stream.pos());
    unsigned progress = stream.SeekForward(12);
    expected_pos += progress;
    int32_t read = stream.Advance();
    if (expected_pos > kMaxUC16Char) {
      CHECK_EQ(-1, read);
    } else {
      CHECK_EQ(expected_pos, read);
    }
    expected_pos += 1;
    CHECK_EQU(expected_pos, stream.pos());
  }
}
|
||||
|
||||
#undef CHECK_EQU
|
||||
|
||||
// Scans |stream| with a V8JavaScriptScanner and checks each token against
// |expected_tokens|, a list terminated by i::Token::ILLEGAL. At least one
// token is always scanned. Whenever a scanned token ends at position
// |skip_pos|, the scanner is asked to seek forward to |skip_to|.
void TestStreamScanner(i::UC16CharacterStream* stream,
                       i::Token::Value* expected_tokens,
                       int skip_pos = 0,  // Zero means not skipping.
                       int skip_to = 0) {
  i::V8JavaScriptScanner scanner;
  scanner.Initialize(stream, i::JavaScriptScanner::kAllLiterals);

  for (int index = 0;;) {
    i::Token::Value expected = expected_tokens[index];
    i::Token::Value actual = scanner.Next();
    // Compare token names rather than raw enum values.
    CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
    if (skip_pos == scanner.location().end_pos) scanner.SeekForward(skip_to);
    index++;
    // Stop once the next expectation is the ILLEGAL terminator.
    if (expected_tokens[index] == i::Token::ILLEGAL) break;
  }
}
|
||||
|
||||
// Feeds three UTF-8 sources through TestStreamScanner: one scanned straight
// through, one with a seek over a middle section, and one scanned repeatedly
// with progressively longer seeks.
TEST(StreamScanner) {
  // Case 1: punctuation, identifiers, a keyword and a comment; no seeking.
  const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
  const i::byte* bytes1 = reinterpret_cast<const i::byte*>(str1);
  const unsigned length1 = static_cast<unsigned>(strlen(str1));
  i::Utf8ToUC16CharacterStream stream1(bytes1, length1);
  i::Token::Value expectations1[] = {
      i::Token::LBRACE,
      i::Token::IDENTIFIER,
      i::Token::IDENTIFIER,
      i::Token::FOR,
      i::Token::COLON,
      i::Token::MUL,
      i::Token::DIV,
      i::Token::LT,
      i::Token::SUB,
      i::Token::IDENTIFIER,
      i::Token::EOS,
      i::Token::ILLEGAL
  };
  TestStreamScanner(&stream1, expectations1, 0, 0);

  // Case 2: once the token ending at position 20 (the '{' at index 19) has
  // been scanned, seek to position 37 (the '}').
  const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
  const i::byte* bytes2 = reinterpret_cast<const i::byte*>(str2);
  const unsigned length2 = static_cast<unsigned>(strlen(str2));
  i::Utf8ToUC16CharacterStream stream2(bytes2, length2);
  i::Token::Value expectations2[] = {
      i::Token::CASE,
      i::Token::DEFAULT,
      i::Token::CONST,
      i::Token::LBRACE,
      // Skipped part here
      i::Token::RBRACE,
      i::Token::DO,
      i::Token::EOS,
      i::Token::ILLEGAL
  };
  ASSERT_EQ('{', str2[19]);
  ASSERT_EQ('}', str2[37]);
  TestStreamScanner(&stream2, expectations2, 20, 37);

  // Case 3: one LBRACE followed by four RBRACEs.
  const char* str3 = "{}}}}";
  const i::byte* bytes3 = reinterpret_cast<const i::byte*>(str3);
  const unsigned length3 = static_cast<unsigned>(strlen(str3));
  i::Token::Value expectations3[] = {
      i::Token::LBRACE,
      i::Token::RBRACE,
      i::Token::RBRACE,
      i::Token::RBRACE,
      i::Token::RBRACE,
      i::Token::EOS,
      i::Token::ILLEGAL
  };
  // Skip zero to four RBRACEs. Each round moves the EOS/ILLEGAL tail of the
  // expectation list one slot earlier and seeks from position 1 to
  // 1 + skipped on a fresh stream.
  for (int skipped = 0; skipped <= 4; skipped++) {
    const int eos_index = 5 - skipped;
    expectations3[eos_index + 1] = i::Token::ILLEGAL;
    expectations3[eos_index] = i::Token::EOS;
    i::Utf8ToUC16CharacterStream stream3(bytes3, length3);
    TestStreamScanner(&stream3, expectations3, 1, 1 + skipped);
  }
}
|
||||
|
Loading…
Reference in New Issue
Block a user