Merge preparser Scanner with main JavaScript scanner.
Optimize scanning of keywords. Review URL: http://codereview.chromium.org/5188009 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5858 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
5205fd4417
commit
afbbf485fb
@ -37,7 +37,6 @@
|
||||
#include "parser.h"
|
||||
#include "platform.h"
|
||||
#include "preparser.h"
|
||||
#include "prescanner.h"
|
||||
#include "runtime.h"
|
||||
#include "scopeinfo.h"
|
||||
#include "string-stream.h"
|
||||
@ -4637,12 +4636,15 @@ int ScriptDataImpl::ReadNumber(byte** source) {
|
||||
}
|
||||
|
||||
|
||||
static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
|
||||
// Create a Scanner for the preparser to use as input, and preparse the source.
|
||||
static ScriptDataImpl* DoPreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
bool allow_lazy,
|
||||
PartialParserRecorder* recorder) {
|
||||
preparser::Scanner scanner;
|
||||
scanner.Initialize(stream);
|
||||
preparser::PreParser<preparser::Scanner, PartialParserRecorder> preparser;
|
||||
PartialParserRecorder* recorder,
|
||||
int literal_flags) {
|
||||
V8JavaScriptScanner scanner;
|
||||
scanner.Initialize(source, stream, literal_flags);
|
||||
preparser::PreParser<JavaScriptScanner, PartialParserRecorder> preparser;
|
||||
if (!preparser.PreParseProgram(&scanner, recorder, allow_lazy)) {
|
||||
Top::StackOverflow();
|
||||
return NULL;
|
||||
@ -4655,44 +4657,11 @@ static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
|
||||
}
|
||||
|
||||
|
||||
// Create an UTF16Buffer for the preparser to use as input,
|
||||
// and preparse the source.
|
||||
static ScriptDataImpl* DoPreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
bool allow_lazy,
|
||||
PartialParserRecorder* recorder) {
|
||||
if (source.is_null()) {
|
||||
CharacterStreamUTF16Buffer buffer;
|
||||
int length = stream->Length();
|
||||
buffer.Initialize(source, stream, 0, length);
|
||||
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||
} else if (source->IsExternalAsciiString()) {
|
||||
ExternalStringUTF16Buffer<ExternalAsciiString, char> buffer;
|
||||
int length = source->length();
|
||||
buffer.Initialize(Handle<ExternalAsciiString>::cast(source), 0, length);
|
||||
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||
} else if (source->IsExternalTwoByteString()) {
|
||||
ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> buffer;
|
||||
int length = source->length();
|
||||
buffer.Initialize(Handle<ExternalTwoByteString>::cast(source), 0, length);
|
||||
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||
} else {
|
||||
CharacterStreamUTF16Buffer buffer;
|
||||
SafeStringInputBuffer input;
|
||||
input.Reset(0, source.location());
|
||||
int length = source->length();
|
||||
buffer.Initialize(source, &input, 0, length);
|
||||
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Preparse, but only collect data that is immediately useful,
|
||||
// even if the preparser data is only used once.
|
||||
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
v8::Extension* extension) {
|
||||
Handle<Script> no_script;
|
||||
bool allow_lazy = FLAG_lazy && (extension == NULL);
|
||||
if (!allow_lazy) {
|
||||
// Partial preparsing is only about lazily compiled functions.
|
||||
@ -4701,7 +4670,8 @@ ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
||||
}
|
||||
PartialParserRecorder recorder;
|
||||
|
||||
return DoPreParse(source, stream, allow_lazy, &recorder);
|
||||
return DoPreParse(source, stream, allow_lazy, &recorder,
|
||||
JavaScriptScanner::kNoLiterals);
|
||||
}
|
||||
|
||||
|
||||
@ -4711,7 +4681,10 @@ ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
|
||||
Handle<Script> no_script;
|
||||
bool allow_lazy = FLAG_lazy && (extension == NULL);
|
||||
CompleteParserRecorder recorder;
|
||||
return DoPreParse(source, stream, allow_lazy, &recorder);
|
||||
int kPreParseLiteralsFlags =
|
||||
JavaScriptScanner::kLiteralString | JavaScriptScanner::kLiteralIdentifier;
|
||||
return DoPreParse(source, stream, allow_lazy,
|
||||
&recorder, kPreParseLiteralsFlags);
|
||||
}
|
||||
|
||||
|
||||
|
1054
src/prescanner.h
1054
src/prescanner.h
File diff suppressed because it is too large
Load Diff
@ -480,7 +480,7 @@ void JavaScriptScanner::Scan() {
|
||||
|
||||
default:
|
||||
if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
|
||||
token = ScanIdentifier();
|
||||
token = ScanIdentifierOrKeyword();
|
||||
} else if (IsDecimalDigit(c0_)) {
|
||||
token = ScanNumber(false);
|
||||
} else if (SkipWhiteSpace()) {
|
||||
@ -559,7 +559,7 @@ Token::Value JavaScriptScanner::ScanString() {
|
||||
uc32 quote = c0_;
|
||||
Advance(); // consume quote
|
||||
|
||||
LiteralScope literal(this);
|
||||
LiteralScope literal(this, kLiteralString);
|
||||
while (c0_ != quote && c0_ >= 0
|
||||
&& !ScannerConstants::kIsLineTerminator.get(c0_)) {
|
||||
uc32 c = c0_;
|
||||
@ -590,7 +590,7 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
|
||||
|
||||
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
|
||||
|
||||
LiteralScope literal(this);
|
||||
LiteralScope literal(this, kLiteralNumber);
|
||||
if (seen_period) {
|
||||
// we have already seen a decimal point of the float
|
||||
AddLiteralChar('.');
|
||||
@ -677,25 +677,44 @@ uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
|
||||
}
|
||||
|
||||
|
||||
Token::Value JavaScriptScanner::ScanIdentifier() {
|
||||
Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
|
||||
ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
|
||||
|
||||
LiteralScope literal(this);
|
||||
LiteralScope literal(this, kLiteralIdentifier);
|
||||
KeywordMatcher keyword_match;
|
||||
|
||||
// Scan identifier start character.
|
||||
if (c0_ == '\\') {
|
||||
uc32 c = ScanIdentifierUnicodeEscape();
|
||||
// Only allow legal identifier start characters.
|
||||
if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
|
||||
AddLiteralChar(c);
|
||||
keyword_match.Fail();
|
||||
} else {
|
||||
AddLiteralChar(c0_);
|
||||
keyword_match.AddChar(c0_);
|
||||
Advance();
|
||||
return ScanIdentifierSuffix(&literal);
|
||||
}
|
||||
|
||||
uc32 first_char = c0_;
|
||||
Advance();
|
||||
AddLiteralChar(first_char);
|
||||
if (!keyword_match.AddChar(first_char)) {
|
||||
return ScanIdentifierSuffix(&literal);
|
||||
}
|
||||
|
||||
// Scan the rest of the identifier characters.
|
||||
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
|
||||
if (c0_ != '\\') {
|
||||
uc32 next_char = c0_;
|
||||
Advance();
|
||||
AddLiteralChar(next_char);
|
||||
if (keyword_match.AddChar(next_char)) continue;
|
||||
}
|
||||
// Fallthrough if no longer able to complete keyword.
|
||||
return ScanIdentifierSuffix(&literal);
|
||||
}
|
||||
literal.Complete();
|
||||
|
||||
return keyword_match.token();
|
||||
}
|
||||
|
||||
|
||||
Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
|
||||
// Scan the rest of the identifier characters.
|
||||
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
|
||||
if (c0_ == '\\') {
|
||||
@ -703,20 +722,17 @@ Token::Value JavaScriptScanner::ScanIdentifier() {
|
||||
// Only allow legal identifier part characters.
|
||||
if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
|
||||
AddLiteralChar(c);
|
||||
keyword_match.Fail();
|
||||
} else {
|
||||
AddLiteralChar(c0_);
|
||||
keyword_match.AddChar(c0_);
|
||||
Advance();
|
||||
}
|
||||
}
|
||||
literal.Complete();
|
||||
literal->Complete();
|
||||
|
||||
return keyword_match.token();
|
||||
return Token::IDENTIFIER;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
|
||||
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
|
||||
bool in_character_class = false;
|
||||
@ -729,7 +745,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
|
||||
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
|
||||
// the scanner should pass uninterpreted bodies to the RegExp
|
||||
// constructor.
|
||||
LiteralScope literal(this);
|
||||
LiteralScope literal(this, kLiteralRegExp);
|
||||
if (seen_equal)
|
||||
AddLiteralChar('=');
|
||||
|
||||
@ -752,9 +768,10 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool JavaScriptScanner::ScanRegExpFlags() {
|
||||
// Scan regular expression flags.
|
||||
LiteralScope literal(this);
|
||||
LiteralScope literal(this, kLiteralRegExpFlags);
|
||||
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
|
||||
if (c0_ == '\\') {
|
||||
uc32 c = ScanIdentifierUnicodeEscape();
|
||||
@ -868,9 +885,7 @@ void KeywordMatcher::Step(unibrow::uchar input) {
|
||||
break;
|
||||
case IN:
|
||||
token_ = Token::IDENTIFIER;
|
||||
if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
|
||||
return;
|
||||
}
|
||||
if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
|
||||
break;
|
||||
case N:
|
||||
if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
|
||||
|
@ -327,6 +327,42 @@ class Scanner {
|
||||
|
||||
class JavaScriptScanner : public Scanner {
|
||||
public:
|
||||
|
||||
// Bit vector representing set of types of literals.
|
||||
enum LiteralType {
|
||||
kNoLiterals = 0,
|
||||
kLiteralNumber = 1,
|
||||
kLiteralIdentifier = 2,
|
||||
kLiteralString = 4,
|
||||
kLiteralRegExp = 8,
|
||||
kLiteralRegExpFlags = 16,
|
||||
kAllLiterals = 31
|
||||
};
|
||||
|
||||
// A LiteralScope that disables recording of some types of JavaScript
|
||||
// literals. If the scanner is configured to not record the specific
|
||||
// type of literal, the scope will not call StartLiteral.
|
||||
class LiteralScope {
|
||||
public:
|
||||
LiteralScope(JavaScriptScanner* self, LiteralType type)
|
||||
: scanner_(self), complete_(false) {
|
||||
if (scanner_->RecordsLiteral(type)) {
|
||||
scanner_->StartLiteral();
|
||||
}
|
||||
}
|
||||
~LiteralScope() {
|
||||
if (!complete_) scanner_->DropLiteral();
|
||||
}
|
||||
void Complete() {
|
||||
scanner_->TerminateLiteral();
|
||||
complete_ = true;
|
||||
}
|
||||
|
||||
private:
|
||||
JavaScriptScanner* scanner_;
|
||||
bool complete_;
|
||||
};
|
||||
|
||||
JavaScriptScanner();
|
||||
|
||||
// Returns the next token.
|
||||
@ -354,6 +390,11 @@ class JavaScriptScanner : public Scanner {
|
||||
// tokens, which is what it is used for.
|
||||
void SeekForward(int pos);
|
||||
|
||||
// Whether this scanner records the given literal type or not.
|
||||
bool RecordsLiteral(LiteralType type) {
|
||||
return (literal_flags_ & type) != 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool SkipWhiteSpace();
|
||||
Token::Value SkipSingleLineComment();
|
||||
@ -364,7 +405,8 @@ class JavaScriptScanner : public Scanner {
|
||||
|
||||
void ScanDecimalDigits();
|
||||
Token::Value ScanNumber(bool seen_period);
|
||||
Token::Value ScanIdentifier();
|
||||
Token::Value ScanIdentifierOrKeyword();
|
||||
Token::Value ScanIdentifierSuffix(LiteralScope* literal);
|
||||
|
||||
void ScanEscape();
|
||||
Token::Value ScanString();
|
||||
@ -376,6 +418,7 @@ class JavaScriptScanner : public Scanner {
|
||||
// If the escape sequence cannot be decoded the result is kBadChar.
|
||||
uc32 ScanIdentifierUnicodeEscape();
|
||||
|
||||
int literal_flags_;
|
||||
bool has_line_terminator_before_next_;
|
||||
};
|
||||
|
||||
@ -404,10 +447,11 @@ class KeywordMatcher {
|
||||
|
||||
Token::Value token() { return token_; }
|
||||
|
||||
inline void AddChar(unibrow::uchar input) {
|
||||
inline bool AddChar(unibrow::uchar input) {
|
||||
if (state_ != UNMATCHABLE) {
|
||||
Step(input);
|
||||
}
|
||||
return state_ != UNMATCHABLE;
|
||||
}
|
||||
|
||||
void Fail() {
|
||||
@ -458,23 +502,23 @@ class KeywordMatcher {
|
||||
const char* keyword,
|
||||
int position,
|
||||
Token::Value token_if_match) {
|
||||
if (input == static_cast<unibrow::uchar>(keyword[position])) {
|
||||
state_ = KEYWORD_PREFIX;
|
||||
this->keyword_ = keyword;
|
||||
this->counter_ = position + 1;
|
||||
this->keyword_token_ = token_if_match;
|
||||
return true;
|
||||
if (input != static_cast<unibrow::uchar>(keyword[position])) {
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
state_ = KEYWORD_PREFIX;
|
||||
this->keyword_ = keyword;
|
||||
this->counter_ = position + 1;
|
||||
this->keyword_token_ = token_if_match;
|
||||
return true;
|
||||
}
|
||||
|
||||
// If input equals match character, transition to new state and return true.
|
||||
inline bool MatchState(unibrow::uchar input, char match, State new_state) {
|
||||
if (input == static_cast<unibrow::uchar>(match)) {
|
||||
state_ = new_state;
|
||||
return true;
|
||||
if (input != static_cast<unibrow::uchar>(match)) {
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
state_ = new_state;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool MatchKeyword(unibrow::uchar input,
|
||||
|
@ -118,8 +118,12 @@ void Scanner::LiteralScope::Complete() {
|
||||
// ----------------------------------------------------------------------------
|
||||
// V8JavaScriptScanner
|
||||
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source) {
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
int literal_flags) {
|
||||
source_ = stream_initializer_.Init(source, NULL, 0, source->length());
|
||||
// Need to capture identifiers in order to recognize "get" and "set"
|
||||
// in object literals.
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
Init();
|
||||
// Skip initial whitespace allowing HTML comment ends just like
|
||||
// after a newline and scan first token.
|
||||
@ -130,9 +134,11 @@ void V8JavaScriptScanner::Initialize(Handle<String> source) {
|
||||
|
||||
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
unibrow::CharacterStream* stream) {
|
||||
unibrow::CharacterStream* stream,
|
||||
int literal_flags) {
|
||||
source_ = stream_initializer_.Init(source, stream,
|
||||
0, UTF16Buffer::kNoEndPosition);
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
Init();
|
||||
// Skip initial whitespace allowing HTML comment ends just like
|
||||
// after a newline and scan first token.
|
||||
@ -144,9 +150,11 @@ void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
|
||||
void V8JavaScriptScanner::Initialize(Handle<String> source,
|
||||
int start_position,
|
||||
int end_position) {
|
||||
int end_position,
|
||||
int literal_flags) {
|
||||
source_ = stream_initializer_.Init(source, NULL,
|
||||
start_position, end_position);
|
||||
literal_flags_ = literal_flags | kLiteralIdentifier;
|
||||
Init();
|
||||
// Skip initial whitespace allowing HTML comment ends just like
|
||||
// after a newline and scan first token.
|
||||
|
@ -108,11 +108,13 @@ class V8JavaScriptScanner : public JavaScriptScanner {
|
||||
Token::Value NextCheckStack();
|
||||
|
||||
// Initialize the Scanner to scan source.
|
||||
void Initialize(Handle<String> source);
|
||||
void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
|
||||
void Initialize(Handle<String> source,
|
||||
unibrow::CharacterStream* stream);
|
||||
unibrow::CharacterStream* stream,
|
||||
int literal_flags = kAllLiterals);
|
||||
void Initialize(Handle<String> source,
|
||||
int start_position, int end_position);
|
||||
int start_position, int end_position,
|
||||
int literal_flags = kAllLiterals);
|
||||
|
||||
protected:
|
||||
StreamInitializer stream_initializer_;
|
||||
|
Loading…
Reference in New Issue
Block a user