Merge preparser Scanner with main JavaScript scanner.

Optimize scanning of keywords.

Review URL: http://codereview.chromium.org/5188009

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5858 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-11-19 09:02:59 +00:00
parent 5205fd4417
commit afbbf485fb
6 changed files with 124 additions and 1136 deletions

View File

@ -37,7 +37,6 @@
#include "parser.h"
#include "platform.h"
#include "preparser.h"
#include "prescanner.h"
#include "runtime.h"
#include "scopeinfo.h"
#include "string-stream.h"
@ -4637,12 +4636,15 @@ int ScriptDataImpl::ReadNumber(byte** source) {
}
static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
// Create a Scanner for the preparser to use as input, and preparse the source.
static ScriptDataImpl* DoPreParse(Handle<String> source,
unibrow::CharacterStream* stream,
bool allow_lazy,
PartialParserRecorder* recorder) {
preparser::Scanner scanner;
scanner.Initialize(stream);
preparser::PreParser<preparser::Scanner, PartialParserRecorder> preparser;
PartialParserRecorder* recorder,
int literal_flags) {
V8JavaScriptScanner scanner;
scanner.Initialize(source, stream, literal_flags);
preparser::PreParser<JavaScriptScanner, PartialParserRecorder> preparser;
if (!preparser.PreParseProgram(&scanner, recorder, allow_lazy)) {
Top::StackOverflow();
return NULL;
@ -4655,44 +4657,11 @@ static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
}
// Wraps the input in a UTF16Buffer and preparses it via the buffer-based
// DoPreParse overload. The buffer type is chosen from what the caller
// supplied: a bare character stream (null |source|), an external ASCII
// string, an external two-byte string, or any other string.
static ScriptDataImpl* DoPreParse(Handle<String> source,
                                  unibrow::CharacterStream* stream,
                                  bool allow_lazy,
                                  PartialParserRecorder* recorder) {
  // No string handle: read directly from the supplied character stream.
  if (source.is_null()) {
    CharacterStreamUTF16Buffer stream_buffer;
    int stream_length = stream->Length();
    stream_buffer.Initialize(source, stream, 0, stream_length);
    return DoPreParse(&stream_buffer, allow_lazy, recorder);
  }

  // External ASCII string: use the buffer specialised for char data.
  if (source->IsExternalAsciiString()) {
    ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_buffer;
    int ascii_length = source->length();
    ascii_buffer.Initialize(Handle<ExternalAsciiString>::cast(source),
                            0, ascii_length);
    return DoPreParse(&ascii_buffer, allow_lazy, recorder);
  }

  // External two-byte string: use the buffer specialised for uint16_t data.
  if (source->IsExternalTwoByteString()) {
    ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> wide_buffer;
    int wide_length = source->length();
    wide_buffer.Initialize(Handle<ExternalTwoByteString>::cast(source),
                           0, wide_length);
    return DoPreParse(&wide_buffer, allow_lazy, recorder);
  }

  // Any other string: stream its characters through a SafeStringInputBuffer.
  // The |stream| argument is not used on this path.
  CharacterStreamUTF16Buffer generic_buffer;
  SafeStringInputBuffer string_input;
  string_input.Reset(0, source.location());
  int generic_length = source->length();
  generic_buffer.Initialize(source, &string_input, 0, generic_length);
  return DoPreParse(&generic_buffer, allow_lazy, recorder);
}
// Preparse, but only collect data that is immediately useful,
// even if the preparser data is only used once.
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
bool allow_lazy = FLAG_lazy && (extension == NULL);
if (!allow_lazy) {
// Partial preparsing is only about lazily compiled functions.
@ -4701,7 +4670,8 @@ ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
}
PartialParserRecorder recorder;
return DoPreParse(source, stream, allow_lazy, &recorder);
return DoPreParse(source, stream, allow_lazy, &recorder,
JavaScriptScanner::kNoLiterals);
}
@ -4711,7 +4681,10 @@ ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
Handle<Script> no_script;
bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder;
return DoPreParse(source, stream, allow_lazy, &recorder);
int kPreParseLiteralsFlags =
JavaScriptScanner::kLiteralString | JavaScriptScanner::kLiteralIdentifier;
return DoPreParse(source, stream, allow_lazy,
&recorder, kPreParseLiteralsFlags);
}

File diff suppressed because it is too large Load Diff

View File

@ -480,7 +480,7 @@ void JavaScriptScanner::Scan() {
default:
if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
token = ScanIdentifier();
token = ScanIdentifierOrKeyword();
} else if (IsDecimalDigit(c0_)) {
token = ScanNumber(false);
} else if (SkipWhiteSpace()) {
@ -559,7 +559,7 @@ Token::Value JavaScriptScanner::ScanString() {
uc32 quote = c0_;
Advance(); // consume quote
LiteralScope literal(this);
LiteralScope literal(this, kLiteralString);
while (c0_ != quote && c0_ >= 0
&& !ScannerConstants::kIsLineTerminator.get(c0_)) {
uc32 c = c0_;
@ -590,7 +590,7 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
LiteralScope literal(this);
LiteralScope literal(this, kLiteralNumber);
if (seen_period) {
// we have already seen a decimal point of the float
AddLiteralChar('.');
@ -677,25 +677,44 @@ uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
}
Token::Value JavaScriptScanner::ScanIdentifier() {
Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
LiteralScope literal(this);
LiteralScope literal(this, kLiteralIdentifier);
KeywordMatcher keyword_match;
// Scan identifier start character.
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters.
if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
AddLiteralChar(c);
keyword_match.Fail();
} else {
AddLiteralChar(c0_);
keyword_match.AddChar(c0_);
Advance();
return ScanIdentifierSuffix(&literal);
}
uc32 first_char = c0_;
Advance();
AddLiteralChar(first_char);
if (!keyword_match.AddChar(first_char)) {
return ScanIdentifierSuffix(&literal);
}
// Scan the rest of the identifier characters.
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ != '\\') {
uc32 next_char = c0_;
Advance();
AddLiteralChar(next_char);
if (keyword_match.AddChar(next_char)) continue;
}
// Fallthrough if no longer able to complete keyword.
return ScanIdentifierSuffix(&literal);
}
literal.Complete();
return keyword_match.token();
}
Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
// Scan the rest of the identifier characters.
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
@ -703,20 +722,17 @@ Token::Value JavaScriptScanner::ScanIdentifier() {
// Only allow legal identifier part characters.
if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
AddLiteralChar(c);
keyword_match.Fail();
} else {
AddLiteralChar(c0_);
keyword_match.AddChar(c0_);
Advance();
}
}
literal.Complete();
literal->Complete();
return keyword_match.token();
return Token::IDENTIFIER;
}
bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false;
@ -729,7 +745,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
// constructor.
LiteralScope literal(this);
LiteralScope literal(this, kLiteralRegExp);
if (seen_equal)
AddLiteralChar('=');
@ -752,9 +768,10 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
return true;
}
bool JavaScriptScanner::ScanRegExpFlags() {
// Scan regular expression flags.
LiteralScope literal(this);
LiteralScope literal(this, kLiteralRegExpFlags);
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
@ -868,9 +885,7 @@ void KeywordMatcher::Step(unibrow::uchar input) {
break;
case IN:
token_ = Token::IDENTIFIER;
if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
return;
}
if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
break;
case N:
if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;

View File

@ -327,6 +327,42 @@ class Scanner {
class JavaScriptScanner : public Scanner {
public:
// Bit flags naming the kinds of literal a scanner can record. Each kind
// occupies its own bit so that several kinds can be OR-ed into one set.
enum LiteralType {
  kNoLiterals = 0,
  kLiteralNumber = 1 << 0,
  kLiteralIdentifier = 1 << 1,
  kLiteralString = 1 << 2,
  kLiteralRegExp = 1 << 3,
  kLiteralRegExpFlags = 1 << 4,
  // Union of every individual literal kind above.
  kAllLiterals = kLiteralNumber | kLiteralIdentifier | kLiteralString |
                 kLiteralRegExp | kLiteralRegExpFlags
};
// Scoped helper used while scanning one literal. On construction it starts
// a literal on the scanner, but only if the scanner is configured to record
// literals of the given type. On destruction it drops the literal unless
// Complete() was called first.
// NOTE(review): Complete() and the destructor call TerminateLiteral() /
// DropLiteral() even when StartLiteral() was skipped; presumably those are
// harmless when nothing is being recorded -- confirm against the scanner.
class LiteralScope {
 public:
  LiteralScope(JavaScriptScanner* self, LiteralType type)
      : scanner_(self), complete_(false) {
    const bool wants_literal = scanner_->RecordsLiteral(type);
    if (wants_literal) scanner_->StartLiteral();
  }

  ~LiteralScope() {
    // A completed literal is kept; anything else is discarded.
    if (complete_) return;
    scanner_->DropLiteral();
  }

  // Terminates the literal and marks the scope so the destructor keeps it.
  void Complete() {
    scanner_->TerminateLiteral();
    complete_ = true;
  }

 private:
  JavaScriptScanner* scanner_;
  bool complete_;
};
JavaScriptScanner();
// Returns the next token.
@ -354,6 +390,11 @@ class JavaScriptScanner : public Scanner {
// tokens, which is what it is used for.
void SeekForward(int pos);
// Returns true when the bit(s) of |type| are set in this scanner's
// literal_flags_, i.e. when literals of that type are recorded.
bool RecordsLiteral(LiteralType type) {
  const int selected_bits = literal_flags_ & type;
  return selected_bits != 0;
}
protected:
bool SkipWhiteSpace();
Token::Value SkipSingleLineComment();
@ -364,7 +405,8 @@ class JavaScriptScanner : public Scanner {
void ScanDecimalDigits();
Token::Value ScanNumber(bool seen_period);
Token::Value ScanIdentifier();
Token::Value ScanIdentifierOrKeyword();
Token::Value ScanIdentifierSuffix(LiteralScope* literal);
void ScanEscape();
Token::Value ScanString();
@ -376,6 +418,7 @@ class JavaScriptScanner : public Scanner {
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
int literal_flags_;
bool has_line_terminator_before_next_;
};
@ -404,10 +447,11 @@ class KeywordMatcher {
Token::Value token() { return token_; }
inline void AddChar(unibrow::uchar input) {
inline bool AddChar(unibrow::uchar input) {
  // Only advance the matcher while a keyword match is still possible.
  const bool still_matchable = (state_ != UNMATCHABLE);
  if (still_matchable) Step(input);
  // Re-read the state: Step() may have changed it. The result tells the
  // caller whether a keyword can still be completed.
  return state_ != UNMATCHABLE;
}
void Fail() {
@ -458,24 +502,24 @@ class KeywordMatcher {
const char* keyword,
int position,
Token::Value token_if_match) {
if (input == static_cast<unibrow::uchar>(keyword[position])) {
if (input != static_cast<unibrow::uchar>(keyword[position])) {
return false;
}
state_ = KEYWORD_PREFIX;
this->keyword_ = keyword;
this->counter_ = position + 1;
this->keyword_token_ = token_if_match;
return true;
}
return false;
}
// If input equals match character, transition to new state and return true.
inline bool MatchState(unibrow::uchar input, char match, State new_state) {
if (input == static_cast<unibrow::uchar>(match)) {
if (input != static_cast<unibrow::uchar>(match)) {
return false;
}
state_ = new_state;
return true;
}
return false;
}
inline bool MatchKeyword(unibrow::uchar input,
char match,

View File

@ -118,8 +118,12 @@ void Scanner::LiteralScope::Complete() {
// ----------------------------------------------------------------------------
// V8JavaScriptScanner
void V8JavaScriptScanner::Initialize(Handle<String> source) {
void V8JavaScriptScanner::Initialize(Handle<String> source,
int literal_flags) {
source_ = stream_initializer_.Init(source, NULL, 0, source->length());
// Need to capture identifiers in order to recognize "get" and "set"
// in object literals.
literal_flags_ = literal_flags | kLiteralIdentifier;
Init();
// Skip initial whitespace allowing HTML comment ends just like
// after a newline and scan first token.
@ -130,9 +134,11 @@ void V8JavaScriptScanner::Initialize(Handle<String> source) {
void V8JavaScriptScanner::Initialize(Handle<String> source,
unibrow::CharacterStream* stream) {
unibrow::CharacterStream* stream,
int literal_flags) {
source_ = stream_initializer_.Init(source, stream,
0, UTF16Buffer::kNoEndPosition);
literal_flags_ = literal_flags | kLiteralIdentifier;
Init();
// Skip initial whitespace allowing HTML comment ends just like
// after a newline and scan first token.
@ -144,9 +150,11 @@ void V8JavaScriptScanner::Initialize(Handle<String> source,
void V8JavaScriptScanner::Initialize(Handle<String> source,
int start_position,
int end_position) {
int end_position,
int literal_flags) {
source_ = stream_initializer_.Init(source, NULL,
start_position, end_position);
literal_flags_ = literal_flags | kLiteralIdentifier;
Init();
// Skip initial whitespace allowing HTML comment ends just like
// after a newline and scan first token.

View File

@ -108,11 +108,13 @@ class V8JavaScriptScanner : public JavaScriptScanner {
Token::Value NextCheckStack();
// Initialize the Scanner to scan source.
void Initialize(Handle<String> source);
void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
void Initialize(Handle<String> source,
unibrow::CharacterStream* stream);
unibrow::CharacterStream* stream,
int literal_flags = kAllLiterals);
void Initialize(Handle<String> source,
int start_position, int end_position);
int start_position, int end_position,
int literal_flags = kAllLiterals);
protected:
StreamInitializer stream_initializer_;