[scanner] Revert to pointer tracking token storage rather than range
Using a circular buffer regresses performance unfortunately. Change-Id: Id5b68cae798d21f46376141f86d5707794bf08d6 Reviewed-on: https://chromium-review.googlesource.com/1194064 Reviewed-by: Igor Sheludko <ishell@chromium.org> Commit-Queue: Toon Verwaest <verwaest@chromium.org> Cr-Commit-Position: refs/heads/master@{#55478}
This commit is contained in:
parent
0bceaf1e7c
commit
103d886a7e
@ -159,7 +159,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
|
||||
// Advance as long as character is a WhiteSpace or LineTerminator.
|
||||
// Remember if the latter is the case.
|
||||
if (unibrow::IsLineTerminator(c0_)) {
|
||||
scan_target().after_line_terminator = true;
|
||||
next().after_line_terminator = true;
|
||||
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
|
||||
break;
|
||||
}
|
||||
@ -178,7 +178,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
|
||||
V8_INLINE Token::Value Scanner::ScanSingleToken() {
|
||||
Token::Value token;
|
||||
do {
|
||||
scan_target().location.beg_pos = source_pos();
|
||||
next().location.beg_pos = source_pos();
|
||||
|
||||
if (static_cast<unsigned>(c0_) <= 0x7F) {
|
||||
Token::Value token = one_char_tokens[c0_];
|
||||
@ -242,7 +242,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
|
||||
Advance();
|
||||
if (c0_ == '-') {
|
||||
Advance();
|
||||
if (c0_ == '>' && scan_target().after_line_terminator) {
|
||||
if (c0_ == '>' && next().after_line_terminator) {
|
||||
// For compatibility with SpiderMonkey, we skip lines that
|
||||
// start with an HTML comment end '-->'.
|
||||
token = SkipSingleHTMLComment();
|
||||
@ -330,7 +330,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
|
||||
Token::Value token = ScanIdentifierOrKeyword();
|
||||
if (!Token::IsContextualKeyword(token)) return token;
|
||||
|
||||
scan_target().contextual_token = token;
|
||||
next().contextual_token = token;
|
||||
return Token::IDENTIFIER;
|
||||
}
|
||||
if (IsDecimalDigit(c0_)) return ScanNumber(false);
|
||||
@ -344,6 +344,22 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
|
||||
return token;
|
||||
}
|
||||
|
||||
// Scans one token into the next() slot: clears any leftover per-token state,
// runs ScanSingleToken(), and records the token's end position.
void Scanner::Scan() {
  // Reset state possibly left over from the slot's previous use.
  next().literal_chars.Drop();
  next().raw_literal_chars.Drop();
  next().contextual_token = Token::UNINITIALIZED;
  next().invalid_template_escape_message = MessageTemplate::kNone;

  next().token = ScanSingleToken();
  next().location.end_pos = source_pos();

#ifdef DEBUG
  // Validate all three lookahead descriptors, not just the freshly scanned one.
  SanityCheckTokenDesc(current());
  SanityCheckTokenDesc(next());
  SanityCheckTokenDesc(next_next());
#endif
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
@ -186,6 +186,7 @@ void Scanner::Initialize() {
|
||||
// Need to capture identifiers in order to recognize "get" and "set"
|
||||
// in object literals.
|
||||
Init();
|
||||
next().after_line_terminator = true;
|
||||
Scan();
|
||||
}
|
||||
|
||||
@ -231,6 +232,45 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
|
||||
return x;
|
||||
}
|
||||
|
||||
// Returns the next token and advances the input. Rotates the three TokenDesc
// pointers (current_/next_/next_next_) rather than copying descriptors.
Token::Value Scanner::Next() {
  // At EOS, reuse the previous token's location so error reporting points at
  // the last real token rather than past the end of input.
  if (next().token == Token::EOS) next().location = current().location;
  // Rotate through tokens.
  TokenDesc* previous = current_;
  current_ = next_;
  // Either we already have the next token lined up, in which case next_next_
  // simply becomes next_. In that case we use current_ as new next_next_ and
  // clear its token to indicate that it wasn't scanned yet. Otherwise we use
  // current_ as next_ and scan into it, leaving next_next_ uninitialized.
  if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
    // Common case: no PeekAhead() happened, so scan directly into the slot
    // that just stopped being current_.
    next_ = previous;
    next().after_line_terminator = false;
    Scan();
  } else {
    // A token was already scanned ahead; promote it and recycle the old
    // current_ slot as the (unscanned) next_next_.
    next_ = next_next_;
    next_next_ = previous;
    previous->token = Token::UNINITIALIZED;
    previous->contextual_token = Token::UNINITIALIZED;
    DCHECK_NE(Token::UNINITIALIZED, current().token);
  }
  return current().token;
}
|
||||
|
||||
// Returns the token following next() (two-token lookahead), scanning it on
// demand into next_next_. Does not consume any token.
Token::Value Scanner::PeekAhead() {
  // Lookahead past DIV/ASSIGN_DIV is disallowed: such a token may still be
  // re-scanned as a regexp literal (ScanRegExpPattern asserts next() is
  // DIV/ASSIGN_DIV and that nothing was scanned beyond it).
  DCHECK(next().token != Token::DIV);
  DCHECK(next().token != Token::ASSIGN_DIV);

  // Already peeked earlier; reuse the cached token.
  if (next_next().token != Token::UNINITIALIZED) {
    return next_next().token;
  }
  // Temporarily point next_ at the next_next_ slot so Scan() fills that slot,
  // then restore next_ afterwards.
  TokenDesc* temp = next_;
  next_ = next_next_;
  next().after_line_terminator = false;
  Scan();
  next_next_ = next_;
  next_ = temp;
  return next_next().token;
}
|
||||
|
||||
Token::Value Scanner::SkipSingleHTMLComment() {
|
||||
if (is_module_) {
|
||||
ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
|
||||
@ -319,11 +359,10 @@ Token::Value Scanner::SkipMultiLineComment() {
|
||||
|
||||
while (c0_ != kEndOfInput) {
|
||||
DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
|
||||
if (!scan_target().after_line_terminator &&
|
||||
unibrow::IsLineTerminator(c0_)) {
|
||||
if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
|
||||
// Following ECMA-262, section 7.4, a comment containing
|
||||
// a newline will make the comment count as a line-terminator.
|
||||
scan_target().after_line_terminator = true;
|
||||
next().after_line_terminator = true;
|
||||
}
|
||||
|
||||
while (V8_UNLIKELY(c0_ == '*')) {
|
||||
@ -354,25 +393,6 @@ Token::Value Scanner::ScanHtmlComment() {
|
||||
return SkipSingleHTMLComment();
|
||||
}
|
||||
|
||||
void Scanner::Scan() {
|
||||
token_end_ = (token_end_ + 1) & kTokenStorageMask;
|
||||
|
||||
scan_target().after_line_terminator = (source_pos() == 0);
|
||||
scan_target().literal_chars.Drop();
|
||||
scan_target().raw_literal_chars.Drop();
|
||||
scan_target().contextual_token = Token::UNINITIALIZED;
|
||||
scan_target().invalid_template_escape_message = MessageTemplate::kNone;
|
||||
|
||||
scan_target().token = ScanSingleToken();
|
||||
scan_target().location.end_pos = source_pos();
|
||||
|
||||
#ifdef DEBUG
|
||||
for (TokenDesc& token : token_storage_) {
|
||||
SanityCheckTokenDesc(token);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
|
||||
// Most tokens should not have literal_chars or even raw_literal chars.
|
||||
@ -434,9 +454,11 @@ void Scanner::SeekForward(int pos) {
|
||||
if (pos != current_pos) {
|
||||
source_->Seek(pos);
|
||||
Advance();
|
||||
// This function is only called to seek to the location
|
||||
// of the end of a function (at the "}" token). It doesn't matter
|
||||
// whether there was a line terminator in the part we skip.
|
||||
next().after_line_terminator = false;
|
||||
}
|
||||
|
||||
ResetTokenStorage();
|
||||
Scan();
|
||||
}
|
||||
|
||||
@ -586,7 +608,7 @@ Token::Value Scanner::ScanTemplateSpan() {
|
||||
|
||||
Token::Value result = Token::TEMPLATE_SPAN;
|
||||
LiteralScope literal(this);
|
||||
scan_target().raw_literal_chars.Start();
|
||||
next().raw_literal_chars.Start();
|
||||
const bool capture_raw = true;
|
||||
while (true) {
|
||||
uc32 c = c0_;
|
||||
@ -619,8 +641,8 @@ Token::Value Scanner::ScanTemplateSpan() {
|
||||
DCHECK_EQ(!success, has_error());
|
||||
// For templates, invalid escape sequence checking is handled in the
|
||||
// parser.
|
||||
scanner_error_state.MoveErrorTo(&scan_target());
|
||||
octal_error_state.MoveErrorTo(&scan_target());
|
||||
scanner_error_state.MoveErrorTo(next_);
|
||||
octal_error_state.MoveErrorTo(next_);
|
||||
}
|
||||
} else if (c < 0) {
|
||||
// Unterminated template literal
|
||||
@ -638,16 +660,14 @@ Token::Value Scanner::ScanTemplateSpan() {
|
||||
AddLiteralChar(c);
|
||||
}
|
||||
}
|
||||
|
||||
literal.Complete();
|
||||
scan_target().location.end_pos = source_pos();
|
||||
scan_target().token = result;
|
||||
scan_target().contextual_token = Token::UNINITIALIZED;
|
||||
next().location.end_pos = source_pos();
|
||||
next().token = result;
|
||||
next().contextual_token = Token::UNINITIALIZED;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
|
||||
Handle<String> tmp;
|
||||
if (source_url_.length() > 0) {
|
||||
@ -886,10 +906,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
|
||||
return Token::ILLEGAL;
|
||||
}
|
||||
|
||||
if (scan_target().literal_chars.one_byte_literal().length() <= 10 &&
|
||||
if (next().literal_chars.one_byte_literal().length() <= 10 &&
|
||||
value <= Smi::kMaxValue && c0_ != '.' &&
|
||||
!unicode_cache_->IsIdentifierStart(c0_)) {
|
||||
scan_target().smi_value_ = static_cast<uint32_t>(value);
|
||||
next().smi_value_ = static_cast<uint32_t>(value);
|
||||
literal.Complete();
|
||||
|
||||
if (kind == DECIMAL_WITH_LEADING_ZERO) {
|
||||
@ -1148,8 +1168,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
|
||||
}
|
||||
} else if (c0_ <= kMaxAscii && c0_ != '\\') {
|
||||
// Only a-z+ or _: could be a keyword or identifier.
|
||||
Vector<const uint8_t> chars =
|
||||
scan_target().literal_chars.one_byte_literal();
|
||||
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
|
||||
Token::Value token =
|
||||
KeywordOrIdentifierToken(chars.start(), chars.length());
|
||||
if (token == Token::IDENTIFIER ||
|
||||
@ -1199,9 +1218,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
|
||||
}
|
||||
}
|
||||
|
||||
if (scan_target().literal_chars.is_one_byte()) {
|
||||
Vector<const uint8_t> chars =
|
||||
scan_target().literal_chars.one_byte_literal();
|
||||
if (next().literal_chars.is_one_byte()) {
|
||||
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
|
||||
Token::Value token =
|
||||
KeywordOrIdentifierToken(chars.start(), chars.length());
|
||||
/* TODO(adamk): YIELD should be handled specially. */
|
||||
@ -1229,9 +1247,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
|
||||
}
|
||||
|
||||
bool Scanner::ScanRegExpPattern() {
|
||||
DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
|
||||
DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
|
||||
// Make sure the scanner didn't scan beyond the regexp start.
|
||||
DCHECK(!HasToken(2));
|
||||
|
||||
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
|
||||
bool in_character_class = false;
|
||||
@ -1240,7 +1257,9 @@ bool Scanner::ScanRegExpPattern() {
|
||||
// the scanner should pass uninterpreted bodies to the RegExp
|
||||
// constructor.
|
||||
LiteralScope literal(this);
|
||||
if (next().token == Token::ASSIGN_DIV) AddLiteralChar('=');
|
||||
if (next().token == Token::ASSIGN_DIV) {
|
||||
AddLiteralChar('=');
|
||||
}
|
||||
|
||||
while (c0_ != '/' || in_character_class) {
|
||||
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
|
||||
@ -1271,14 +1290,13 @@ bool Scanner::ScanRegExpPattern() {
|
||||
Advance(); // consume '/'
|
||||
|
||||
literal.Complete();
|
||||
scan_target().token = Token::REGEXP_LITERAL;
|
||||
scan_target().contextual_token = Token::UNINITIALIZED;
|
||||
next().token = Token::REGEXP_LITERAL;
|
||||
next().contextual_token = Token::UNINITIALIZED;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
|
||||
DCHECK(!HasToken(2));
|
||||
DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
|
||||
|
||||
// Scan regular expression flags.
|
||||
@ -1314,7 +1332,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
|
||||
flags |= flag;
|
||||
}
|
||||
|
||||
scan_target().location.end_pos = source_pos();
|
||||
next().location.end_pos = source_pos();
|
||||
return Just(RegExp::Flags(flags));
|
||||
}
|
||||
|
||||
@ -1374,14 +1392,18 @@ void Scanner::SeekNext(size_t position) {
|
||||
// TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
|
||||
|
||||
// To re-scan from a given character position, we need to:
|
||||
// 1. Move the stream to the right position,
|
||||
// 1, Reset the current_, next_ and next_next_ tokens
|
||||
// (next_ + next_next_ will be overwritten by Next(),
|
||||
// current_ will remain unchanged, so overwrite it fully.)
|
||||
for (TokenDesc& token : token_storage_) {
|
||||
token.token = Token::UNINITIALIZED;
|
||||
token.contextual_token = Token::UNINITIALIZED;
|
||||
}
|
||||
// 2, reset the source to the desired position,
|
||||
source_->Seek(position);
|
||||
// 2. refill the one-character buffer with the first character from the
|
||||
// stream,
|
||||
// 3, re-scan, by scanning the look-ahead char + 1 token (next_).
|
||||
c0_ = source_->Advance();
|
||||
// 3. Reset the token storage, and
|
||||
ResetTokenStorage();
|
||||
// 4. scan the first token.
|
||||
next().after_line_terminator = false;
|
||||
Scan();
|
||||
DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
|
||||
}
|
||||
|
@ -237,31 +237,9 @@ class Scanner {
|
||||
void Initialize();
|
||||
|
||||
// Returns the next token and advances input.
|
||||
V8_INLINE Token::Value Next() {
|
||||
// TODO(verwaest): Remove.
|
||||
if (next().token == Token::EOS) {
|
||||
next_target().location = current().location;
|
||||
}
|
||||
// Advance current token.
|
||||
token_start_ = TokenIndex(1);
|
||||
// Scan the next token if it's not yet ready.
|
||||
if (V8_LIKELY(!HasToken(1))) Scan();
|
||||
// Return current token.
|
||||
DCHECK(HasToken(1));
|
||||
return current().token;
|
||||
}
|
||||
|
||||
Token::Value Next();
|
||||
// Returns the token following peek()
|
||||
V8_INLINE Token::Value PeekAhead() {
|
||||
DCHECK_NE(Token::DIV, next().token);
|
||||
DCHECK_NE(Token::ASSIGN_DIV, next().token);
|
||||
DCHECK(HasToken(1));
|
||||
|
||||
if (V8_LIKELY(!HasToken(2))) Scan();
|
||||
|
||||
return next_next().token;
|
||||
}
|
||||
|
||||
Token::Value PeekAhead();
|
||||
// Returns the current token again.
|
||||
Token::Value current_token() { return current().token; }
|
||||
|
||||
@ -395,8 +373,7 @@ class Scanner {
|
||||
// Scans the input as a template literal
|
||||
Token::Value ScanTemplateContinuation() {
|
||||
DCHECK_EQ(next().token, Token::RBRACE);
|
||||
DCHECK(!HasToken(2));
|
||||
DCHECK_EQ(source_pos() - 1, scan_target().location.beg_pos);
|
||||
DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
|
||||
return ScanTemplateSpan();
|
||||
}
|
||||
|
||||
@ -535,7 +512,7 @@ class Scanner {
|
||||
class LiteralScope {
|
||||
public:
|
||||
explicit LiteralScope(Scanner* scanner)
|
||||
: buffer_(&scanner->scan_target().literal_chars), complete_(false) {
|
||||
: buffer_(&scanner->next().literal_chars), complete_(false) {
|
||||
buffer_->Start();
|
||||
}
|
||||
~LiteralScope() {
|
||||
@ -584,6 +561,10 @@ class Scanner {
|
||||
STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
|
||||
Advance();
|
||||
|
||||
current_ = &token_storage_[0];
|
||||
next_ = &token_storage_[1];
|
||||
next_next_ = &token_storage_[2];
|
||||
|
||||
found_html_comment_ = false;
|
||||
scanner_error_ = MessageTemplate::kNone;
|
||||
}
|
||||
@ -604,16 +585,12 @@ class Scanner {
|
||||
// Seek to the next_ token at the given position.
|
||||
void SeekNext(size_t position);
|
||||
|
||||
// Literal buffer support
|
||||
V8_INLINE void AddLiteralChar(uc32 c) {
|
||||
scan_target().literal_chars.AddChar(c);
|
||||
}
|
||||
V8_INLINE void AddLiteralChar(char c) {
|
||||
scan_target().literal_chars.AddChar(c);
|
||||
}
|
||||
V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
|
||||
|
||||
V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
|
||||
|
||||
V8_INLINE void AddRawLiteralChar(uc32 c) {
|
||||
scan_target().raw_literal_chars.AddChar(c);
|
||||
next().raw_literal_chars.AddChar(c);
|
||||
}
|
||||
|
||||
V8_INLINE void AddLiteralCharAdvance() {
|
||||
@ -737,7 +714,7 @@ class Scanner {
|
||||
|
||||
// Scans a single JavaScript token.
|
||||
V8_INLINE Token::Value ScanSingleToken();
|
||||
void Scan();
|
||||
V8_INLINE void Scan();
|
||||
|
||||
V8_INLINE Token::Value SkipWhiteSpace();
|
||||
Token::Value SkipSingleHTMLComment();
|
||||
@ -808,36 +785,17 @@ class Scanner {
|
||||
LiteralBuffer source_url_;
|
||||
LiteralBuffer source_mapping_url_;
|
||||
|
||||
static const int kNumberOfTokens = 1 << 2;
|
||||
static const int kTokenStorageMask = kNumberOfTokens - 1;
|
||||
TokenDesc token_storage_[3];
|
||||
|
||||
TokenDesc token_storage_[kNumberOfTokens];
|
||||
// Index of current token in token_storage_.
|
||||
int token_start_ = 0;
|
||||
// Index of last scanned token in token_storage. We typically scan the next
|
||||
// token afterwards. Initially this points to the initial current token since
|
||||
// we always scan the next token and move the previous next to current.
|
||||
int token_end_ = 0;
|
||||
TokenDesc& next() { return *next_; }
|
||||
|
||||
void ResetTokenStorage() { token_start_ = token_end_ = 0; }
|
||||
const TokenDesc& current() const { return *current_; }
|
||||
const TokenDesc& next() const { return *next_; }
|
||||
const TokenDesc& next_next() const { return *next_next_; }
|
||||
|
||||
int TokenIndex(int i) const { return (token_start_ + i) & kTokenStorageMask; }
|
||||
|
||||
bool HasToken(int i) const {
|
||||
return i <= ((token_end_ - token_start_) & kTokenStorageMask);
|
||||
}
|
||||
|
||||
const TokenDesc& GetToken(int i) const {
|
||||
DCHECK(HasToken(i));
|
||||
return token_storage_[TokenIndex(i)];
|
||||
}
|
||||
|
||||
const TokenDesc& current() const { return GetToken(0); }
|
||||
const TokenDesc& next() const { return GetToken(1); }
|
||||
const TokenDesc& next_next() const { return GetToken(2); }
|
||||
|
||||
TokenDesc& scan_target() { return token_storage_[token_end_]; }
|
||||
TokenDesc& next_target() { return token_storage_[TokenIndex(1)]; }
|
||||
TokenDesc* current_; // desc for current token (as returned by Next())
|
||||
TokenDesc* next_; // desc for next token (one token look-ahead)
|
||||
TokenDesc* next_next_; // desc for the token after next (after PeekAhead())
|
||||
|
||||
// Input stream. Must be initialized to an Utf16CharacterStream.
|
||||
Utf16CharacterStream* const source_;
|
||||
|
Loading…
Reference in New Issue
Block a user