[scanner] Revert to pointer tracking token storage rather than range

Using a circular buffer regresses performance unfortunately.

Change-Id: Id5b68cae798d21f46376141f86d5707794bf08d6
Reviewed-on: https://chromium-review.googlesource.com/1194064
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55478}
This commit is contained in:
Toon Verwaest 2018-08-28 17:48:17 +02:00 committed by Commit Bot
parent 0bceaf1e7c
commit 103d886a7e
3 changed files with 115 additions and 119 deletions

View File

@ -159,7 +159,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
// Advance as long as character is a WhiteSpace or LineTerminator. // Advance as long as character is a WhiteSpace or LineTerminator.
// Remember if the latter is the case. // Remember if the latter is the case.
if (unibrow::IsLineTerminator(c0_)) { if (unibrow::IsLineTerminator(c0_)) {
scan_target().after_line_terminator = true; next().after_line_terminator = true;
} else if (!unicode_cache_->IsWhiteSpace(c0_)) { } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break; break;
} }
@ -178,7 +178,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
V8_INLINE Token::Value Scanner::ScanSingleToken() { V8_INLINE Token::Value Scanner::ScanSingleToken() {
Token::Value token; Token::Value token;
do { do {
scan_target().location.beg_pos = source_pos(); next().location.beg_pos = source_pos();
if (static_cast<unsigned>(c0_) <= 0x7F) { if (static_cast<unsigned>(c0_) <= 0x7F) {
Token::Value token = one_char_tokens[c0_]; Token::Value token = one_char_tokens[c0_];
@ -242,7 +242,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
Advance(); Advance();
if (c0_ == '-') { if (c0_ == '-') {
Advance(); Advance();
if (c0_ == '>' && scan_target().after_line_terminator) { if (c0_ == '>' && next().after_line_terminator) {
// For compatibility with SpiderMonkey, we skip lines that // For compatibility with SpiderMonkey, we skip lines that
// start with an HTML comment end '-->'. // start with an HTML comment end '-->'.
token = SkipSingleHTMLComment(); token = SkipSingleHTMLComment();
@ -330,7 +330,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
Token::Value token = ScanIdentifierOrKeyword(); Token::Value token = ScanIdentifierOrKeyword();
if (!Token::IsContextualKeyword(token)) return token; if (!Token::IsContextualKeyword(token)) return token;
scan_target().contextual_token = token; next().contextual_token = token;
return Token::IDENTIFIER; return Token::IDENTIFIER;
} }
if (IsDecimalDigit(c0_)) return ScanNumber(false); if (IsDecimalDigit(c0_)) return ScanNumber(false);
@ -344,6 +344,22 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
return token; return token;
} }
void Scanner::Scan() {
next().literal_chars.Drop();
next().raw_literal_chars.Drop();
next().contextual_token = Token::UNINITIALIZED;
next().invalid_template_escape_message = MessageTemplate::kNone;
next().token = ScanSingleToken();
next().location.end_pos = source_pos();
#ifdef DEBUG
SanityCheckTokenDesc(current());
SanityCheckTokenDesc(next());
SanityCheckTokenDesc(next_next());
#endif
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8

View File

@ -186,6 +186,7 @@ void Scanner::Initialize() {
// Need to capture identifiers in order to recognize "get" and "set" // Need to capture identifiers in order to recognize "get" and "set"
// in object literals. // in object literals.
Init(); Init();
next().after_line_terminator = true;
Scan(); Scan();
} }
@ -231,6 +232,45 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
return x; return x;
} }
// Advances the scanner by one token: the lookahead token (next_) becomes the
// current token, and a new lookahead is either scanned or taken from
// next_next_ if it was already scanned. Returns the new current token.
Token::Value Scanner::Next() {
// When the lookahead is EOS, copy the current token's location into it before
// it becomes the current token.
if (next().token == Token::EOS) next().location = current().location;
// Rotate through tokens.
TokenDesc* previous = current_;
current_ = next_;
// Either we already have the next token lined up, in which case next_next_
// simply becomes next_. In that case we use current_ as new next_next_ and
// clear its token to indicate that it wasn't scanned yet. Otherwise we use
// current_ as next_ and scan into it, leaving next_next_ uninitialized.
if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
// Common case: no token was peeked ahead. Recycle the old current slot as the
// new lookahead slot and scan into it.
next_ = previous;
// Clear the flag before scanning into the recycled slot.
next().after_line_terminator = false;
Scan();
} else {
// A token was already scanned ahead: promote it to next_ and recycle the old
// current slot as the (now unscanned) next_next_ slot.
next_ = next_next_;
next_next_ = previous;
previous->token = Token::UNINITIALIZED;
previous->contextual_token = Token::UNINITIALIZED;
DCHECK_NE(Token::UNINITIALIZED, current().token);
}
return current().token;
}
// Returns the token that follows the lookahead token, scanning it on first
// request. Must not be called while the lookahead is DIV or ASSIGN_DIV.
Token::Value Scanner::PeekAhead() {
  DCHECK(next().token != Token::DIV);
  DCHECK(next().token != Token::ASSIGN_DIV);

  if (next_next().token == Token::UNINITIALIZED) {
    // Scan() writes through next_, so temporarily aim next_ at the
    // next_next_ slot, scan into it, and then restore the real lookahead.
    TokenDesc* saved_lookahead = next_;
    next_ = next_next_;
    next().after_line_terminator = false;
    Scan();
    next_next_ = next_;
    next_ = saved_lookahead;
  }

  return next_next().token;
}
Token::Value Scanner::SkipSingleHTMLComment() { Token::Value Scanner::SkipSingleHTMLComment() {
if (is_module_) { if (is_module_) {
ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule); ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
@ -319,11 +359,10 @@ Token::Value Scanner::SkipMultiLineComment() {
while (c0_ != kEndOfInput) { while (c0_ != kEndOfInput) {
DCHECK(!unibrow::IsLineTerminator(kEndOfInput)); DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
if (!scan_target().after_line_terminator && if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
unibrow::IsLineTerminator(c0_)) {
// Following ECMA-262, section 7.4, a comment containing // Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator. // a newline will make the comment count as a line-terminator.
scan_target().after_line_terminator = true; next().after_line_terminator = true;
} }
while (V8_UNLIKELY(c0_ == '*')) { while (V8_UNLIKELY(c0_ == '*')) {
@ -354,25 +393,6 @@ Token::Value Scanner::ScanHtmlComment() {
return SkipSingleHTMLComment(); return SkipSingleHTMLComment();
} }
// Circular-buffer variant of Scan(): advances the write index of the token
// storage and scans a single token into the slot it then designates.
void Scanner::Scan() {
// Move the write index to the following slot, wrapping via the storage mask.
token_end_ = (token_end_ + 1) & kTokenStorageMask;
// The flag starts out true only when scanning begins at source position 0.
scan_target().after_line_terminator = (source_pos() == 0);
// Clear literal buffers and per-token state left in the slot being reused.
scan_target().literal_chars.Drop();
scan_target().raw_literal_chars.Drop();
scan_target().contextual_token = Token::UNINITIALIZED;
scan_target().invalid_template_escape_message = MessageTemplate::kNone;
// Scan the token itself, then record where it ended.
scan_target().token = ScanSingleToken();
scan_target().location.end_pos = source_pos();
#ifdef DEBUG
// Verify every descriptor in the token storage, not just the one just scanned.
for (TokenDesc& token : token_storage_) {
SanityCheckTokenDesc(token);
}
#endif
}
#ifdef DEBUG #ifdef DEBUG
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
// Most tokens should not have literal_chars or even raw_literal chars. // Most tokens should not have literal_chars or even raw_literal chars.
@ -434,9 +454,11 @@ void Scanner::SeekForward(int pos) {
if (pos != current_pos) { if (pos != current_pos) {
source_->Seek(pos); source_->Seek(pos);
Advance(); Advance();
// This function is only called to seek to the location
// of the end of a function (at the "}" token). It doesn't matter
// whether there was a line terminator in the part we skip.
next().after_line_terminator = false;
} }
ResetTokenStorage();
Scan(); Scan();
} }
@ -586,7 +608,7 @@ Token::Value Scanner::ScanTemplateSpan() {
Token::Value result = Token::TEMPLATE_SPAN; Token::Value result = Token::TEMPLATE_SPAN;
LiteralScope literal(this); LiteralScope literal(this);
scan_target().raw_literal_chars.Start(); next().raw_literal_chars.Start();
const bool capture_raw = true; const bool capture_raw = true;
while (true) { while (true) {
uc32 c = c0_; uc32 c = c0_;
@ -619,8 +641,8 @@ Token::Value Scanner::ScanTemplateSpan() {
DCHECK_EQ(!success, has_error()); DCHECK_EQ(!success, has_error());
// For templates, invalid escape sequence checking is handled in the // For templates, invalid escape sequence checking is handled in the
// parser. // parser.
scanner_error_state.MoveErrorTo(&scan_target()); scanner_error_state.MoveErrorTo(next_);
octal_error_state.MoveErrorTo(&scan_target()); octal_error_state.MoveErrorTo(next_);
} }
} else if (c < 0) { } else if (c < 0) {
// Unterminated template literal // Unterminated template literal
@ -638,16 +660,14 @@ Token::Value Scanner::ScanTemplateSpan() {
AddLiteralChar(c); AddLiteralChar(c);
} }
} }
literal.Complete(); literal.Complete();
scan_target().location.end_pos = source_pos(); next().location.end_pos = source_pos();
scan_target().token = result; next().token = result;
scan_target().contextual_token = Token::UNINITIALIZED; next().contextual_token = Token::UNINITIALIZED;
return result; return result;
} }
Handle<String> Scanner::SourceUrl(Isolate* isolate) const { Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
Handle<String> tmp; Handle<String> tmp;
if (source_url_.length() > 0) { if (source_url_.length() > 0) {
@ -886,10 +906,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
return Token::ILLEGAL; return Token::ILLEGAL;
} }
if (scan_target().literal_chars.one_byte_literal().length() <= 10 && if (next().literal_chars.one_byte_literal().length() <= 10 &&
value <= Smi::kMaxValue && c0_ != '.' && value <= Smi::kMaxValue && c0_ != '.' &&
!unicode_cache_->IsIdentifierStart(c0_)) { !unicode_cache_->IsIdentifierStart(c0_)) {
scan_target().smi_value_ = static_cast<uint32_t>(value); next().smi_value_ = static_cast<uint32_t>(value);
literal.Complete(); literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) { if (kind == DECIMAL_WITH_LEADING_ZERO) {
@ -1148,8 +1168,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
} }
} else if (c0_ <= kMaxAscii && c0_ != '\\') { } else if (c0_ <= kMaxAscii && c0_ != '\\') {
// Only a-z+ or _: could be a keyword or identifier. // Only a-z+ or _: could be a keyword or identifier.
Vector<const uint8_t> chars = Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
scan_target().literal_chars.one_byte_literal();
Token::Value token = Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length()); KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER || if (token == Token::IDENTIFIER ||
@ -1199,9 +1218,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
} }
} }
if (scan_target().literal_chars.is_one_byte()) { if (next().literal_chars.is_one_byte()) {
Vector<const uint8_t> chars = Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
scan_target().literal_chars.one_byte_literal();
Token::Value token = Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length()); KeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */ /* TODO(adamk): YIELD should be handled specially. */
@ -1229,9 +1247,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
} }
bool Scanner::ScanRegExpPattern() { bool Scanner::ScanRegExpPattern() {
DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV); DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
// Make sure the scanner didn't scan beyond the regexp start.
DCHECK(!HasToken(2));
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false; bool in_character_class = false;
@ -1240,7 +1257,9 @@ bool Scanner::ScanRegExpPattern() {
// the scanner should pass uninterpreted bodies to the RegExp // the scanner should pass uninterpreted bodies to the RegExp
// constructor. // constructor.
LiteralScope literal(this); LiteralScope literal(this);
if (next().token == Token::ASSIGN_DIV) AddLiteralChar('='); if (next().token == Token::ASSIGN_DIV) {
AddLiteralChar('=');
}
while (c0_ != '/' || in_character_class) { while (c0_ != '/' || in_character_class) {
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) { if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
@ -1271,14 +1290,13 @@ bool Scanner::ScanRegExpPattern() {
Advance(); // consume '/' Advance(); // consume '/'
literal.Complete(); literal.Complete();
scan_target().token = Token::REGEXP_LITERAL; next().token = Token::REGEXP_LITERAL;
scan_target().contextual_token = Token::UNINITIALIZED; next().contextual_token = Token::UNINITIALIZED;
return true; return true;
} }
Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
DCHECK(!HasToken(2));
DCHECK_EQ(Token::REGEXP_LITERAL, next().token); DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
// Scan regular expression flags. // Scan regular expression flags.
@ -1314,7 +1332,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
flags |= flag; flags |= flag;
} }
scan_target().location.end_pos = source_pos(); next().location.end_pos = source_pos();
return Just(RegExp::Flags(flags)); return Just(RegExp::Flags(flags));
} }
@ -1374,14 +1392,18 @@ void Scanner::SeekNext(size_t position) {
// TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions. // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
// To re-scan from a given character position, we need to: // To re-scan from a given character position, we need to:
// 1. Move the stream to the right position, // 1, Reset the current_, next_ and next_next_ tokens
// (next_ + next_next_ will be overwritten by Next(),
// current_ will remain unchanged, so overwrite it fully.)
for (TokenDesc& token : token_storage_) {
token.token = Token::UNINITIALIZED;
token.contextual_token = Token::UNINITIALIZED;
}
// 2, reset the source to the desired position,
source_->Seek(position); source_->Seek(position);
// 2. refill the one-character buffer with the first character from the // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
// stream,
c0_ = source_->Advance(); c0_ = source_->Advance();
// 3. Reset the token storage, and next().after_line_terminator = false;
ResetTokenStorage();
// 4. scan the first token.
Scan(); Scan();
DCHECK_EQ(next().location.beg_pos, static_cast<int>(position)); DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
} }

View File

@ -237,31 +237,9 @@ class Scanner {
void Initialize(); void Initialize();
// Returns the next token and advances input. // Returns the next token and advances input.
V8_INLINE Token::Value Next() { Token::Value Next();
// TODO(verwaest): Remove.
if (next().token == Token::EOS) {
next_target().location = current().location;
}
// Advance current token.
token_start_ = TokenIndex(1);
// Scan the next token if it's not yet ready.
if (V8_LIKELY(!HasToken(1))) Scan();
// Return current token.
DCHECK(HasToken(1));
return current().token;
}
// Returns the token following peek() // Returns the token following peek()
V8_INLINE Token::Value PeekAhead() { Token::Value PeekAhead();
DCHECK_NE(Token::DIV, next().token);
DCHECK_NE(Token::ASSIGN_DIV, next().token);
DCHECK(HasToken(1));
if (V8_LIKELY(!HasToken(2))) Scan();
return next_next().token;
}
// Returns the current token again. // Returns the current token again.
Token::Value current_token() { return current().token; } Token::Value current_token() { return current().token; }
@ -395,8 +373,7 @@ class Scanner {
// Scans the input as a template literal // Scans the input as a template literal
Token::Value ScanTemplateContinuation() { Token::Value ScanTemplateContinuation() {
DCHECK_EQ(next().token, Token::RBRACE); DCHECK_EQ(next().token, Token::RBRACE);
DCHECK(!HasToken(2)); DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
DCHECK_EQ(source_pos() - 1, scan_target().location.beg_pos);
return ScanTemplateSpan(); return ScanTemplateSpan();
} }
@ -535,7 +512,7 @@ class Scanner {
class LiteralScope { class LiteralScope {
public: public:
explicit LiteralScope(Scanner* scanner) explicit LiteralScope(Scanner* scanner)
: buffer_(&scanner->scan_target().literal_chars), complete_(false) { : buffer_(&scanner->next().literal_chars), complete_(false) {
buffer_->Start(); buffer_->Start();
} }
~LiteralScope() { ~LiteralScope() {
@ -584,6 +561,10 @@ class Scanner {
STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
Advance(); Advance();
current_ = &token_storage_[0];
next_ = &token_storage_[1];
next_next_ = &token_storage_[2];
found_html_comment_ = false; found_html_comment_ = false;
scanner_error_ = MessageTemplate::kNone; scanner_error_ = MessageTemplate::kNone;
} }
@ -604,16 +585,12 @@ class Scanner {
// Seek to the next_ token at the given position. // Seek to the next_ token at the given position.
void SeekNext(size_t position); void SeekNext(size_t position);
// Literal buffer support V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
V8_INLINE void AddLiteralChar(uc32 c) {
scan_target().literal_chars.AddChar(c); V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
}
V8_INLINE void AddLiteralChar(char c) {
scan_target().literal_chars.AddChar(c);
}
V8_INLINE void AddRawLiteralChar(uc32 c) { V8_INLINE void AddRawLiteralChar(uc32 c) {
scan_target().raw_literal_chars.AddChar(c); next().raw_literal_chars.AddChar(c);
} }
V8_INLINE void AddLiteralCharAdvance() { V8_INLINE void AddLiteralCharAdvance() {
@ -737,7 +714,7 @@ class Scanner {
// Scans a single JavaScript token. // Scans a single JavaScript token.
V8_INLINE Token::Value ScanSingleToken(); V8_INLINE Token::Value ScanSingleToken();
void Scan(); V8_INLINE void Scan();
V8_INLINE Token::Value SkipWhiteSpace(); V8_INLINE Token::Value SkipWhiteSpace();
Token::Value SkipSingleHTMLComment(); Token::Value SkipSingleHTMLComment();
@ -808,36 +785,17 @@ class Scanner {
LiteralBuffer source_url_; LiteralBuffer source_url_;
LiteralBuffer source_mapping_url_; LiteralBuffer source_mapping_url_;
static const int kNumberOfTokens = 1 << 2; TokenDesc token_storage_[3];
static const int kTokenStorageMask = kNumberOfTokens - 1;
TokenDesc token_storage_[kNumberOfTokens]; TokenDesc& next() { return *next_; }
// Index of current token in token_storage_.
int token_start_ = 0;
// Index of last scanned token in token_storage. We typically scan the next
// token afterwards. Initially this points to the initial current token since
// we always scan the next token and move the previous next to current.
int token_end_ = 0;
void ResetTokenStorage() { token_start_ = token_end_ = 0; } const TokenDesc& current() const { return *current_; }
const TokenDesc& next() const { return *next_; }
const TokenDesc& next_next() const { return *next_next_; }
int TokenIndex(int i) const { return (token_start_ + i) & kTokenStorageMask; } TokenDesc* current_; // desc for current token (as returned by Next())
TokenDesc* next_; // desc for next token (one token look-ahead)
bool HasToken(int i) const { TokenDesc* next_next_; // desc for the token after next (after PeakAhead())
return i <= ((token_end_ - token_start_) & kTokenStorageMask);
}
const TokenDesc& GetToken(int i) const {
DCHECK(HasToken(i));
return token_storage_[TokenIndex(i)];
}
const TokenDesc& current() const { return GetToken(0); }
const TokenDesc& next() const { return GetToken(1); }
const TokenDesc& next_next() const { return GetToken(2); }
TokenDesc& scan_target() { return token_storage_[token_end_]; }
TokenDesc& next_target() { return token_storage_[TokenIndex(1)]; }
// Input stream. Must be initialized to an Utf16CharacterStream. // Input stream. Must be initialized to an Utf16CharacterStream.
Utf16CharacterStream* const source_; Utf16CharacterStream* const source_;