From 103d886a7e12a3f12a42b26f8ff92d08bdd46d3b Mon Sep 17 00:00:00 2001 From: Toon Verwaest Date: Tue, 28 Aug 2018 17:48:17 +0200 Subject: [PATCH] [scanner] Revert to pointer tracking token storage rather than range Using a circular buffer regresses performance unfortunately. Change-Id: Id5b68cae798d21f46376141f86d5707794bf08d6 Reviewed-on: https://chromium-review.googlesource.com/1194064 Reviewed-by: Igor Sheludko Commit-Queue: Toon Verwaest Cr-Commit-Position: refs/heads/master@{#55478} --- src/parsing/scanner-inl.h | 24 ++++++-- src/parsing/scanner.cc | 126 ++++++++++++++++++++++---------------- src/parsing/scanner.h | 84 +++++++------------------ 3 files changed, 115 insertions(+), 119 deletions(-) diff --git a/src/parsing/scanner-inl.h b/src/parsing/scanner-inl.h index 29bcea516f..9b9d6e6f6a 100644 --- a/src/parsing/scanner-inl.h +++ b/src/parsing/scanner-inl.h @@ -159,7 +159,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() { // Advance as long as character is a WhiteSpace or LineTerminator. // Remember if the latter is the case. if (unibrow::IsLineTerminator(c0_)) { - scan_target().after_line_terminator = true; + next().after_line_terminator = true; } else if (!unicode_cache_->IsWhiteSpace(c0_)) { break; } @@ -178,7 +178,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() { V8_INLINE Token::Value Scanner::ScanSingleToken() { Token::Value token; do { - scan_target().location.beg_pos = source_pos(); + next().location.beg_pos = source_pos(); if (static_cast(c0_) <= 0x7F) { Token::Value token = one_char_tokens[c0_]; @@ -242,7 +242,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() { Advance(); if (c0_ == '-') { Advance(); - if (c0_ == '>' && scan_target().after_line_terminator) { + if (c0_ == '>' && next().after_line_terminator) { // For compatibility with SpiderMonkey, we skip lines that // start with an HTML comment end '-->'. 
token = SkipSingleHTMLComment(); @@ -330,7 +330,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() { Token::Value token = ScanIdentifierOrKeyword(); if (!Token::IsContextualKeyword(token)) return token; - scan_target().contextual_token = token; + next().contextual_token = token; return Token::IDENTIFIER; } if (IsDecimalDigit(c0_)) return ScanNumber(false); @@ -344,6 +344,22 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() { return token; } +void Scanner::Scan() { + next().literal_chars.Drop(); + next().raw_literal_chars.Drop(); + next().contextual_token = Token::UNINITIALIZED; + next().invalid_template_escape_message = MessageTemplate::kNone; + + next().token = ScanSingleToken(); + next().location.end_pos = source_pos(); + +#ifdef DEBUG + SanityCheckTokenDesc(current()); + SanityCheckTokenDesc(next()); + SanityCheckTokenDesc(next_next()); +#endif +} + } // namespace internal } // namespace v8 diff --git a/src/parsing/scanner.cc b/src/parsing/scanner.cc index 5cf6765975..2a4d08d12f 100644 --- a/src/parsing/scanner.cc +++ b/src/parsing/scanner.cc @@ -186,6 +186,7 @@ void Scanner::Initialize() { // Need to capture identifiers in order to recognize "get" and "set" // in object literals. Init(); + next().after_line_terminator = true; Scan(); } @@ -231,6 +232,45 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) { return x; } +Token::Value Scanner::Next() { + if (next().token == Token::EOS) next().location = current().location; + // Rotate through tokens. + TokenDesc* previous = current_; + current_ = next_; + // Either we already have the next token lined up, in which case next_next_ + // simply becomes next_. In that case we use current_ as new next_next_ and + // clear its token to indicate that it wasn't scanned yet. Otherwise we use + // current_ as next_ and scan into it, leaving next_next_ uninitialized. 
+ if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) { + next_ = previous; + next().after_line_terminator = false; + Scan(); + } else { + next_ = next_next_; + next_next_ = previous; + previous->token = Token::UNINITIALIZED; + previous->contextual_token = Token::UNINITIALIZED; + DCHECK_NE(Token::UNINITIALIZED, current().token); + } + return current().token; +} + +Token::Value Scanner::PeekAhead() { + DCHECK(next().token != Token::DIV); + DCHECK(next().token != Token::ASSIGN_DIV); + + if (next_next().token != Token::UNINITIALIZED) { + return next_next().token; + } + TokenDesc* temp = next_; + next_ = next_next_; + next().after_line_terminator = false; + Scan(); + next_next_ = next_; + next_ = temp; + return next_next().token; +} + Token::Value Scanner::SkipSingleHTMLComment() { if (is_module_) { ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule); @@ -319,11 +359,10 @@ Token::Value Scanner::SkipMultiLineComment() { while (c0_ != kEndOfInput) { DCHECK(!unibrow::IsLineTerminator(kEndOfInput)); - if (!scan_target().after_line_terminator && - unibrow::IsLineTerminator(c0_)) { + if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) { // Following ECMA-262, section 7.4, a comment containing // a newline will make the comment count as a line-terminator. 
- scan_target().after_line_terminator = true; + next().after_line_terminator = true; } while (V8_UNLIKELY(c0_ == '*')) { @@ -354,25 +393,6 @@ Token::Value Scanner::ScanHtmlComment() { return SkipSingleHTMLComment(); } -void Scanner::Scan() { - token_end_ = (token_end_ + 1) & kTokenStorageMask; - - scan_target().after_line_terminator = (source_pos() == 0); - scan_target().literal_chars.Drop(); - scan_target().raw_literal_chars.Drop(); - scan_target().contextual_token = Token::UNINITIALIZED; - scan_target().invalid_template_escape_message = MessageTemplate::kNone; - - scan_target().token = ScanSingleToken(); - scan_target().location.end_pos = source_pos(); - -#ifdef DEBUG - for (TokenDesc& token : token_storage_) { - SanityCheckTokenDesc(token); - } -#endif -} - #ifdef DEBUG void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { // Most tokens should not have literal_chars or even raw_literal chars. @@ -434,9 +454,11 @@ void Scanner::SeekForward(int pos) { if (pos != current_pos) { source_->Seek(pos); Advance(); + // This function is only called to seek to the location + // of the end of a function (at the "}" token). It doesn't matter + // whether there was a line terminator in the part we skip. + next().after_line_terminator = false; } - - ResetTokenStorage(); Scan(); } @@ -586,7 +608,7 @@ Token::Value Scanner::ScanTemplateSpan() { Token::Value result = Token::TEMPLATE_SPAN; LiteralScope literal(this); - scan_target().raw_literal_chars.Start(); + next().raw_literal_chars.Start(); const bool capture_raw = true; while (true) { uc32 c = c0_; @@ -619,8 +641,8 @@ Token::Value Scanner::ScanTemplateSpan() { DCHECK_EQ(!success, has_error()); // For templates, invalid escape sequence checking is handled in the // parser. 
- scanner_error_state.MoveErrorTo(&scan_target()); - octal_error_state.MoveErrorTo(&scan_target()); + scanner_error_state.MoveErrorTo(next_); + octal_error_state.MoveErrorTo(next_); } } else if (c < 0) { // Unterminated template literal @@ -638,16 +660,14 @@ Token::Value Scanner::ScanTemplateSpan() { AddLiteralChar(c); } } - literal.Complete(); - scan_target().location.end_pos = source_pos(); - scan_target().token = result; - scan_target().contextual_token = Token::UNINITIALIZED; + next().location.end_pos = source_pos(); + next().token = result; + next().contextual_token = Token::UNINITIALIZED; return result; } - Handle Scanner::SourceUrl(Isolate* isolate) const { Handle tmp; if (source_url_.length() > 0) { @@ -886,10 +906,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) { return Token::ILLEGAL; } - if (scan_target().literal_chars.one_byte_literal().length() <= 10 && + if (next().literal_chars.one_byte_literal().length() <= 10 && value <= Smi::kMaxValue && c0_ != '.' && !unicode_cache_->IsIdentifierStart(c0_)) { - scan_target().smi_value_ = static_cast(value); + next().smi_value_ = static_cast(value); literal.Complete(); if (kind == DECIMAL_WITH_LEADING_ZERO) { @@ -1148,8 +1168,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } } else if (c0_ <= kMaxAscii && c0_ != '\\') { // Only a-z+ or _: could be a keyword or identifier. 
- Vector chars = - scan_target().literal_chars.one_byte_literal(); + Vector chars = next().literal_chars.one_byte_literal(); Token::Value token = KeywordOrIdentifierToken(chars.start(), chars.length()); if (token == Token::IDENTIFIER || @@ -1199,9 +1218,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } } - if (scan_target().literal_chars.is_one_byte()) { - Vector chars = - scan_target().literal_chars.one_byte_literal(); + if (next().literal_chars.is_one_byte()) { + Vector chars = next().literal_chars.one_byte_literal(); Token::Value token = KeywordOrIdentifierToken(chars.start(), chars.length()); /* TODO(adamk): YIELD should be handled specially. */ @@ -1229,9 +1247,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } bool Scanner::ScanRegExpPattern() { + DCHECK_EQ(Token::UNINITIALIZED, next_next().token); DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV); - // Make sure the scanner didn't scan beyond the regexp start. - DCHECK(!HasToken(2)); // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags bool in_character_class = false; @@ -1240,7 +1257,9 @@ bool Scanner::ScanRegExpPattern() { // the scanner should pass uninterpreted bodies to the RegExp // constructor. 
LiteralScope literal(this); - if (next().token == Token::ASSIGN_DIV) AddLiteralChar('='); + if (next().token == Token::ASSIGN_DIV) { + AddLiteralChar('='); + } while (c0_ != '/' || in_character_class) { if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) { @@ -1271,14 +1290,13 @@ bool Scanner::ScanRegExpPattern() { Advance(); // consume '/' literal.Complete(); - scan_target().token = Token::REGEXP_LITERAL; - scan_target().contextual_token = Token::UNINITIALIZED; + next().token = Token::REGEXP_LITERAL; + next().contextual_token = Token::UNINITIALIZED; return true; } Maybe Scanner::ScanRegExpFlags() { - DCHECK(!HasToken(2)); DCHECK_EQ(Token::REGEXP_LITERAL, next().token); // Scan regular expression flags. @@ -1314,7 +1332,7 @@ Maybe Scanner::ScanRegExpFlags() { flags |= flag; } - scan_target().location.end_pos = source_pos(); + next().location.end_pos = source_pos(); return Just(RegExp::Flags(flags)); } @@ -1374,14 +1392,18 @@ void Scanner::SeekNext(size_t position) { // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions. // To re-scan from a given character position, we need to: - // 1. Move the stream to the right position, + // 1, Reset the current_, next_ and next_next_ tokens + // (next_ + next_next_ will be overwritten by Next(), + // current_ will remain unchanged, so overwrite it fully.) + for (TokenDesc& token : token_storage_) { + token.token = Token::UNINITIALIZED; + token.contextual_token = Token::UNINITIALIZED; + } + // 2, reset the source to the desired position, source_->Seek(position); - // 2. refill the one-character buffer with the first character from the - // stream, + // 3, re-scan, by scanning the look-ahead char + 1 token (next_). c0_ = source_->Advance(); - // 3. Reset the token storage, and - ResetTokenStorage(); - // 4. scan the first token. 
+ next().after_line_terminator = false; Scan(); DCHECK_EQ(next().location.beg_pos, static_cast(position)); } diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h index c970fc5ad3..00c844020e 100644 --- a/src/parsing/scanner.h +++ b/src/parsing/scanner.h @@ -237,31 +237,9 @@ class Scanner { void Initialize(); // Returns the next token and advances input. - V8_INLINE Token::Value Next() { - // TODO(verwaest): Remove. - if (next().token == Token::EOS) { - next_target().location = current().location; - } - // Advance current token. - token_start_ = TokenIndex(1); - // Scan the next token if it's not yet ready. - if (V8_LIKELY(!HasToken(1))) Scan(); - // Return current token. - DCHECK(HasToken(1)); - return current().token; - } - + Token::Value Next(); // Returns the token following peek() - V8_INLINE Token::Value PeekAhead() { - DCHECK_NE(Token::DIV, next().token); - DCHECK_NE(Token::ASSIGN_DIV, next().token); - DCHECK(HasToken(1)); - - if (V8_LIKELY(!HasToken(2))) Scan(); - - return next_next().token; - } - + Token::Value PeekAhead(); // Returns the current token again. 
Token::Value current_token() { return current().token; } @@ -395,8 +373,7 @@ class Scanner { // Scans the input as a template literal Token::Value ScanTemplateContinuation() { DCHECK_EQ(next().token, Token::RBRACE); - DCHECK(!HasToken(2)); - DCHECK_EQ(source_pos() - 1, scan_target().location.beg_pos); + DCHECK_EQ(source_pos() - 1, next().location.beg_pos); return ScanTemplateSpan(); } @@ -535,7 +512,7 @@ class Scanner { class LiteralScope { public: explicit LiteralScope(Scanner* scanner) - : buffer_(&scanner->scan_target().literal_chars), complete_(false) { + : buffer_(&scanner->next().literal_chars), complete_(false) { buffer_->Start(); } ~LiteralScope() { @@ -584,6 +561,10 @@ class Scanner { STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); Advance(); + current_ = &token_storage_[0]; + next_ = &token_storage_[1]; + next_next_ = &token_storage_[2]; + found_html_comment_ = false; scanner_error_ = MessageTemplate::kNone; } @@ -604,16 +585,12 @@ class Scanner { // Seek to the next_ token at the given position. void SeekNext(size_t position); - // Literal buffer support - V8_INLINE void AddLiteralChar(uc32 c) { - scan_target().literal_chars.AddChar(c); - } - V8_INLINE void AddLiteralChar(char c) { - scan_target().literal_chars.AddChar(c); - } + V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); } + + V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); } V8_INLINE void AddRawLiteralChar(uc32 c) { - scan_target().raw_literal_chars.AddChar(c); + next().raw_literal_chars.AddChar(c); } V8_INLINE void AddLiteralCharAdvance() { @@ -737,7 +714,7 @@ class Scanner { // Scans a single JavaScript token. 
V8_INLINE Token::Value ScanSingleToken(); - void Scan(); + V8_INLINE void Scan(); V8_INLINE Token::Value SkipWhiteSpace(); Token::Value SkipSingleHTMLComment(); @@ -808,36 +785,17 @@ class Scanner { LiteralBuffer source_url_; LiteralBuffer source_mapping_url_; - static const int kNumberOfTokens = 1 << 2; - static const int kTokenStorageMask = kNumberOfTokens - 1; + TokenDesc token_storage_[3]; - TokenDesc token_storage_[kNumberOfTokens]; - // Index of current token in token_storage_. - int token_start_ = 0; - // Index of last scanned token in token_storage. We typically scan the next - // token aftewards. Initially this points to the initial current token since - // we always scan the next token and move the previous next to current. - int token_end_ = 0; + TokenDesc& next() { return *next_; } - void ResetTokenStorage() { token_start_ = token_end_ = 0; } + const TokenDesc& current() const { return *current_; } + const TokenDesc& next() const { return *next_; } + const TokenDesc& next_next() const { return *next_next_; } - int TokenIndex(int i) const { return (token_start_ + i) & kTokenStorageMask; } - - bool HasToken(int i) const { - return i <= ((token_end_ - token_start_) & kTokenStorageMask); - } - - const TokenDesc& GetToken(int i) const { - DCHECK(HasToken(i)); - return token_storage_[TokenIndex(i)]; - } - - const TokenDesc& current() const { return GetToken(0); } - const TokenDesc& next() const { return GetToken(1); } - const TokenDesc& next_next() const { return GetToken(2); } - - TokenDesc& scan_target() { return token_storage_[token_end_]; } - TokenDesc& next_target() { return token_storage_[TokenIndex(1)]; } + TokenDesc* current_; // desc for current token (as returned by Next()) + TokenDesc* next_; // desc for next token (one token look-ahead) + TokenDesc* next_next_; // desc for the token after next (after PeekAhead()) // Input stream. Must be initialized to an Utf16CharacterStream. Utf16CharacterStream* const source_;