From 103d886a7e12a3f12a42b26f8ff92d08bdd46d3b Mon Sep 17 00:00:00 2001
From: Toon Verwaest <verwaest@chromium.org>
Date: Tue, 28 Aug 2018 17:48:17 +0200
Subject: [PATCH] [scanner] Revert to pointer tracking token storage rather
 than range

Using a circular buffer regresses performance unfortunately.

Change-Id: Id5b68cae798d21f46376141f86d5707794bf08d6
Reviewed-on: https://chromium-review.googlesource.com/1194064
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55478}
---
 src/parsing/scanner-inl.h |  24 ++++++--
 src/parsing/scanner.cc    | 126 ++++++++++++++++++++++----------------
 src/parsing/scanner.h     |  84 +++++++------------------
 3 files changed, 115 insertions(+), 119 deletions(-)
diff --git a/src/parsing/scanner-inl.h b/src/parsing/scanner-inl.h
index 29bcea516f..9b9d6e6f6a 100644
--- a/src/parsing/scanner-inl.h
+++ b/src/parsing/scanner-inl.h
@@ -159,7 +159,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
     // Advance as long as character is a WhiteSpace or LineTerminator.
     // Remember if the latter is the case.
     if (unibrow::IsLineTerminator(c0_)) {
-      scan_target().after_line_terminator = true;
+      next().after_line_terminator = true;
     } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
       break;
     }
@@ -178,7 +178,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
 V8_INLINE Token::Value Scanner::ScanSingleToken() {
   Token::Value token;
   do {
-    scan_target().location.beg_pos = source_pos();
+    next().location.beg_pos = source_pos();
 
     if (static_cast<unsigned>(c0_) <= 0x7F) {
       Token::Value token = one_char_tokens[c0_];
@@ -242,7 +242,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
         Advance();
         if (c0_ == '-') {
           Advance();
-          if (c0_ == '>' && scan_target().after_line_terminator) {
+          if (c0_ == '>' && next().after_line_terminator) {
             // For compatibility with SpiderMonkey, we skip lines that
             // start with an HTML comment end '-->'.
             token = SkipSingleHTMLComment();
@@ -330,7 +330,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
           Token::Value token = ScanIdentifierOrKeyword();
           if (!Token::IsContextualKeyword(token)) return token;
 
-          scan_target().contextual_token = token;
+          next().contextual_token = token;
           return Token::IDENTIFIER;
         }
         if (IsDecimalDigit(c0_)) return ScanNumber(false);
@@ -344,6 +344,22 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
   return token;
 }
 
+void Scanner::Scan() {
+  next().literal_chars.Drop();
+  next().raw_literal_chars.Drop();
+  next().contextual_token = Token::UNINITIALIZED;
+  next().invalid_template_escape_message = MessageTemplate::kNone;
+
+  next().token = ScanSingleToken();
+  next().location.end_pos = source_pos();
+
+#ifdef DEBUG
+  SanityCheckTokenDesc(current());
+  SanityCheckTokenDesc(next());
+  SanityCheckTokenDesc(next_next());
+#endif
+}
+
 }  // namespace internal
 }  // namespace v8
 
diff --git a/src/parsing/scanner.cc b/src/parsing/scanner.cc
index 5cf6765975..2a4d08d12f 100644
--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@@ -186,6 +186,7 @@ void Scanner::Initialize() {
   // Need to capture identifiers in order to recognize "get" and "set"
   // in object literals.
   Init();
+  next().after_line_terminator = true;
   Scan();
 }
 
@@ -231,6 +232,45 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
   return x;
 }
 
+Token::Value Scanner::Next() {
+  if (next().token == Token::EOS) next().location = current().location;
+  // Rotate through tokens.
+  TokenDesc* previous = current_;
+  current_ = next_;
+  // If no look-ahead token has been scanned yet (the common case), recycle the
+  // old current_ descriptor as next_ and scan into it; next_next_ stays
+  // uninitialized. Otherwise next_next_ becomes next_, and the old current_
+  // descriptor becomes next_next_ with its token cleared to mark it unscanned.
+  if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
+    next_ = previous;
+    next().after_line_terminator = false;
+    Scan();
+  } else {
+    next_ = next_next_;
+    next_next_ = previous;
+    previous->token = Token::UNINITIALIZED;
+    previous->contextual_token = Token::UNINITIALIZED;
+    DCHECK_NE(Token::UNINITIALIZED, current().token);
+  }
+  return current().token;
+}
+
+Token::Value Scanner::PeekAhead() {
+  DCHECK(next().token != Token::DIV);
+  DCHECK(next().token != Token::ASSIGN_DIV);
+
+  if (next_next().token != Token::UNINITIALIZED) {
+    return next_next().token;
+  }
+  TokenDesc* temp = next_;
+  next_ = next_next_;
+  next().after_line_terminator = false;
+  Scan();
+  next_next_ = next_;
+  next_ = temp;
+  return next_next().token;
+}
+
 Token::Value Scanner::SkipSingleHTMLComment() {
   if (is_module_) {
     ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
@@ -319,11 +359,10 @@ Token::Value Scanner::SkipMultiLineComment() {
 
   while (c0_ != kEndOfInput) {
     DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
-    if (!scan_target().after_line_terminator &&
-        unibrow::IsLineTerminator(c0_)) {
+    if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
       // Following ECMA-262, section 7.4, a comment containing
       // a newline will make the comment count as a line-terminator.
-      scan_target().after_line_terminator = true;
+      next().after_line_terminator = true;
     }
 
     while (V8_UNLIKELY(c0_ == '*')) {
@@ -354,25 +393,6 @@ Token::Value Scanner::ScanHtmlComment() {
   return SkipSingleHTMLComment();
 }
 
-void Scanner::Scan() {
-  token_end_ = (token_end_ + 1) & kTokenStorageMask;
-
-  scan_target().after_line_terminator = (source_pos() == 0);
-  scan_target().literal_chars.Drop();
-  scan_target().raw_literal_chars.Drop();
-  scan_target().contextual_token = Token::UNINITIALIZED;
-  scan_target().invalid_template_escape_message = MessageTemplate::kNone;
-
-  scan_target().token = ScanSingleToken();
-  scan_target().location.end_pos = source_pos();
-
-#ifdef DEBUG
-  for (TokenDesc& token : token_storage_) {
-    SanityCheckTokenDesc(token);
-  }
-#endif
-}
-
 #ifdef DEBUG
 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
   // Most tokens should not have literal_chars or even raw_literal chars.
@@ -434,9 +454,11 @@ void Scanner::SeekForward(int pos) {
   if (pos != current_pos) {
     source_->Seek(pos);
     Advance();
+    // This function is only called to seek to the location
+    // of the end of a function (at the "}" token). It doesn't matter
+    // whether there was a line terminator in the part we skip.
+    next().after_line_terminator = false;
   }
-
-  ResetTokenStorage();
   Scan();
 }
 
@@ -586,7 +608,7 @@ Token::Value Scanner::ScanTemplateSpan() {
 
   Token::Value result = Token::TEMPLATE_SPAN;
   LiteralScope literal(this);
-  scan_target().raw_literal_chars.Start();
+  next().raw_literal_chars.Start();
   const bool capture_raw = true;
   while (true) {
     uc32 c = c0_;
@@ -619,8 +641,8 @@ Token::Value Scanner::ScanTemplateSpan() {
         DCHECK_EQ(!success, has_error());
         // For templates, invalid escape sequence checking is handled in the
         // parser.
-        scanner_error_state.MoveErrorTo(&scan_target());
-        octal_error_state.MoveErrorTo(&scan_target());
+        scanner_error_state.MoveErrorTo(next_);
+        octal_error_state.MoveErrorTo(next_);
       }
     } else if (c < 0) {
       // Unterminated template literal
@@ -638,16 +660,14 @@ Token::Value Scanner::ScanTemplateSpan() {
       AddLiteralChar(c);
     }
   }
-
   literal.Complete();
-  scan_target().location.end_pos = source_pos();
-  scan_target().token = result;
-  scan_target().contextual_token = Token::UNINITIALIZED;
+  next().location.end_pos = source_pos();
+  next().token = result;
+  next().contextual_token = Token::UNINITIALIZED;
 
   return result;
 }
 
-
 Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
   Handle<String> tmp;
   if (source_url_.length() > 0) {
@@ -886,10 +906,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
           return Token::ILLEGAL;
         }
 
-        if (scan_target().literal_chars.one_byte_literal().length() <= 10 &&
+        if (next().literal_chars.one_byte_literal().length() <= 10 &&
             value <= Smi::kMaxValue && c0_ != '.' &&
             !unicode_cache_->IsIdentifierStart(c0_)) {
-          scan_target().smi_value_ = static_cast<uint32_t>(value);
+          next().smi_value_ = static_cast<uint32_t>(value);
           literal.Complete();
 
           if (kind == DECIMAL_WITH_LEADING_ZERO) {
@@ -1148,8 +1168,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
       }
     } else if (c0_ <= kMaxAscii && c0_ != '\\') {
       // Only a-z+ or _: could be a keyword or identifier.
-      Vector<const uint8_t> chars =
-          scan_target().literal_chars.one_byte_literal();
+      Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
       Token::Value token =
           KeywordOrIdentifierToken(chars.start(), chars.length());
       if (token == Token::IDENTIFIER ||
@@ -1199,9 +1218,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
     }
   }
 
-  if (scan_target().literal_chars.is_one_byte()) {
-    Vector<const uint8_t> chars =
-        scan_target().literal_chars.one_byte_literal();
+  if (next().literal_chars.is_one_byte()) {
+    Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
     Token::Value token =
         KeywordOrIdentifierToken(chars.start(), chars.length());
     /* TODO(adamk): YIELD should be handled specially. */
@@ -1229,9 +1247,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
 }
 
 bool Scanner::ScanRegExpPattern() {
+  DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
   DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
-  // Make sure the scanner didn't scan beyond the regexp start.
-  DCHECK(!HasToken(2));
 
   // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
   bool in_character_class = false;
@@ -1240,7 +1257,9 @@ bool Scanner::ScanRegExpPattern() {
   // the scanner should pass uninterpreted bodies to the RegExp
   // constructor.
   LiteralScope literal(this);
-  if (next().token == Token::ASSIGN_DIV) AddLiteralChar('=');
+  if (next().token == Token::ASSIGN_DIV) {
+    AddLiteralChar('=');
+  }
 
   while (c0_ != '/' || in_character_class) {
     if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
@@ -1271,14 +1290,13 @@ bool Scanner::ScanRegExpPattern() {
   Advance();  // consume '/'
 
   literal.Complete();
-  scan_target().token = Token::REGEXP_LITERAL;
-  scan_target().contextual_token = Token::UNINITIALIZED;
+  next().token = Token::REGEXP_LITERAL;
+  next().contextual_token = Token::UNINITIALIZED;
   return true;
 }
 
 
 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
-  DCHECK(!HasToken(2));
   DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
 
   // Scan regular expression flags.
@@ -1314,7 +1332,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
     flags |= flag;
   }
 
-  scan_target().location.end_pos = source_pos();
+  next().location.end_pos = source_pos();
   return Just(RegExp::Flags(flags));
 }
 
@@ -1374,14 +1392,18 @@ void Scanner::SeekNext(size_t position) {
   // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
 
   // To re-scan from a given character position, we need to:
-  // 1. Move the stream to the right position,
+  // 1, Reset the current_, next_ and next_next_ tokens
+  //    (next_ + next_next_ will be overwritten by Next(),
+  //     current_ will remain unchanged, so overwrite it fully.)
+  for (TokenDesc& token : token_storage_) {
+    token.token = Token::UNINITIALIZED;
+    token.contextual_token = Token::UNINITIALIZED;
+  }
+  // 2, reset the source to the desired position,
   source_->Seek(position);
-  // 2. refill the one-character buffer with the first character from the
-  // stream,
+  // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
   c0_ = source_->Advance();
-  // 3. Reset the token storage, and
-  ResetTokenStorage();
-  // 4. scan the first token.
+  next().after_line_terminator = false;
   Scan();
   DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
 }
diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h
index c970fc5ad3..00c844020e 100644
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@@ -237,31 +237,9 @@ class Scanner {
   void Initialize();
 
   // Returns the next token and advances input.
-  V8_INLINE Token::Value Next() {
-    // TODO(verwaest): Remove.
-    if (next().token == Token::EOS) {
-      next_target().location = current().location;
-    }
-    // Advance current token.
-    token_start_ = TokenIndex(1);
-    // Scan the next token if it's not yet ready.
-    if (V8_LIKELY(!HasToken(1))) Scan();
-    // Return current token.
-    DCHECK(HasToken(1));
-    return current().token;
-  }
-
+  Token::Value Next();
   // Returns the token following peek()
-  V8_INLINE Token::Value PeekAhead() {
-    DCHECK_NE(Token::DIV, next().token);
-    DCHECK_NE(Token::ASSIGN_DIV, next().token);
-    DCHECK(HasToken(1));
-
-    if (V8_LIKELY(!HasToken(2))) Scan();
-
-    return next_next().token;
-  }
-
+  Token::Value PeekAhead();
   // Returns the current token again.
   Token::Value current_token() { return current().token; }
 
@@ -395,8 +373,7 @@ class Scanner {
   // Scans the input as a template literal
   Token::Value ScanTemplateContinuation() {
     DCHECK_EQ(next().token, Token::RBRACE);
-    DCHECK(!HasToken(2));
-    DCHECK_EQ(source_pos() - 1, scan_target().location.beg_pos);
+    DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
     return ScanTemplateSpan();
   }
 
@@ -535,7 +512,7 @@ class Scanner {
   class LiteralScope {
    public:
     explicit LiteralScope(Scanner* scanner)
-        : buffer_(&scanner->scan_target().literal_chars), complete_(false) {
+        : buffer_(&scanner->next().literal_chars), complete_(false) {
       buffer_->Start();
     }
     ~LiteralScope() {
@@ -584,6 +561,10 @@ class Scanner {
     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
     Advance();
 
+    current_ = &token_storage_[0];
+    next_ = &token_storage_[1];
+    next_next_ = &token_storage_[2];
+
     found_html_comment_ = false;
     scanner_error_ = MessageTemplate::kNone;
   }
@@ -604,16 +585,12 @@ class Scanner {
   // Seek to the next_ token at the given position.
   void SeekNext(size_t position);
 
-  // Literal buffer support
-  V8_INLINE void AddLiteralChar(uc32 c) {
-    scan_target().literal_chars.AddChar(c);
-  }
-  V8_INLINE void AddLiteralChar(char c) {
-    scan_target().literal_chars.AddChar(c);
-  }
+  V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
+
+  V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
 
   V8_INLINE void AddRawLiteralChar(uc32 c) {
-    scan_target().raw_literal_chars.AddChar(c);
+    next().raw_literal_chars.AddChar(c);
   }
 
   V8_INLINE void AddLiteralCharAdvance() {
@@ -737,7 +714,7 @@ class Scanner {
 
   // Scans a single JavaScript token.
   V8_INLINE Token::Value ScanSingleToken();
-  void Scan();
+  V8_INLINE void Scan();
 
   V8_INLINE Token::Value SkipWhiteSpace();
   Token::Value SkipSingleHTMLComment();
@@ -808,36 +785,17 @@ class Scanner {
   LiteralBuffer source_url_;
   LiteralBuffer source_mapping_url_;
 
-  static const int kNumberOfTokens = 1 << 2;
-  static const int kTokenStorageMask = kNumberOfTokens - 1;
+  TokenDesc token_storage_[3];
 
-  TokenDesc token_storage_[kNumberOfTokens];
-  // Index of current token in token_storage_.
-  int token_start_ = 0;
-  // Index of last scanned token in token_storage. We typically scan the next
-  // token aftewards. Initially this points to the initial current token since
-  // we always scan the next token and move the previous next to current.
-  int token_end_ = 0;
+  TokenDesc& next() { return *next_; }
 
-  void ResetTokenStorage() { token_start_ = token_end_ = 0; }
+  const TokenDesc& current() const { return *current_; }
+  const TokenDesc& next() const { return *next_; }
+  const TokenDesc& next_next() const { return *next_next_; }
 
-  int TokenIndex(int i) const { return (token_start_ + i) & kTokenStorageMask; }
-
-  bool HasToken(int i) const {
-    return i <= ((token_end_ - token_start_) & kTokenStorageMask);
-  }
-
-  const TokenDesc& GetToken(int i) const {
-    DCHECK(HasToken(i));
-    return token_storage_[TokenIndex(i)];
-  }
-
-  const TokenDesc& current() const { return GetToken(0); }
-  const TokenDesc& next() const { return GetToken(1); }
-  const TokenDesc& next_next() const { return GetToken(2); }
-
-  TokenDesc& scan_target() { return token_storage_[token_end_]; }
-  TokenDesc& next_target() { return token_storage_[TokenIndex(1)]; }
+  TokenDesc* current_;    // desc for current token (as returned by Next())
+  TokenDesc* next_;       // desc for next token (one token look-ahead)
+  TokenDesc* next_next_;  // desc for the token after next (after PeekAhead())
 
   // Input stream. Must be initialized to an Utf16CharacterStream.
   Utf16CharacterStream* const source_;