[scanner] Revert to pointer tracking token storage rather than range

Using a circular buffer regresses performance unfortunately. Change-Id: Id5b68cae798d21f46376141f86d5707794bf08d6 Reviewed-on: https://chromium-review.googlesource.com/1194064 Reviewed-by: Igor Sheludko <ishell@chromium.org> Commit-Queue: Toon Verwaest <verwaest@chromium.org> Cr-Commit-Position: refs/heads/master@{#55478}
2018-08-28 17:48:17 +02:00 · 2018-08-28 17:48:17 +02:00 · 103d886a7e
commit 103d886a7e
parent 0bceaf1e7c
3 changed files with 115 additions and 119 deletions
--- a/src/parsing/scanner-inl.h
+++ b/src/parsing/scanner-inl.h
@ -159,7 +159,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
    // Advance as long as character is a WhiteSpace or LineTerminator.
    // Remember if the latter is the case.
    if (unibrow::IsLineTerminator(c0_)) {
-      scan_target().after_line_terminator = true;
+      next().after_line_terminator = true;
    } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
      break;
    }
@ -178,7 +178,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
 V8_INLINE Token::Value Scanner::ScanSingleToken() {
  Token::Value token;
  do {
-    scan_target().location.beg_pos = source_pos();
+    next().location.beg_pos = source_pos();

    if (static_cast<unsigned>(c0_) <= 0x7F) {
      Token::Value token = one_char_tokens[c0_];
@ -242,7 +242,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
        Advance();
        if (c0_ == '-') {
          Advance();
-          if (c0_ == '>' && scan_target().after_line_terminator) {
+          if (c0_ == '>' && next().after_line_terminator) {
            // For compatibility with SpiderMonkey, we skip lines that
            // start with an HTML comment end '-->'.
            token = SkipSingleHTMLComment();
@ -330,7 +330,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
          Token::Value token = ScanIdentifierOrKeyword();
          if (!Token::IsContextualKeyword(token)) return token;

-          scan_target().contextual_token = token;
+          next().contextual_token = token;
          return Token::IDENTIFIER;
        }
        if (IsDecimalDigit(c0_)) return ScanNumber(false);
@ -344,6 +344,22 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
  return token;
 }

+void Scanner::Scan() {
+  next().literal_chars.Drop();
+  next().raw_literal_chars.Drop();
+  next().contextual_token = Token::UNINITIALIZED;
+  next().invalid_template_escape_message = MessageTemplate::kNone;
+
+  next().token = ScanSingleToken();
+  next().location.end_pos = source_pos();
+
+#ifdef DEBUG
+  SanityCheckTokenDesc(current());
+  SanityCheckTokenDesc(next());
+  SanityCheckTokenDesc(next_next());
+#endif
+}
+
 }  // namespace internal
 }  // namespace v8

--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@ -186,6 +186,7 @@ void Scanner::Initialize() {
  // Need to capture identifiers in order to recognize "get" and "set"
  // in object literals.
  Init();
+  next().after_line_terminator = true;
  Scan();
 }

@ -231,6 +232,45 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
  return x;
 }

+Token::Value Scanner::Next() {
+  if (next().token == Token::EOS) next().location = current().location;
+  // Rotate through tokens.
+  TokenDesc* previous = current_;
+  current_ = next_;
+  // If a token was already peeked (next_next_ is initialized), it simply
+  // becomes next_; the previous current_ is recycled as next_next_ and its
+  // token is cleared to mark it as not yet scanned. Otherwise the previous
+  // current_ is recycled as next_ and scanned into; next_next_ stays unset.
+  if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
+    next_ = previous;
+    next().after_line_terminator = false;
+    Scan();
+  } else {
+    next_ = next_next_;
+    next_next_ = previous;
+    previous->token = Token::UNINITIALIZED;
+    previous->contextual_token = Token::UNINITIALIZED;
+    DCHECK_NE(Token::UNINITIALIZED, current().token);
+  }
+  return current().token;
+}
+
+Token::Value Scanner::PeekAhead() {
+  DCHECK(next().token != Token::DIV);
+  DCHECK(next().token != Token::ASSIGN_DIV);
+
+  if (next_next().token != Token::UNINITIALIZED) {
+    return next_next().token;
+  }
+  TokenDesc* temp = next_;
+  next_ = next_next_;
+  next().after_line_terminator = false;
+  Scan();
+  next_next_ = next_;
+  next_ = temp;
+  return next_next().token;
+}
+
 Token::Value Scanner::SkipSingleHTMLComment() {
  if (is_module_) {
    ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
@ -319,11 +359,10 @@ Token::Value Scanner::SkipMultiLineComment() {

  while (c0_ != kEndOfInput) {
    DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
-    if (!scan_target().after_line_terminator &&
-        unibrow::IsLineTerminator(c0_)) {
+    if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
      // Following ECMA-262, section 7.4, a comment containing
      // a newline will make the comment count as a line-terminator.
-      scan_target().after_line_terminator = true;
+      next().after_line_terminator = true;
    }

    while (V8_UNLIKELY(c0_ == '*')) {
@ -354,25 +393,6 @@ Token::Value Scanner::ScanHtmlComment() {
  return SkipSingleHTMLComment();
 }

-void Scanner::Scan() {
-  token_end_ = (token_end_ + 1) & kTokenStorageMask;
-
-  scan_target().after_line_terminator = (source_pos() == 0);
-  scan_target().literal_chars.Drop();
-  scan_target().raw_literal_chars.Drop();
-  scan_target().contextual_token = Token::UNINITIALIZED;
-  scan_target().invalid_template_escape_message = MessageTemplate::kNone;
-
-  scan_target().token = ScanSingleToken();
-  scan_target().location.end_pos = source_pos();
-
-#ifdef DEBUG
-  for (TokenDesc& token : token_storage_) {
-    SanityCheckTokenDesc(token);
-  }
-#endif
-}
-
 #ifdef DEBUG
 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
  // Most tokens should not have literal_chars or even raw_literal chars.
@ -434,9 +454,11 @@ void Scanner::SeekForward(int pos) {
  if (pos != current_pos) {
    source_->Seek(pos);
    Advance();
+    // This function is only called to seek to the location
+    // of the end of a function (at the "}" token). It doesn't matter
+    // whether there was a line terminator in the part we skip.
+    next().after_line_terminator = false;
  }
-
-  ResetTokenStorage();
  Scan();
 }

@ -586,7 +608,7 @@ Token::Value Scanner::ScanTemplateSpan() {

  Token::Value result = Token::TEMPLATE_SPAN;
  LiteralScope literal(this);
-  scan_target().raw_literal_chars.Start();
+  next().raw_literal_chars.Start();
  const bool capture_raw = true;
  while (true) {
    uc32 c = c0_;
@ -619,8 +641,8 @@ Token::Value Scanner::ScanTemplateSpan() {
        DCHECK_EQ(!success, has_error());
        // For templates, invalid escape sequence checking is handled in the
        // parser.
-        scanner_error_state.MoveErrorTo(&scan_target());
-        octal_error_state.MoveErrorTo(&scan_target());
+        scanner_error_state.MoveErrorTo(next_);
+        octal_error_state.MoveErrorTo(next_);
      }
    } else if (c < 0) {
      // Unterminated template literal
@ -638,16 +660,14 @@ Token::Value Scanner::ScanTemplateSpan() {
      AddLiteralChar(c);
    }
  }
-
  literal.Complete();
-  scan_target().location.end_pos = source_pos();
-  scan_target().token = result;
-  scan_target().contextual_token = Token::UNINITIALIZED;
+  next().location.end_pos = source_pos();
+  next().token = result;
+  next().contextual_token = Token::UNINITIALIZED;

  return result;
 }

-
 Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
  Handle<String> tmp;
  if (source_url_.length() > 0) {
@ -886,10 +906,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
          return Token::ILLEGAL;
        }

-        if (scan_target().literal_chars.one_byte_literal().length() <= 10 &&
+        if (next().literal_chars.one_byte_literal().length() <= 10 &&
            value <= Smi::kMaxValue && c0_ != '.' &&
            !unicode_cache_->IsIdentifierStart(c0_)) {
-          scan_target().smi_value_ = static_cast<uint32_t>(value);
+          next().smi_value_ = static_cast<uint32_t>(value);
          literal.Complete();

          if (kind == DECIMAL_WITH_LEADING_ZERO) {
@ -1148,8 +1168,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
      }
    } else if (c0_ <= kMaxAscii && c0_ != '\\') {
      // Only a-z+ or _: could be a keyword or identifier.
-      Vector<const uint8_t> chars =
-          scan_target().literal_chars.one_byte_literal();
+      Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
      Token::Value token =
          KeywordOrIdentifierToken(chars.start(), chars.length());
      if (token == Token::IDENTIFIER ||
@ -1199,9 +1218,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
    }
  }

-  if (scan_target().literal_chars.is_one_byte()) {
-    Vector<const uint8_t> chars =
-        scan_target().literal_chars.one_byte_literal();
+  if (next().literal_chars.is_one_byte()) {
+    Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
    Token::Value token =
        KeywordOrIdentifierToken(chars.start(), chars.length());
    /* TODO(adamk): YIELD should be handled specially. */
@ -1229,9 +1247,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
 }

 bool Scanner::ScanRegExpPattern() {
+  DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
  DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
-  // Make sure the scanner didn't scan beyond the regexp start.
-  DCHECK(!HasToken(2));

  // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
  bool in_character_class = false;
@ -1240,7 +1257,9 @@ bool Scanner::ScanRegExpPattern() {
  // the scanner should pass uninterpreted bodies to the RegExp
  // constructor.
  LiteralScope literal(this);
-  if (next().token == Token::ASSIGN_DIV) AddLiteralChar('=');
+  if (next().token == Token::ASSIGN_DIV) {
+    AddLiteralChar('=');
+  }

  while (c0_ != '/' || in_character_class) {
    if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
@ -1271,14 +1290,13 @@ bool Scanner::ScanRegExpPattern() {
  Advance();  // consume '/'

  literal.Complete();
-  scan_target().token = Token::REGEXP_LITERAL;
-  scan_target().contextual_token = Token::UNINITIALIZED;
+  next().token = Token::REGEXP_LITERAL;
+  next().contextual_token = Token::UNINITIALIZED;
  return true;
 }


 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
-  DCHECK(!HasToken(2));
  DCHECK_EQ(Token::REGEXP_LITERAL, next().token);

  // Scan regular expression flags.
@ -1314,7 +1332,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
    flags |= flag;
  }

-  scan_target().location.end_pos = source_pos();
+  next().location.end_pos = source_pos();
  return Just(RegExp::Flags(flags));
 }

@ -1374,14 +1392,18 @@ void Scanner::SeekNext(size_t position) {
  // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.

  // To re-scan from a given character position, we need to:
-  // 1. Move the stream to the right position,
+  // 1, Reset the current_, next_ and next_next_ tokens
+  //    (next_ + next_next_ will be overwritten by Next(),
+  //     current_ will remain unchanged, so overwrite it fully.)
+  for (TokenDesc& token : token_storage_) {
+    token.token = Token::UNINITIALIZED;
+    token.contextual_token = Token::UNINITIALIZED;
+  }
+  // 2, reset the source to the desired position,
  source_->Seek(position);
-  // 2. refill the one-character buffer with the first character from the
-  // stream,
+  // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
  c0_ = source_->Advance();
-  // 3. Reset the token storage, and
-  ResetTokenStorage();
-  // 4. scan the first token.
+  next().after_line_terminator = false;
  Scan();
  DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
 }
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@ -237,31 +237,9 @@ class Scanner {
  void Initialize();

  // Returns the next token and advances input.
-  V8_INLINE Token::Value Next() {
-    // TODO(verwaest): Remove.
-    if (next().token == Token::EOS) {
-      next_target().location = current().location;
-    }
-    // Advance current token.
-    token_start_ = TokenIndex(1);
-    // Scan the next token if it's not yet ready.
-    if (V8_LIKELY(!HasToken(1))) Scan();
-    // Return current token.
-    DCHECK(HasToken(1));
-    return current().token;
-  }
-
+  Token::Value Next();
  // Returns the token following peek()
-  V8_INLINE Token::Value PeekAhead() {
-    DCHECK_NE(Token::DIV, next().token);
-    DCHECK_NE(Token::ASSIGN_DIV, next().token);
-    DCHECK(HasToken(1));
-
-    if (V8_LIKELY(!HasToken(2))) Scan();
-
-    return next_next().token;
-  }
-
+  Token::Value PeekAhead();
  // Returns the current token again.
  Token::Value current_token() { return current().token; }

@ -395,8 +373,7 @@ class Scanner {
  // Scans the input as a template literal
  Token::Value ScanTemplateContinuation() {
    DCHECK_EQ(next().token, Token::RBRACE);
-    DCHECK(!HasToken(2));
-    DCHECK_EQ(source_pos() - 1, scan_target().location.beg_pos);
+    DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
    return ScanTemplateSpan();
  }

@ -535,7 +512,7 @@ class Scanner {
  class LiteralScope {
   public:
    explicit LiteralScope(Scanner* scanner)
-        : buffer_(&scanner->scan_target().literal_chars), complete_(false) {
+        : buffer_(&scanner->next().literal_chars), complete_(false) {
      buffer_->Start();
    }
    ~LiteralScope() {
@ -584,6 +561,10 @@ class Scanner {
    STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
    Advance();

+    current_ = &token_storage_[0];
+    next_ = &token_storage_[1];
+    next_next_ = &token_storage_[2];
+
    found_html_comment_ = false;
    scanner_error_ = MessageTemplate::kNone;
  }
@ -604,16 +585,12 @@ class Scanner {
  // Seek to the next_ token at the given position.
  void SeekNext(size_t position);

-  // Literal buffer support
-  V8_INLINE void AddLiteralChar(uc32 c) {
-    scan_target().literal_chars.AddChar(c);
-  }
-  V8_INLINE void AddLiteralChar(char c) {
-    scan_target().literal_chars.AddChar(c);
-  }
+  V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
+
+  V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }

  V8_INLINE void AddRawLiteralChar(uc32 c) {
-    scan_target().raw_literal_chars.AddChar(c);
+    next().raw_literal_chars.AddChar(c);
  }

  V8_INLINE void AddLiteralCharAdvance() {
@ -737,7 +714,7 @@ class Scanner {

  // Scans a single JavaScript token.
  V8_INLINE Token::Value ScanSingleToken();
-  void Scan();
+  V8_INLINE void Scan();

  V8_INLINE Token::Value SkipWhiteSpace();
  Token::Value SkipSingleHTMLComment();
@ -808,36 +785,17 @@ class Scanner {
  LiteralBuffer source_url_;
  LiteralBuffer source_mapping_url_;

-  static const int kNumberOfTokens = 1 << 2;
-  static const int kTokenStorageMask = kNumberOfTokens - 1;
+  TokenDesc token_storage_[3];

-  TokenDesc token_storage_[kNumberOfTokens];
-  // Index of current token in token_storage_.
-  int token_start_ = 0;
-  // Index of last scanned token in token_storage. We typically scan the next
-  // token aftewards. Initially this points to the initial current token since
-  // we always scan the next token and move the previous next to current.
-  int token_end_ = 0;
+  TokenDesc& next() { return *next_; }

-  void ResetTokenStorage() { token_start_ = token_end_ = 0; }
+  const TokenDesc& current() const { return *current_; }
+  const TokenDesc& next() const { return *next_; }
+  const TokenDesc& next_next() const { return *next_next_; }

-  int TokenIndex(int i) const { return (token_start_ + i) & kTokenStorageMask; }
-
-  bool HasToken(int i) const {
-    return i <= ((token_end_ - token_start_) & kTokenStorageMask);
-  }
-
-  const TokenDesc& GetToken(int i) const {
-    DCHECK(HasToken(i));
-    return token_storage_[TokenIndex(i)];
-  }
-
-  const TokenDesc& current() const { return GetToken(0); }
-  const TokenDesc& next() const { return GetToken(1); }
-  const TokenDesc& next_next() const { return GetToken(2); }
-
-  TokenDesc& scan_target() { return token_storage_[token_end_]; }
-  TokenDesc& next_target() { return token_storage_[TokenIndex(1)]; }
+  TokenDesc* current_;    // desc for current token (as returned by Next())
+  TokenDesc* next_;       // desc for next token (one token look-ahead)
+  TokenDesc* next_next_;  // desc for the token after next (after PeekAhead())

  // Input stream. Must be initialized to an Utf16CharacterStream.
  Utf16CharacterStream* const source_;