[scanner] Revert to pointer tracking token storage rather than range

Using a circular buffer regresses performance, unfortunately.

Change-Id: Id5b68cae798d21f46376141f86d5707794bf08d6
Reviewed-on: https://chromium-review.googlesource.com/1194064
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55478}
2018-08-28 17:48:17 +02:00 · 2018-08-28 17:48:17 +02:00 · 103d886a7e
commit 103d886a7e
parent 0bceaf1e7c
3 changed files with 115 additions and 119 deletions
--- a/src/parsing/scanner-inl.h
+++ b/src/parsing/scanner-inl.h
@ -159,7 +159,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
    // Advance as long as character is a WhiteSpace or LineTerminator.
    // Remember if the latter is the case.
    if (unibrow::IsLineTerminator(c0_)) {
-      scan_target().after_line_terminator = true;
+      next().after_line_terminator = true;
    } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
      break;
    }
@ -178,7 +178,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
 V8_INLINE Token::Value Scanner::ScanSingleToken() {
  Token::Value token;
  do {
-    scan_target().location.beg_pos = source_pos();
+    next().location.beg_pos = source_pos();
    if (static_cast<unsigned>(c0_) <= 0x7F) {
      Token::Value token = one_char_tokens[c0_];
@ -242,7 +242,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
        Advance();
        if (c0_ == '-') {
          Advance();
-          if (c0_ == '>' && scan_target().after_line_terminator) {
+          if (c0_ == '>' && next().after_line_terminator) {
            // For compatibility with SpiderMonkey, we skip lines that
            // start with an HTML comment end '-->'.
            token = SkipSingleHTMLComment();
@ -330,7 +330,7 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
          Token::Value token = ScanIdentifierOrKeyword();
          if (!Token::IsContextualKeyword(token)) return token;
-          scan_target().contextual_token = token;
+          next().contextual_token = token;
          return Token::IDENTIFIER;
        }
        if (IsDecimalDigit(c0_)) return ScanNumber(false);
@ -344,6 +344,22 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
  return token;
 }
 // Scans one token into the descriptor returned by next().
 //
 // Per-token state in that descriptor is reset first, then the token value
 // returned by ScanSingleToken() and the resulting end position are stored.
 // after_line_terminator is not touched here; callers assign it before
 // calling Scan().
 void Scanner::Scan() {
  // Reset state held by the descriptor that is about to be written.
  next().literal_chars.Drop();
  next().raw_literal_chars.Drop();
  next().contextual_token = Token::UNINITIALIZED;
  next().invalid_template_escape_message = MessageTemplate::kNone;
  // ScanSingleToken() records location.beg_pos itself; only the end position
  // is filled in here, once the token has been consumed.
  next().token = ScanSingleToken();
  next().location.end_pos = source_pos();
 #ifdef DEBUG
  // Debug builds re-validate all three token descriptors after every scan.
  SanityCheckTokenDesc(current());
  SanityCheckTokenDesc(next());
  SanityCheckTokenDesc(next_next());
 #endif
 }
 }  // namespace internal
 }  // namespace v8
--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@ -186,6 +186,7 @@ void Scanner::Initialize() {
  // Need to capture identifiers in order to recognize "get" and "set"
  // in object literals.
  Init();
  next().after_line_terminator = true;
  Scan();
 }
@ -231,6 +232,45 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
  return x;
 }
 // Advances the scanner by one token and returns the token that is now
 // current. The three TokenDesc pointers (current_, next_, next_next_) are
 // rotated; no descriptor contents are copied except for the EOS location
 // fix-up on the first line.
 Token::Value Scanner::Next() {
  // When the upcoming token is EOS, its location is overwritten with the
  // location of the current token.
  // NOTE(review): the reason for this is not visible here — confirm with the
  // parser's use of EOS locations.
  if (next().token == Token::EOS) next().location = current().location;
  // Rotate through tokens.
  TokenDesc* previous = current_;
  current_ = next_;
  // Either we already have the next token lined up, in which case next_next_
  // simply becomes next_. In that case we use current_ as new next_next_ and
  // clear its token to indicate that it wasn't scanned yet. Otherwise we use
  // current_ as next_ and scan into it, leaving next_next_ uninitialized.
  if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
    // No look-ahead token is pending: recycle the old current descriptor as
    // the new next_ and scan into it. The line-terminator flag is cleared
    // before scanning.
    next_ = previous;
    next().after_line_terminator = false;
    Scan();
  } else {
    // A look-ahead token is already pending: promote it to next_ and recycle
    // the old current descriptor as the (unscanned) next_next_ slot.
    next_ = next_next_;
    next_next_ = previous;
    previous->token = Token::UNINITIALIZED;
    previous->contextual_token = Token::UNINITIALIZED;
    DCHECK_NE(Token::UNINITIALIZED, current().token);
  }
  return current().token;
 }
 // Returns the token following next(), scanning it on first request.
 //
 // If next_next() already holds a token, that token is returned unchanged.
 // Otherwise next_ is temporarily pointed at the next_next_ descriptor so
 // that Scan(), which writes through next(), fills that descriptor; the
 // pointers are put back before returning.
 Token::Value Scanner::PeekAhead() {
  // Must not be called while next() is DIV or ASSIGN_DIV.
  // NOTE(review): presumably because such a token may still be re-scanned as
  // a regexp literal, which requires an unscanned next_next() — confirm.
  DCHECK(next().token != Token::DIV);
  DCHECK(next().token != Token::ASSIGN_DIV);

  if (next_next().token != Token::UNINITIALIZED) {
    return next_next().token;
  }
  // Redirect next_ to the look-ahead descriptor for the duration of Scan().
  TokenDesc* temp = next_;
  next_ = next_next_;
  next().after_line_terminator = false;
  Scan();
  // Restore: the scanned descriptor is next_next_, temp is next_ again.
  next_next_ = next_;
  next_ = temp;
  return next_next().token;
 }
 Token::Value Scanner::SkipSingleHTMLComment() {
  if (is_module_) {
    ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
@ -319,11 +359,10 @@ Token::Value Scanner::SkipMultiLineComment() {
  while (c0_ != kEndOfInput) {
    DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
-    if (!scan_target().after_line_terminator &&
+    if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
        unibrow::IsLineTerminator(c0_)) {
      // Following ECMA-262, section 7.4, a comment containing
      // a newline will make the comment count as a line-terminator.
-      scan_target().after_line_terminator = true;
+      next().after_line_terminator = true;
    }
    while (V8_UNLIKELY(c0_ == '*')) {
@ -354,25 +393,6 @@ Token::Value Scanner::ScanHtmlComment() {
  return SkipSingleHTMLComment();
 }
 // Index-based variant of Scan(): moves token_end_ forward by one slot,
 // wrapping with kTokenStorageMask, and scans a single token into
 // scan_target().
 void Scanner::Scan() {
  // Advance to the slot that will receive the token.
  token_end_ = (token_end_ + 1) & kTokenStorageMask;
  // The flag starts out true only when scanning begins at source position 0.
  scan_target().after_line_terminator = (source_pos() == 0);
  scan_target().literal_chars.Drop();
  scan_target().raw_literal_chars.Drop();
  scan_target().contextual_token = Token::UNINITIALIZED;
  scan_target().invalid_template_escape_message = MessageTemplate::kNone;
  scan_target().token = ScanSingleToken();
  scan_target().location.end_pos = source_pos();
 #ifdef DEBUG
  // Debug builds re-validate every descriptor in token_storage_.
  for (TokenDesc& token : token_storage_) {
    SanityCheckTokenDesc(token);
  }
 #endif
 }
 #ifdef DEBUG
 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
  // Most tokens should not have literal_chars or even raw_literal chars.
@ -434,9 +454,11 @@ void Scanner::SeekForward(int pos) {
  if (pos != current_pos) {
    source_->Seek(pos);
    Advance();
    // This function is only called to seek to the location
    // of the end of a function (at the "}" token). It doesn't matter
    // whether there was a line terminator in the part we skip.
    next().after_line_terminator = false;
  }
  ResetTokenStorage();
  Scan();
 }
@ -586,7 +608,7 @@ Token::Value Scanner::ScanTemplateSpan() {
  Token::Value result = Token::TEMPLATE_SPAN;
  LiteralScope literal(this);
-  scan_target().raw_literal_chars.Start();
+  next().raw_literal_chars.Start();
  const bool capture_raw = true;
  while (true) {
    uc32 c = c0_;
@ -619,8 +641,8 @@ Token::Value Scanner::ScanTemplateSpan() {
        DCHECK_EQ(!success, has_error());
        // For templates, invalid escape sequence checking is handled in the
        // parser.
-        scanner_error_state.MoveErrorTo(&scan_target());
+        scanner_error_state.MoveErrorTo(next_);
-        octal_error_state.MoveErrorTo(&scan_target());
+        octal_error_state.MoveErrorTo(next_);
      }
    } else if (c < 0) {
      // Unterminated template literal
@ -638,16 +660,14 @@ Token::Value Scanner::ScanTemplateSpan() {
      AddLiteralChar(c);
    }
  }
  literal.Complete();
-  scan_target().location.end_pos = source_pos();
+  next().location.end_pos = source_pos();
-  scan_target().token = result;
+  next().token = result;
-  scan_target().contextual_token = Token::UNINITIALIZED;
+  next().contextual_token = Token::UNINITIALIZED;
  return result;
 }
 Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
  Handle<String> tmp;
  if (source_url_.length() > 0) {
@ -886,10 +906,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
          return Token::ILLEGAL;
        }
-        if (scan_target().literal_chars.one_byte_literal().length() <= 10 &&
+        if (next().literal_chars.one_byte_literal().length() <= 10 &&
            value <= Smi::kMaxValue && c0_ != '.' &&
            !unicode_cache_->IsIdentifierStart(c0_)) {
-          scan_target().smi_value_ = static_cast<uint32_t>(value);
+          next().smi_value_ = static_cast<uint32_t>(value);
          literal.Complete();
          if (kind == DECIMAL_WITH_LEADING_ZERO) {
@ -1148,8 +1168,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
      }
    } else if (c0_ <= kMaxAscii && c0_ != '\\') {
      // Only a-z+ or _: could be a keyword or identifier.
-      Vector<const uint8_t> chars =
+      Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
          scan_target().literal_chars.one_byte_literal();
      Token::Value token =
          KeywordOrIdentifierToken(chars.start(), chars.length());
      if (token == Token::IDENTIFIER ||
@ -1199,9 +1218,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
    }
  }
-  if (scan_target().literal_chars.is_one_byte()) {
+  if (next().literal_chars.is_one_byte()) {
-    Vector<const uint8_t> chars =
+    Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
        scan_target().literal_chars.one_byte_literal();
    Token::Value token =
        KeywordOrIdentifierToken(chars.start(), chars.length());
    /* TODO(adamk): YIELD should be handled specially. */
@ -1229,9 +1247,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
 }
 bool Scanner::ScanRegExpPattern() {
  DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
  DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
  // Make sure the scanner didn't scan beyond the regexp start.
  DCHECK(!HasToken(2));
  // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
  bool in_character_class = false;
@ -1240,7 +1257,9 @@ bool Scanner::ScanRegExpPattern() {
  // the scanner should pass uninterpreted bodies to the RegExp
  // constructor.
  LiteralScope literal(this);
-  if (next().token == Token::ASSIGN_DIV) AddLiteralChar('=');
+  if (next().token == Token::ASSIGN_DIV) {
    AddLiteralChar('=');
  }
  while (c0_ != '/' || in_character_class) {
    if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
@ -1271,14 +1290,13 @@ bool Scanner::ScanRegExpPattern() {
  Advance();  // consume '/'
  literal.Complete();
-  scan_target().token = Token::REGEXP_LITERAL;
+  next().token = Token::REGEXP_LITERAL;
-  scan_target().contextual_token = Token::UNINITIALIZED;
+  next().contextual_token = Token::UNINITIALIZED;
  return true;
 }
 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
  DCHECK(!HasToken(2));
  DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
  // Scan regular expression flags.
@ -1314,7 +1332,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
    flags |= flag;
  }
-  scan_target().location.end_pos = source_pos();
+  next().location.end_pos = source_pos();
  return Just(RegExp::Flags(flags));
 }
@ -1374,14 +1392,18 @@ void Scanner::SeekNext(size_t position) {
  // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
  // To re-scan from a given character position, we need to:
-  // 1. Move the stream to the right position,
+  // 1, Reset the current_, next_ and next_next_ tokens
  //    (next_ + next_next_ will be overwritten by Next(),
  //     current_ will remain unchanged, so overwrite it fully.)
  for (TokenDesc& token : token_storage_) {
    token.token = Token::UNINITIALIZED;
    token.contextual_token = Token::UNINITIALIZED;
  }
  // 2, reset the source to the desired position,
  source_->Seek(position);
-  // 2. refill the one-character buffer with the first character from the
+  // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
  // stream,
  c0_ = source_->Advance();
-  // 3. Reset the token storage, and
+  next().after_line_terminator = false;
  ResetTokenStorage();
  // 4. scan the first token.
  Scan();
  DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
 }
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@ -237,31 +237,9 @@ class Scanner {
  void Initialize();
  // Returns the next token and advances input.
-  V8_INLINE Token::Value Next() {
+  Token::Value Next();
    // TODO(verwaest): Remove.
    if (next().token == Token::EOS) {
      next_target().location = current().location;
    }
    // Advance current token.
    token_start_ = TokenIndex(1);
    // Scan the next token if it's not yet ready.
    if (V8_LIKELY(!HasToken(1))) Scan();
    // Return current token.
    DCHECK(HasToken(1));
    return current().token;
  }
  // Returns the token following peek()
-  V8_INLINE Token::Value PeekAhead() {
+  Token::Value PeekAhead();
    DCHECK_NE(Token::DIV, next().token);
    DCHECK_NE(Token::ASSIGN_DIV, next().token);
    DCHECK(HasToken(1));
    if (V8_LIKELY(!HasToken(2))) Scan();
    return next_next().token;
  }
  // Returns the current token again.
  Token::Value current_token() { return current().token; }
@ -395,8 +373,7 @@ class Scanner {
  // Scans the input as a template literal
  Token::Value ScanTemplateContinuation() {
    DCHECK_EQ(next().token, Token::RBRACE);
-    DCHECK(!HasToken(2));
+    DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
    DCHECK_EQ(source_pos() - 1, scan_target().location.beg_pos);
    return ScanTemplateSpan();
  }
@ -535,7 +512,7 @@ class Scanner {
  class LiteralScope {
   public:
    explicit LiteralScope(Scanner* scanner)
-        : buffer_(&scanner->scan_target().literal_chars), complete_(false) {
+        : buffer_(&scanner->next().literal_chars), complete_(false) {
      buffer_->Start();
    }
    ~LiteralScope() {
@ -584,6 +561,10 @@ class Scanner {
    STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
    Advance();
    current_ = &token_storage_[0];
    next_ = &token_storage_[1];
    next_next_ = &token_storage_[2];
    found_html_comment_ = false;
    scanner_error_ = MessageTemplate::kNone;
  }
@ -604,16 +585,12 @@ class Scanner {
  // Seek to the next_ token at the given position.
  void SeekNext(size_t position);
-  // Literal buffer support
+  V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
-  V8_INLINE void AddLiteralChar(uc32 c) {
+
-    scan_target().literal_chars.AddChar(c);
+  V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
  }
  V8_INLINE void AddLiteralChar(char c) {
    scan_target().literal_chars.AddChar(c);
  }
  V8_INLINE void AddRawLiteralChar(uc32 c) {
-    scan_target().raw_literal_chars.AddChar(c);
+    next().raw_literal_chars.AddChar(c);
  }
  V8_INLINE void AddLiteralCharAdvance() {
@ -737,7 +714,7 @@ class Scanner {
  // Scans a single JavaScript token.
  V8_INLINE Token::Value ScanSingleToken();
-  void Scan();
+  V8_INLINE void Scan();
  V8_INLINE Token::Value SkipWhiteSpace();
  Token::Value SkipSingleHTMLComment();
@ -808,36 +785,17 @@ class Scanner {
  LiteralBuffer source_url_;
  LiteralBuffer source_mapping_url_;
-  static const int kNumberOfTokens = 1 << 2;
+  TokenDesc token_storage_[3];
  static const int kTokenStorageMask = kNumberOfTokens - 1;
-  TokenDesc token_storage_[kNumberOfTokens];
+  TokenDesc& next() { return *next_; }
  // Index of current token in token_storage_.
  int token_start_ = 0;
  // Index of last scanned token in token_storage. We typically scan the next
  // token afterwards. Initially this points to the initial current token since
  // we always scan the next token and move the previous next to current.
  int token_end_ = 0;
-  void ResetTokenStorage() { token_start_ = token_end_ = 0; }
+  const TokenDesc& current() const { return *current_; }
  const TokenDesc& next() const { return *next_; }
  const TokenDesc& next_next() const { return *next_next_; }
-  int TokenIndex(int i) const { return (token_start_ + i) & kTokenStorageMask; }
+  TokenDesc* current_;    // desc for current token (as returned by Next())
-
+  TokenDesc* next_;       // desc for next token (one token look-ahead)
-  bool HasToken(int i) const {
+  TokenDesc* next_next_;  // desc for the token after next (after PeekAhead())
    return i <= ((token_end_ - token_start_) & kTokenStorageMask);
  }
  const TokenDesc& GetToken(int i) const {
    DCHECK(HasToken(i));
    return token_storage_[TokenIndex(i)];
  }
  const TokenDesc& current() const { return GetToken(0); }
  const TokenDesc& next() const { return GetToken(1); }
  const TokenDesc& next_next() const { return GetToken(2); }
  TokenDesc& scan_target() { return token_storage_[token_end_]; }
  TokenDesc& next_target() { return token_storage_[TokenIndex(1)]; }
  // Input stream. Must be initialized to an Utf16CharacterStream.
  Utf16CharacterStream* const source_;