[scanner] Don't use UnicodeCache for IsLineTerminator.

For such a simple predicate, calling an inline function that compares the character against the four line-terminator values is faster (*) than maintaining the cache.

(*) When scanning a file that contains only comments, we're basically calling IsLineTerminator in a loop. Parsing such files is now 7-18% faster in local experiments.

BUG=v8:6092
Change-Id: I6a8f2aba9669a76152292f4e6c7853638d15aae3
Reviewed-on: https://chromium-review.googlesource.com/645633
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Adam Klein <adamk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47810}
2017-09-04 14:34:55 +02:00 · 2017-09-04 14:34:55 +02:00 · 2b6780dc17
commit 2b6780dc17
parent 6d72ccf7fb
6 changed files with 23 additions and 31 deletions
--- a/src/char-predicates.h
+++ b/src/char-predicates.h
@ -80,7 +80,7 @@ struct WhiteSpace {
 // as well as \u0009 - \u000d and \ufeff.
 struct WhiteSpaceOrLineTerminator {
  static inline bool Is(uc32 c) {
-    return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
+    return WhiteSpace::Is(c) || unibrow::IsLineTerminator(c);
  }
 };

--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@ -438,7 +438,7 @@ Token::Value Scanner::SkipWhiteSpace() {

      // Advance as long as character is a WhiteSpace or LineTerminator.
      // Remember if the latter is the case.
-      if (unicode_cache_->IsLineTerminator(c0_)) {
+      if (unibrow::IsLineTerminator(c0_)) {
        has_line_terminator_before_next_ = true;
      } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
        break;
@ -495,7 +495,7 @@ Token::Value Scanner::SkipSingleLineComment() {
  // separately by the lexical grammar and becomes part of the
  // stream of input elements for the syntactic grammar (see
  // ECMA-262, section 7.4).
-  while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
    Advance();
  }

@ -505,7 +505,7 @@ Token::Value Scanner::SkipSingleLineComment() {

 Token::Value Scanner::SkipSourceURLComment() {
  TryToParseSourceURLComment();
-  while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
    Advance();
  }

@ -541,7 +541,7 @@ void Scanner::TryToParseSourceURLComment() {
  while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
    Advance();
  }
-  while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
    // Disallowed characters.
    if (c0_ == '"' || c0_ == '\'') {
      value->Reset();
@ -554,7 +554,7 @@ void Scanner::TryToParseSourceURLComment() {
    Advance();
  }
  // Allow whitespace at the end.
-  while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
+  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
    if (!unicode_cache_->IsWhiteSpace(c0_)) {
      value->Reset();
      break;
@ -571,7 +571,7 @@ Token::Value Scanner::SkipMultiLineComment() {
  while (c0_ != kEndOfInput) {
    uc32 ch = c0_;
    Advance();
-    if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {
+    if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
      // Following ECMA-262, section 7.4, a comment containing
      // a newline will make the comment count as a line-terminator.
      has_multiline_comment_before_next_ = true;
@ -968,7 +968,7 @@ bool Scanner::ScanEscape() {

  // Skip escaped newlines.
  if (!in_template_literal && c0_ != kEndOfInput &&
-      unicode_cache_->IsLineTerminator(c)) {
+      unibrow::IsLineTerminator(c)) {
    // Allow escaped CR+LF newlines in multiline string literals.
    if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
    return true;
@ -1062,7 +1062,7 @@ Token::Value Scanner::ScanString() {
  }

  while (c0_ != quote && c0_ != kEndOfInput &&
-         !unicode_cache_->IsLineTerminator(c0_)) {
+         !unibrow::IsLineTerminator(c0_)) {
    uc32 c = c0_;
    Advance();
    if (c == '\\') {
@ -1118,7 +1118,7 @@ Token::Value Scanner::ScanTemplateSpan() {
      ReduceRawLiteralLength(2);
      break;
    } else if (c == '\\') {
-      if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {
+      if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
        // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
        // code unit sequence.
        uc32 lastChar = c0_;
@ -1659,12 +1659,14 @@ bool Scanner::ScanRegExpPattern() {
  }

  while (c0_ != '/' || in_character_class) {
-    if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
+    if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
      return false;
+    }
    if (c0_ == '\\') {  // Escape sequence.
      AddLiteralCharAdvance();
-      if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
+      if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
        return false;
+      }
      AddLiteralCharAdvance();
      // If the escape allows more characters, i.e., \x??, \u????, or \c?,
      // only "safe" characters are allowed (letters, digits, underscore),
--- a/src/unicode-cache-inl.h
+++ b/src/unicode-cache-inl.h
@ -20,15 +20,9 @@ bool UnicodeCache::IsIdentifierPart(unibrow::uchar c) {
  return kIsIdentifierPart.get(c);
 }

-
-bool UnicodeCache::IsLineTerminator(unibrow::uchar c) {
-  return kIsLineTerminator.get(c);
-}
-
-
 bool UnicodeCache::IsLineTerminatorSequence(unibrow::uchar c,
                                            unibrow::uchar next) {
-  if (!IsLineTerminator(c)) return false;
+  if (!unibrow::IsLineTerminator(c)) return false;
  if (c == 0x000d && next == 0x000a) return false;  // CR with following LF.
  return true;
 }
--- a/src/unicode-cache.h
+++ b/src/unicode-cache.h
@ -32,7 +32,6 @@ class UnicodeCache {
 private:
  unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
  unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
-  unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
  unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
  unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
      kIsWhiteSpaceOrLineTerminator;
--- a/src/unicode.cc
+++ b/src/unicode.cc
@ -1163,14 +1163,6 @@ bool WhiteSpace::Is(uchar c) {
 }
 #endif  // !V8_INTL_SUPPORT

-// LineTerminator:       'JS_Line_Terminator' in point.properties
-// ES#sec-line-terminators lists exactly 4 code points:
-// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
-
-bool LineTerminator::Is(uchar c) {
-  return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
-}
-
 #ifndef V8_INTL_SUPPORT
 static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = {  // NOLINT
  {{105, 775}}, {{kSentinel}} }; // NOLINT
--- a/src/unicode.h
+++ b/src/unicode.h
@ -194,9 +194,14 @@ struct V8_EXPORT_PRIVATE WhiteSpace {
  static bool Is(uchar c);
 };
 #endif  // !V8_INTL_SUPPORT
-struct V8_EXPORT_PRIVATE LineTerminator {
-  static bool Is(uchar c);
-};
+
+// LineTerminator:       'JS_Line_Terminator' in point.properties
+// ES#sec-line-terminators lists exactly 4 code points:
+// LF (U+000A), CR (U+000D), LS (U+2028), PS (U+2029)
+V8_INLINE bool IsLineTerminator(uchar c) {
+  return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
+}
+
 #ifndef V8_INTL_SUPPORT
 struct ToLowercase {
  static const int kMaxWidth = 3;