[scanner] Don't use UnicodeCache for IsLineTerminator.

For such a simple predicate, calling an inline function that compares against
the four line-terminator values is faster (*) than maintaining the cache.

(*) When scanning a file that contains only comments, we're basically calling
IsLineTerminator in a loop. Parsing such files is now 7-18% faster in local
experiments.

BUG=v8:6092

Change-Id: I6a8f2aba9669a76152292f4e6c7853638d15aae3
Reviewed-on: https://chromium-review.googlesource.com/645633
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Adam Klein <adamk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47810}
This commit is contained in:
Marja Hölttä 2017-09-04 14:34:55 +02:00 committed by Commit Bot
parent 6d72ccf7fb
commit 2b6780dc17
6 changed files with 23 additions and 31 deletions

View File

@ -80,7 +80,7 @@ struct WhiteSpace {
// as well as \u0009 - \u000d and \ufeff.
struct WhiteSpaceOrLineTerminator {
static inline bool Is(uc32 c) {
return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
return WhiteSpace::Is(c) || unibrow::IsLineTerminator(c);
}
};

View File

@ -438,7 +438,7 @@ Token::Value Scanner::SkipWhiteSpace() {
// Advance as long as character is a WhiteSpace or LineTerminator.
// Remember if the latter is the case.
if (unicode_cache_->IsLineTerminator(c0_)) {
if (unibrow::IsLineTerminator(c0_)) {
has_line_terminator_before_next_ = true;
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break;
@ -495,7 +495,7 @@ Token::Value Scanner::SkipSingleLineComment() {
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar (see
// ECMA-262, section 7.4).
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
Advance();
}
@ -505,7 +505,7 @@ Token::Value Scanner::SkipSingleLineComment() {
Token::Value Scanner::SkipSourceURLComment() {
TryToParseSourceURLComment();
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
Advance();
}
@ -541,7 +541,7 @@ void Scanner::TryToParseSourceURLComment() {
while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
Advance();
}
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
// Disallowed characters.
if (c0_ == '"' || c0_ == '\'') {
value->Reset();
@ -554,7 +554,7 @@ void Scanner::TryToParseSourceURLComment() {
Advance();
}
// Allow whitespace at the end.
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
if (!unicode_cache_->IsWhiteSpace(c0_)) {
value->Reset();
break;
@ -571,7 +571,7 @@ Token::Value Scanner::SkipMultiLineComment() {
while (c0_ != kEndOfInput) {
uc32 ch = c0_;
Advance();
if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {
if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
// Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator.
has_multiline_comment_before_next_ = true;
@ -968,7 +968,7 @@ bool Scanner::ScanEscape() {
// Skip escaped newlines.
if (!in_template_literal && c0_ != kEndOfInput &&
unicode_cache_->IsLineTerminator(c)) {
unibrow::IsLineTerminator(c)) {
// Allow escaped CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
return true;
@ -1062,7 +1062,7 @@ Token::Value Scanner::ScanString() {
}
while (c0_ != quote && c0_ != kEndOfInput &&
!unicode_cache_->IsLineTerminator(c0_)) {
!unibrow::IsLineTerminator(c0_)) {
uc32 c = c0_;
Advance();
if (c == '\\') {
@ -1118,7 +1118,7 @@ Token::Value Scanner::ScanTemplateSpan() {
ReduceRawLiteralLength(2);
break;
} else if (c == '\\') {
if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {
if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
// code unit sequence.
uc32 lastChar = c0_;
@ -1659,12 +1659,14 @@ bool Scanner::ScanRegExpPattern() {
}
while (c0_ != '/' || in_character_class) {
if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
return false;
}
if (c0_ == '\\') { // Escape sequence.
AddLiteralCharAdvance();
if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
return false;
}
AddLiteralCharAdvance();
// If the escape allows more characters, i.e., \x??, \u????, or \c?,
// only "safe" characters are allowed (letters, digits, underscore),

View File

@ -20,15 +20,9 @@ bool UnicodeCache::IsIdentifierPart(unibrow::uchar c) {
return kIsIdentifierPart.get(c);
}
bool UnicodeCache::IsLineTerminator(unibrow::uchar c) {
return kIsLineTerminator.get(c);
}
bool UnicodeCache::IsLineTerminatorSequence(unibrow::uchar c,
unibrow::uchar next) {
if (!IsLineTerminator(c)) return false;
if (!unibrow::IsLineTerminator(c)) return false;
if (c == 0x000d && next == 0x000a) return false; // CR with following LF.
return true;
}

View File

@ -32,7 +32,6 @@ class UnicodeCache {
private:
unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
kIsWhiteSpaceOrLineTerminator;

View File

@ -1163,14 +1163,6 @@ bool WhiteSpace::Is(uchar c) {
}
#endif // !V8_INTL_SUPPORT
// LineTerminator: 'JS_Line_Terminator' in point.properties
// ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
bool LineTerminator::Is(uchar c) {
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}
#ifndef V8_INTL_SUPPORT
static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT
{{105, 775}}, {{kSentinel}} }; // NOLINT

View File

@ -194,9 +194,14 @@ struct V8_EXPORT_PRIVATE WhiteSpace {
static bool Is(uchar c);
};
#endif // !V8_INTL_SUPPORT
struct V8_EXPORT_PRIVATE LineTerminator {
static bool Is(uchar c);
};
// LineTerminator: 'JS_Line_Terminator' in point.properties
// ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS (U+2028), PS (U+2029)
// Returns true iff |c| is one of those four code points. This is a plain
// inline comparison, so callers can test a character directly instead of
// going through a unibrow::Predicate cache.
V8_INLINE bool IsLineTerminator(uchar c) {
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}
#ifndef V8_INTL_SUPPORT
struct ToLowercase {
static const int kMaxWidth = 3;