[scanner] Don't use UnicodeCache for IsLineTerminator.
For such a simple predicate, calling a(n inline) function that checks against the values is faster (*) than maintaining the cache.

(*) When scanning a file that contains only comments, we're basically calling IsLineTerminator in a loop. Parsing such files is now 7-18% faster in local experiments.

BUG=v8:6092

Change-Id: I6a8f2aba9669a76152292f4e6c7853638d15aae3
Reviewed-on: https://chromium-review.googlesource.com/645633
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Adam Klein <adamk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47810}
This commit is contained in:
parent
6d72ccf7fb
commit
2b6780dc17
@ -80,7 +80,7 @@ struct WhiteSpace {
|
||||
// as well as \u0009 - \u000d and \ufeff.
|
||||
struct WhiteSpaceOrLineTerminator {
|
||||
static inline bool Is(uc32 c) {
|
||||
return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
|
||||
return WhiteSpace::Is(c) || unibrow::IsLineTerminator(c);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -438,7 +438,7 @@ Token::Value Scanner::SkipWhiteSpace() {
|
||||
|
||||
// Advance as long as character is a WhiteSpace or LineTerminator.
|
||||
// Remember if the latter is the case.
|
||||
if (unicode_cache_->IsLineTerminator(c0_)) {
|
||||
if (unibrow::IsLineTerminator(c0_)) {
|
||||
has_line_terminator_before_next_ = true;
|
||||
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
|
||||
break;
|
||||
@ -495,7 +495,7 @@ Token::Value Scanner::SkipSingleLineComment() {
|
||||
// separately by the lexical grammar and becomes part of the
|
||||
// stream of input elements for the syntactic grammar (see
|
||||
// ECMA-262, section 7.4).
|
||||
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
|
||||
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
|
||||
Advance();
|
||||
}
|
||||
|
||||
@ -505,7 +505,7 @@ Token::Value Scanner::SkipSingleLineComment() {
|
||||
|
||||
Token::Value Scanner::SkipSourceURLComment() {
|
||||
TryToParseSourceURLComment();
|
||||
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
|
||||
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
|
||||
Advance();
|
||||
}
|
||||
|
||||
@ -541,7 +541,7 @@ void Scanner::TryToParseSourceURLComment() {
|
||||
while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
|
||||
Advance();
|
||||
}
|
||||
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
|
||||
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
|
||||
// Disallowed characters.
|
||||
if (c0_ == '"' || c0_ == '\'') {
|
||||
value->Reset();
|
||||
@ -554,7 +554,7 @@ void Scanner::TryToParseSourceURLComment() {
|
||||
Advance();
|
||||
}
|
||||
// Allow whitespace at the end.
|
||||
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) {
|
||||
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
|
||||
if (!unicode_cache_->IsWhiteSpace(c0_)) {
|
||||
value->Reset();
|
||||
break;
|
||||
@ -571,7 +571,7 @@ Token::Value Scanner::SkipMultiLineComment() {
|
||||
while (c0_ != kEndOfInput) {
|
||||
uc32 ch = c0_;
|
||||
Advance();
|
||||
if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) {
|
||||
if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
|
||||
// Following ECMA-262, section 7.4, a comment containing
|
||||
// a newline will make the comment count as a line-terminator.
|
||||
has_multiline_comment_before_next_ = true;
|
||||
@ -968,7 +968,7 @@ bool Scanner::ScanEscape() {
|
||||
|
||||
// Skip escaped newlines.
|
||||
if (!in_template_literal && c0_ != kEndOfInput &&
|
||||
unicode_cache_->IsLineTerminator(c)) {
|
||||
unibrow::IsLineTerminator(c)) {
|
||||
// Allow escaped CR+LF newlines in multiline string literals.
|
||||
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
|
||||
return true;
|
||||
@ -1062,7 +1062,7 @@ Token::Value Scanner::ScanString() {
|
||||
}
|
||||
|
||||
while (c0_ != quote && c0_ != kEndOfInput &&
|
||||
!unicode_cache_->IsLineTerminator(c0_)) {
|
||||
!unibrow::IsLineTerminator(c0_)) {
|
||||
uc32 c = c0_;
|
||||
Advance();
|
||||
if (c == '\\') {
|
||||
@ -1118,7 +1118,7 @@ Token::Value Scanner::ScanTemplateSpan() {
|
||||
ReduceRawLiteralLength(2);
|
||||
break;
|
||||
} else if (c == '\\') {
|
||||
if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) {
|
||||
if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
|
||||
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
|
||||
// code unit sequence.
|
||||
uc32 lastChar = c0_;
|
||||
@ -1659,12 +1659,14 @@ bool Scanner::ScanRegExpPattern() {
|
||||
}
|
||||
|
||||
while (c0_ != '/' || in_character_class) {
|
||||
if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
|
||||
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
|
||||
return false;
|
||||
}
|
||||
if (c0_ == '\\') { // Escape sequence.
|
||||
AddLiteralCharAdvance();
|
||||
if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_))
|
||||
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
|
||||
return false;
|
||||
}
|
||||
AddLiteralCharAdvance();
|
||||
// If the escape allows more characters, i.e., \x??, \u????, or \c?,
|
||||
// only "safe" characters are allowed (letters, digits, underscore),
|
||||
|
@ -20,15 +20,9 @@ bool UnicodeCache::IsIdentifierPart(unibrow::uchar c) {
|
||||
return kIsIdentifierPart.get(c);
|
||||
}
|
||||
|
||||
|
||||
bool UnicodeCache::IsLineTerminator(unibrow::uchar c) {
|
||||
return kIsLineTerminator.get(c);
|
||||
}
|
||||
|
||||
|
||||
bool UnicodeCache::IsLineTerminatorSequence(unibrow::uchar c,
|
||||
unibrow::uchar next) {
|
||||
if (!IsLineTerminator(c)) return false;
|
||||
if (!unibrow::IsLineTerminator(c)) return false;
|
||||
if (c == 0x000d && next == 0x000a) return false; // CR with following LF.
|
||||
return true;
|
||||
}
|
||||
|
@ -32,7 +32,6 @@ class UnicodeCache {
|
||||
private:
|
||||
unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
|
||||
unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
|
||||
unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
|
||||
unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
|
||||
unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
|
||||
kIsWhiteSpaceOrLineTerminator;
|
||||
|
@ -1163,14 +1163,6 @@ bool WhiteSpace::Is(uchar c) {
|
||||
}
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
// LineTerminator: 'JS_Line_Terminator' in point.properties
|
||||
// ES#sec-line-terminators lists exactly 4 code points:
|
||||
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
|
||||
|
||||
bool LineTerminator::Is(uchar c) {
|
||||
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
|
||||
}
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT
|
||||
{{105, 775}}, {{kSentinel}} }; // NOLINT
|
||||
|
@ -194,9 +194,14 @@ struct V8_EXPORT_PRIVATE WhiteSpace {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
struct V8_EXPORT_PRIVATE LineTerminator {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
|
||||
// LineTerminator: 'JS_Line_Terminator' in point.properties
|
||||
// ES#sec-line-terminators lists exactly 4 code points:
|
||||
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
|
||||
V8_INLINE bool IsLineTerminator(uchar c) {
|
||||
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
|
||||
}
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
struct ToLowercase {
|
||||
static const int kMaxWidth = 3;
|
||||
|
Loading…
Reference in New Issue
Block a user