diff --git a/src/parsing/scanner-inl.h b/src/parsing/scanner-inl.h index 9647957062..bfa039e15b 100644 --- a/src/parsing/scanner-inl.h +++ b/src/parsing/scanner-inl.h @@ -149,200 +149,6 @@ static const Token::Value one_char_tokens[] = { }; // clang-format on -// ---------------------------------------------------------------------------- -// Keyword Matcher - -#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ - KEYWORD_GROUP('a') \ - KEYWORD("arguments", Token::ARGUMENTS) \ - KEYWORD("as", Token::AS) \ - KEYWORD("async", Token::ASYNC) \ - KEYWORD("await", Token::AWAIT) \ - KEYWORD("anonymous", Token::ANONYMOUS) \ - KEYWORD_GROUP('b') \ - KEYWORD("break", Token::BREAK) \ - KEYWORD_GROUP('c') \ - KEYWORD("case", Token::CASE) \ - KEYWORD("catch", Token::CATCH) \ - KEYWORD("class", Token::CLASS) \ - KEYWORD("const", Token::CONST) \ - KEYWORD("constructor", Token::CONSTRUCTOR) \ - KEYWORD("continue", Token::CONTINUE) \ - KEYWORD_GROUP('d') \ - KEYWORD("debugger", Token::DEBUGGER) \ - KEYWORD("default", Token::DEFAULT) \ - KEYWORD("delete", Token::DELETE) \ - KEYWORD("do", Token::DO) \ - KEYWORD_GROUP('e') \ - KEYWORD("else", Token::ELSE) \ - KEYWORD("enum", Token::ENUM) \ - KEYWORD("eval", Token::EVAL) \ - KEYWORD("export", Token::EXPORT) \ - KEYWORD("extends", Token::EXTENDS) \ - KEYWORD_GROUP('f') \ - KEYWORD("false", Token::FALSE_LITERAL) \ - KEYWORD("finally", Token::FINALLY) \ - KEYWORD("for", Token::FOR) \ - KEYWORD("from", Token::FROM) \ - KEYWORD("function", Token::FUNCTION) \ - KEYWORD_GROUP('g') \ - KEYWORD("get", Token::GET) \ - KEYWORD_GROUP('i') \ - KEYWORD("if", Token::IF) \ - KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("import", Token::IMPORT) \ - KEYWORD("in", Token::IN) \ - KEYWORD("instanceof", Token::INSTANCEOF) \ - KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD_GROUP('l') \ - KEYWORD("let", Token::LET) \ - KEYWORD_GROUP('m') \ - KEYWORD("meta", Token::META) \ - KEYWORD_GROUP('n') \ - KEYWORD("name", Token::NAME) \ - KEYWORD("new", Token::NEW) \ - KEYWORD("null", Token::NULL_LITERAL) \ - KEYWORD_GROUP('o') \ - KEYWORD("of", Token::OF) \ - KEYWORD_GROUP('p') \ - KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("prototype", Token::PROTOTYPE) \ - KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD_GROUP('r') \ - KEYWORD("return", Token::RETURN) \ - KEYWORD_GROUP('s') \ - KEYWORD("set", Token::SET) \ - KEYWORD("static", Token::STATIC) \ - KEYWORD("super", Token::SUPER) \ - KEYWORD("switch", Token::SWITCH) \ - KEYWORD_GROUP('t') \ - KEYWORD("target", Token::TARGET) \ - KEYWORD("this", Token::THIS) \ - KEYWORD("throw", Token::THROW) \ - KEYWORD("true", Token::TRUE_LITERAL) \ - KEYWORD("try", Token::TRY) \ - KEYWORD("typeof", Token::TYPEOF) \ - KEYWORD_GROUP('u') \ - KEYWORD("undefined", Token::UNDEFINED) \ - KEYWORD_GROUP('v') \ - KEYWORD("var", Token::VAR) \ - KEYWORD("void", Token::VOID) \ - KEYWORD_GROUP('w') \ - KEYWORD("while", Token::WHILE) \ - KEYWORD("with", Token::WITH) \ - KEYWORD_GROUP('y') \ - KEYWORD("yield", Token::YIELD) \ - KEYWORD_GROUP('_') \ - KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \ - KEYWORD_GROUP('#') \ - KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR) - -V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input, - int input_length) { - DCHECK_GE(input_length, 1); - const int kMinLength = 2; - const int kMaxLength = 12; - if (input_length < kMinLength || input_length > kMaxLength) { - return Token::IDENTIFIER; - } - switch (input[0]) { - default: -#define KEYWORD_GROUP_CASE(ch) \ - break; \ - case ch: -#define KEYWORD(keyword, token) \ - { \ - /* 'keyword' is a char array, so sizeof(keyword) is */ \ - /* strlen(keyword) plus 1 for the NUL char. */ \ - const int keyword_length = sizeof(keyword) - 1; \ - STATIC_ASSERT(keyword_length >= kMinLength); \ - STATIC_ASSERT(keyword_length <= kMaxLength); \ - DCHECK_EQ(input[0], keyword[0]); \ - DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \ - 0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \ - if (input_length == keyword_length && input[1] == keyword[1] && \ - (keyword_length <= 2 || input[2] == keyword[2]) && \ - (keyword_length <= 3 || input[3] == keyword[3]) && \ - (keyword_length <= 4 || input[4] == keyword[4]) && \ - (keyword_length <= 5 || input[5] == keyword[5]) && \ - (keyword_length <= 6 || input[6] == keyword[6]) && \ - (keyword_length <= 7 || input[7] == keyword[7]) && \ - (keyword_length <= 8 || input[8] == keyword[8]) && \ - (keyword_length <= 9 || input[9] == keyword[9]) && \ - (keyword_length <= 10 || input[10] == keyword[10])) { \ - return token; \ - } \ - } - KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) - } - return Token::IDENTIFIER; -#undef KEYWORDS -#undef KEYWORD -#undef KEYWORD_GROUP_CASE -} - -V8_INLINE Token::Value Scanner::ScanIdentifierOrKeyword() { - LiteralScope literal(this); - return ScanIdentifierOrKeywordInner(&literal); -} - -V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner( - LiteralScope* literal) { - DCHECK(unicode_cache_->IsIdentifierStart(c0_)); - bool escaped = false; - if (IsInRange(c0_, 'a', 'z') || c0_ == '_') { - do { - AddLiteralChar(static_cast(c0_)); - Advance(); - } while (IsInRange(c0_, 'a', 'z') || c0_ == '_'); - - if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') { - // Identifier starting with lowercase or _. - do { - AddLiteralChar(static_cast(c0_)); - Advance(); - } while (IsAsciiIdentifier(c0_)); - - if (c0_ <= kMaxAscii && c0_ != '\\') { - literal->Complete(); - return Token::IDENTIFIER; - } - } else if (c0_ <= kMaxAscii && c0_ != '\\') { - // Only a-z+ or _: could be a keyword or identifier. - Vector chars = next().literal_chars.one_byte_literal(); - Token::Value token = - KeywordOrIdentifierToken(chars.start(), chars.length()); - if (token == Token::IDENTIFIER || - token == Token::FUTURE_STRICT_RESERVED_WORD || - Token::IsContextualKeyword(token)) - literal->Complete(); - return token; - } - } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') { - do { - AddLiteralChar(static_cast(c0_)); - Advance(); - } while (IsAsciiIdentifier(c0_)); - - if (c0_ <= kMaxAscii && c0_ != '\\') { - literal->Complete(); - return Token::IDENTIFIER; - } - } else if (c0_ == '\\') { - escaped = true; - uc32 c = ScanIdentifierUnicodeEscape(); - DCHECK(!unicode_cache_->IsIdentifierStart(-1)); - if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) { - return Token::ILLEGAL; - } - AddLiteralChar(c); - } - - return ScanIdentifierOrKeywordInnerSlow(literal, escaped); -} - V8_INLINE Token::Value Scanner::SkipWhiteSpace() { int start_position = source_pos(); diff --git a/src/parsing/scanner.cc b/src/parsing/scanner.cc index da92fecf25..310ee1ee52 100644 --- a/src/parsing/scanner.cc +++ b/src/parsing/scanner.cc @@ -1005,8 +1005,197 @@ uc32 Scanner::ScanUnicodeEscape() { return ScanHexNumber(4); } -Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal, - bool escaped) { + +// ---------------------------------------------------------------------------- +// Keyword Matcher + +#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ + KEYWORD_GROUP('a') \ + KEYWORD("arguments", Token::ARGUMENTS) \ + KEYWORD("as", Token::AS) \ + KEYWORD("async", Token::ASYNC) \ + KEYWORD("await", Token::AWAIT) \ + KEYWORD("anonymous", Token::ANONYMOUS) \ + KEYWORD_GROUP('b') \ + KEYWORD("break", Token::BREAK) \ + KEYWORD_GROUP('c') \ + KEYWORD("case", Token::CASE) \ + KEYWORD("catch", Token::CATCH) \ + KEYWORD("class", Token::CLASS) \ + KEYWORD("const", Token::CONST) \ + KEYWORD("constructor", Token::CONSTRUCTOR) \ + KEYWORD("continue", Token::CONTINUE) \ + KEYWORD_GROUP('d') \ + KEYWORD("debugger", Token::DEBUGGER) \ + KEYWORD("default", Token::DEFAULT) \ + KEYWORD("delete", Token::DELETE) \ + KEYWORD("do", Token::DO) \ + KEYWORD_GROUP('e') \ + KEYWORD("else", Token::ELSE) \ + KEYWORD("enum", Token::ENUM) \ + KEYWORD("eval", Token::EVAL) \ + KEYWORD("export", Token::EXPORT) \ + KEYWORD("extends", Token::EXTENDS) \ + KEYWORD_GROUP('f') \ + KEYWORD("false", Token::FALSE_LITERAL) \ + KEYWORD("finally", Token::FINALLY) \ + KEYWORD("for", Token::FOR) \ + KEYWORD("from", Token::FROM) \ + KEYWORD("function", Token::FUNCTION) \ + KEYWORD_GROUP('g') \ + KEYWORD("get", Token::GET) \ + KEYWORD_GROUP('i') \ + KEYWORD("if", Token::IF) \ + KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("import", Token::IMPORT) \ + KEYWORD("in", Token::IN) \ + KEYWORD("instanceof", Token::INSTANCEOF) \ + KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('l') \ + KEYWORD("let", Token::LET) \ + KEYWORD_GROUP('m') \ + KEYWORD("meta", Token::META) \ + KEYWORD_GROUP('n') \ + KEYWORD("name", Token::NAME) \ + KEYWORD("new", Token::NEW) \ + KEYWORD("null", Token::NULL_LITERAL) \ + KEYWORD_GROUP('o') \ + KEYWORD("of", Token::OF) \ + KEYWORD_GROUP('p') \ + KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("prototype", Token::PROTOTYPE) \ + KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('r') \ + KEYWORD("return", Token::RETURN) \ + KEYWORD_GROUP('s') \ + KEYWORD("set", Token::SET) \ + KEYWORD("static", Token::STATIC) \ + KEYWORD("super", Token::SUPER) \ + KEYWORD("switch", Token::SWITCH) \ + KEYWORD_GROUP('t') \ + KEYWORD("target", Token::TARGET) \ + KEYWORD("this", Token::THIS) \ + KEYWORD("throw", Token::THROW) \ + KEYWORD("true", Token::TRUE_LITERAL) \ + KEYWORD("try", Token::TRY) \ + KEYWORD("typeof", Token::TYPEOF) \ + KEYWORD_GROUP('u') \ + KEYWORD("undefined", Token::UNDEFINED) \ + KEYWORD_GROUP('v') \ + KEYWORD("var", Token::VAR) \ + KEYWORD("void", Token::VOID) \ + KEYWORD_GROUP('w') \ + KEYWORD("while", Token::WHILE) \ + KEYWORD("with", Token::WITH) \ + KEYWORD_GROUP('y') \ + KEYWORD("yield", Token::YIELD) \ + KEYWORD_GROUP('_') \ + KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \ + KEYWORD_GROUP('#') \ + KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR) + +static Token::Value KeywordOrIdentifierToken(const uint8_t* input, + int input_length) { + DCHECK_GE(input_length, 1); + const int kMinLength = 2; + const int kMaxLength = 12; + if (input_length < kMinLength || input_length > kMaxLength) { + return Token::IDENTIFIER; + } + switch (input[0]) { + default: +#define KEYWORD_GROUP_CASE(ch) \ + break; \ + case ch: +#define KEYWORD(keyword, token) \ + { \ + /* 'keyword' is a char array, so sizeof(keyword) is */ \ + /* strlen(keyword) plus 1 for the NUL char. */ \ + const int keyword_length = sizeof(keyword) - 1; \ + STATIC_ASSERT(keyword_length >= kMinLength); \ + STATIC_ASSERT(keyword_length <= kMaxLength); \ + DCHECK_EQ(input[0], keyword[0]); \ + DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \ + 0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \ + if (input_length == keyword_length && input[1] == keyword[1] && \ + (keyword_length <= 2 || input[2] == keyword[2]) && \ + (keyword_length <= 3 || input[3] == keyword[3]) && \ + (keyword_length <= 4 || input[4] == keyword[4]) && \ + (keyword_length <= 5 || input[5] == keyword[5]) && \ + (keyword_length <= 6 || input[6] == keyword[6]) && \ + (keyword_length <= 7 || input[7] == keyword[7]) && \ + (keyword_length <= 8 || input[8] == keyword[8]) && \ + (keyword_length <= 9 || input[9] == keyword[9]) && \ + (keyword_length <= 10 || input[10] == keyword[10])) { \ + return token; \ + } \ + } + KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) + } + return Token::IDENTIFIER; +#undef KEYWORDS +#undef KEYWORD +#undef KEYWORD_GROUP_CASE +} + +Token::Value Scanner::ScanIdentifierOrKeyword() { + LiteralScope literal(this); + return ScanIdentifierOrKeywordInner(&literal); +} + +Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { + DCHECK(unicode_cache_->IsIdentifierStart(c0_)); + bool escaped = false; + if (IsInRange(c0_, 'a', 'z') || c0_ == '_') { + do { + AddLiteralChar(static_cast(c0_)); + Advance(); + } while (IsInRange(c0_, 'a', 'z') || c0_ == '_'); + + if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') { + // Identifier starting with lowercase or _. + do { + AddLiteralChar(static_cast(c0_)); + Advance(); + } while (IsAsciiIdentifier(c0_)); + + if (c0_ <= kMaxAscii && c0_ != '\\') { + literal->Complete(); + return Token::IDENTIFIER; + } + } else if (c0_ <= kMaxAscii && c0_ != '\\') { + // Only a-z+ or _: could be a keyword or identifier. + Vector chars = next().literal_chars.one_byte_literal(); + Token::Value token = + KeywordOrIdentifierToken(chars.start(), chars.length()); + if (token == Token::IDENTIFIER || + token == Token::FUTURE_STRICT_RESERVED_WORD || + Token::IsContextualKeyword(token)) + literal->Complete(); + return token; + } + } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') { + do { + AddLiteralChar(static_cast(c0_)); + Advance(); + } while (IsAsciiIdentifier(c0_)); + + if (c0_ <= kMaxAscii && c0_ != '\\') { + literal->Complete(); + return Token::IDENTIFIER; + } + } else if (c0_ == '\\') { + escaped = true; + uc32 c = ScanIdentifierUnicodeEscape(); + DCHECK(!unicode_cache_->IsIdentifierStart(-1)); + if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) { + return Token::ILLEGAL; + } + AddLiteralChar(c); + } + while (true) { if (c0_ == '\\') { escaped = true; diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h index a654f7958a..b87796388f 100644 --- a/src/parsing/scanner.h +++ b/src/parsing/scanner.h @@ -738,10 +738,8 @@ class Scanner { bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind); Token::Value ScanNumber(bool seen_period); - V8_INLINE Token::Value ScanIdentifierOrKeyword(); - V8_INLINE Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal); - Token::Value ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal, - bool escaped); + Token::Value ScanIdentifierOrKeyword(); + Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal); Token::Value ScanString(); Token::Value ScanPrivateName();