Reland "[parser] Perfect hash for keywords"
This is a reland of ca086a497c
Original change's description:
> [parser] Perfect hash for keywords
>
> Use gperf to generate a perfect hash table for keyword lookup. Adds a
> python script which munges the output of gperf and adds additional
> cleanup and optimisations.
>
> Change-Id: I3656a7287dbd0688917893de3a671faef9e4578a
> Reviewed-on: https://chromium-review.googlesource.com/c/1349240
> Commit-Queue: Leszek Swirski <leszeks@chromium.org>
> Reviewed-by: Toon Verwaest <verwaest@chromium.org>
> Reviewed-by: Marja Hölttä <marja@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#57790}
Change-Id: Ifb53527ba3d0652ea4f5d03740f7c856ad5d91da
Reviewed-on: https://chromium-review.googlesource.com/c/1350121
Reviewed-by: Toon Verwaest <verwaest@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57831}
This commit is contained in:
parent
9b8937c9d0
commit
47daa48696
225
src/parsing/keywords-gen.h
Normal file
225
src/parsing/keywords-gen.h
Normal file
@ -0,0 +1,225 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// This file is automatically generated by gen-keywords-gen-h.py and should not
|
||||
// be modified manually.
|
||||
|
||||
#ifndef V8_PARSING_KEYWORDS_GEN_H_
|
||||
#define V8_PARSING_KEYWORDS_GEN_H_
|
||||
|
||||
#include "src/parsing/token.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
/* C++ code produced by gperf version 3.1 */
|
||||
/* Command-line: gperf -m100 src/parsing/keywords.txt */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
#if !( \
|
||||
(' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) && ('%' == 37) && \
|
||||
('&' == 38) && ('\'' == 39) && ('(' == 40) && (')' == 41) && \
|
||||
('*' == 42) && ('+' == 43) && (',' == 44) && ('-' == 45) && ('.' == 46) && \
|
||||
('/' == 47) && ('0' == 48) && ('1' == 49) && ('2' == 50) && ('3' == 51) && \
|
||||
('4' == 52) && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) && \
|
||||
('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) && ('=' == 61) && \
|
||||
('>' == 62) && ('?' == 63) && ('A' == 65) && ('B' == 66) && ('C' == 67) && \
|
||||
('D' == 68) && ('E' == 69) && ('F' == 70) && ('G' == 71) && ('H' == 72) && \
|
||||
('I' == 73) && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) && \
|
||||
('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) && ('R' == 82) && \
|
||||
('S' == 83) && ('T' == 84) && ('U' == 85) && ('V' == 86) && ('W' == 87) && \
|
||||
('X' == 88) && ('Y' == 89) && ('Z' == 90) && ('[' == 91) && \
|
||||
('\\' == 92) && (']' == 93) && ('^' == 94) && ('_' == 95) && \
|
||||
('a' == 97) && ('b' == 98) && ('c' == 99) && ('d' == 100) && \
|
||||
('e' == 101) && ('f' == 102) && ('g' == 103) && ('h' == 104) && \
|
||||
('i' == 105) && ('j' == 106) && ('k' == 107) && ('l' == 108) && \
|
||||
('m' == 109) && ('n' == 110) && ('o' == 111) && ('p' == 112) && \
|
||||
('q' == 113) && ('r' == 114) && ('s' == 115) && ('t' == 116) && \
|
||||
('u' == 117) && ('v' == 118) && ('w' == 119) && ('x' == 120) && \
|
||||
('y' == 121) && ('z' == 122) && ('{' == 123) && ('|' == 124) && \
|
||||
('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
#error "gperf generated tables don't work with this execution character set."
|
||||
// If you see this error, please report a bug to <bug-gperf@gnu.org>.
|
||||
#endif
|
||||
|
||||
#line 16 "src/parsing/keywords.txt"
|
||||
struct PerfectKeywordHashTableEntry {
|
||||
const char* name;
|
||||
Token::Value value;
|
||||
};
|
||||
// Constants emitted by gperf that describe the keyword set and the range of
// hash values produced for it.
enum {
  TOTAL_KEYWORDS = 47,   // Number of keywords in the table.
  MIN_WORD_LENGTH = 2,   // Length of the shortest keyword.
  MAX_WORD_LENGTH = 10,  // Length of the longest keyword.
  MIN_HASH_VALUE = 2,    // Smallest hash of any keyword.
  MAX_HASH_VALUE = 51    // Largest hash of any keyword.
};
|
||||
|
||||
/* maximum key range = 50, duplicates = 0 */
|
||||
|
||||
class PerfectKeywordHash {
|
||||
private:
|
||||
static inline unsigned int Hash(const char* str, int len);
|
||||
|
||||
public:
|
||||
static inline Token::Value GetToken(const char* str, int len);
|
||||
};
|
||||
|
||||
// Computes the gperf hash of a candidate keyword: its length plus the
// association values of its first two characters. Reads str[0] and str[1], so
// the caller must supply at least two characters (GetToken() only calls this
// after checking len against MIN_WORD_LENGTH).
inline unsigned int PerfectKeywordHash::Hash(const char* str, int len) {
  // The checks are made on the unsigned value that is actually used as the
  // table index. Where plain char is signed, a byte >= 0x80 is negative and
  // would satisfy a signed "< 128" comparison while still indexing past the
  // end of the 128-entry table below.
  DCHECK_LT(static_cast<unsigned char>(str[1]), 128);
  DCHECK_LT(static_cast<unsigned char>(str[0]), 128);
  // Association value per 7-bit character code. 52 exceeds MAX_HASH_VALUE and
  // is used for every character that starts no keyword.
  static const unsigned char asso_values[128] = {
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 8,  2,  6,  0,  0,  9,  52, 21, 0,  52, 52, 36, 40, 0,  3,
      6,  52, 17, 13, 16, 16, 38, 25, 6,  26, 52, 52, 52, 52, 52, 52};
  return len + asso_values[static_cast<unsigned char>(str[1])] +
         asso_values[static_cast<unsigned char>(str[0])];
}
|
||||
|
||||
// Length of the keyword stored in each slot of kPerfectKeywordHashTable,
// indexed by masked hash value. Unused and padding slots hold 0.
static const unsigned char kPerfectKeywordLengthTable[64] = {
    // Slots 0-15.
    0, 0, 2, 3, 4, 2, 6, 7, 8, 9, 10, 2, 6, 7, 5, 3,
    // Slots 16-31.
    7, 8, 4, 5, 4, 7, 5, 6, 5, 0, 5, 0, 6, 4, 7, 5,
    // Slots 32-47.
    9, 8, 5, 6, 3, 4, 5, 3, 4, 4, 5, 0, 6, 4, 6, 5,
    // Slots 48-63; everything after slot 51 is padding.
    6, 3, 10, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
static const struct PerfectKeywordHashTableEntry kPerfectKeywordHashTable[64] =
|
||||
{{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
#line 41 "src/parsing/keywords.txt"
|
||||
{"in", Token::IN},
|
||||
#line 45 "src/parsing/keywords.txt"
|
||||
{"new", Token::NEW},
|
||||
#line 31 "src/parsing/keywords.txt"
|
||||
{"enum", Token::ENUM},
|
||||
#line 29 "src/parsing/keywords.txt"
|
||||
{"do", Token::DO},
|
||||
#line 28 "src/parsing/keywords.txt"
|
||||
{"delete", Token::DELETE},
|
||||
#line 27 "src/parsing/keywords.txt"
|
||||
{"default", Token::DEFAULT},
|
||||
#line 26 "src/parsing/keywords.txt"
|
||||
{"debugger", Token::DEBUGGER},
|
||||
#line 43 "src/parsing/keywords.txt"
|
||||
{"interface", Token::FUTURE_STRICT_RESERVED_WORD},
|
||||
#line 42 "src/parsing/keywords.txt"
|
||||
{"instanceof", Token::INSTANCEOF},
|
||||
#line 38 "src/parsing/keywords.txt"
|
||||
{"if", Token::IF},
|
||||
#line 32 "src/parsing/keywords.txt"
|
||||
{"export", Token::EXPORT},
|
||||
#line 33 "src/parsing/keywords.txt"
|
||||
{"extends", Token::EXTENDS},
|
||||
#line 24 "src/parsing/keywords.txt"
|
||||
{"const", Token::CONST},
|
||||
#line 36 "src/parsing/keywords.txt"
|
||||
{"for", Token::FOR},
|
||||
#line 35 "src/parsing/keywords.txt"
|
||||
{"finally", Token::FINALLY},
|
||||
#line 25 "src/parsing/keywords.txt"
|
||||
{"continue", Token::CONTINUE},
|
||||
#line 21 "src/parsing/keywords.txt"
|
||||
{"case", Token::CASE},
|
||||
#line 22 "src/parsing/keywords.txt"
|
||||
{"catch", Token::CATCH},
|
||||
#line 46 "src/parsing/keywords.txt"
|
||||
{"null", Token::NULL_LITERAL},
|
||||
#line 47 "src/parsing/keywords.txt"
|
||||
{"package", Token::FUTURE_STRICT_RESERVED_WORD},
|
||||
#line 34 "src/parsing/keywords.txt"
|
||||
{"false", Token::FALSE_LITERAL},
|
||||
#line 51 "src/parsing/keywords.txt"
|
||||
{"return", Token::RETURN},
|
||||
#line 20 "src/parsing/keywords.txt"
|
||||
{"break", Token::BREAK},
|
||||
{"", Token::IDENTIFIER},
|
||||
#line 18 "src/parsing/keywords.txt"
|
||||
{"async", Token::ASYNC},
|
||||
{"", Token::IDENTIFIER},
|
||||
#line 50 "src/parsing/keywords.txt"
|
||||
{"public", Token::FUTURE_STRICT_RESERVED_WORD},
|
||||
#line 63 "src/parsing/keywords.txt"
|
||||
{"with", Token::WITH},
|
||||
#line 48 "src/parsing/keywords.txt"
|
||||
{"private", Token::FUTURE_STRICT_RESERVED_WORD},
|
||||
#line 64 "src/parsing/keywords.txt"
|
||||
{"yield", Token::YIELD},
|
||||
#line 49 "src/parsing/keywords.txt"
|
||||
{"protected", Token::FUTURE_STRICT_RESERVED_WORD},
|
||||
#line 37 "src/parsing/keywords.txt"
|
||||
{"function", Token::FUNCTION},
|
||||
#line 53 "src/parsing/keywords.txt"
|
||||
{"super", Token::SUPER},
|
||||
#line 52 "src/parsing/keywords.txt"
|
||||
{"static", Token::STATIC},
|
||||
#line 58 "src/parsing/keywords.txt"
|
||||
{"try", Token::TRY},
|
||||
#line 57 "src/parsing/keywords.txt"
|
||||
{"true", Token::TRUE_LITERAL},
|
||||
#line 19 "src/parsing/keywords.txt"
|
||||
{"await", Token::AWAIT},
|
||||
#line 44 "src/parsing/keywords.txt"
|
||||
{"let", Token::LET},
|
||||
#line 30 "src/parsing/keywords.txt"
|
||||
{"else", Token::ELSE},
|
||||
#line 55 "src/parsing/keywords.txt"
|
||||
{"this", Token::THIS},
|
||||
#line 56 "src/parsing/keywords.txt"
|
||||
{"throw", Token::THROW},
|
||||
{"", Token::IDENTIFIER},
|
||||
#line 54 "src/parsing/keywords.txt"
|
||||
{"switch", Token::SWITCH},
|
||||
#line 61 "src/parsing/keywords.txt"
|
||||
{"void", Token::VOID},
|
||||
#line 40 "src/parsing/keywords.txt"
|
||||
{"import", Token::IMPORT},
|
||||
#line 23 "src/parsing/keywords.txt"
|
||||
{"class", Token::CLASS},
|
||||
#line 59 "src/parsing/keywords.txt"
|
||||
{"typeof", Token::TYPEOF},
|
||||
#line 60 "src/parsing/keywords.txt"
|
||||
{"var", Token::VAR},
|
||||
#line 39 "src/parsing/keywords.txt"
|
||||
{"implements", Token::FUTURE_STRICT_RESERVED_WORD},
|
||||
#line 62 "src/parsing/keywords.txt"
|
||||
{"while", Token::WHILE},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER},
|
||||
{"", Token::IDENTIFIER}};
|
||||
|
||||
// Looks up the |len|-character string at |str| in the keyword table. Returns
// the keyword's token on an exact match and Token::IDENTIFIER in every other
// case (length out of range, slot length mismatch, or differing characters).
inline Token::Value PerfectKeywordHash::GetToken(const char* str, int len) {
  if (!IsInRange(len, MIN_WORD_LENGTH, MAX_WORD_LENGTH)) {
    return Token::IDENTIFIER;
  }

  // Both tables have 64 entries, so the masked hash is always a valid index.
  const unsigned int slot = Hash(str, len) & 0x3f;
  DCHECK_LT(slot, arraysize(kPerfectKeywordLengthTable));
  DCHECK_LT(slot, arraysize(kPerfectKeywordHashTable));

  if (len != kPerfectKeywordLengthTable[slot]) {
    return Token::IDENTIFIER;
  }

  // The lengths agree, so walking the stored keyword up to its terminating
  // NUL reads exactly |len| characters of |str|.
  for (const char* expected = kPerfectKeywordHashTable[slot].name;
       *expected != 0; ++expected, ++str) {
    if (*expected != *str) return Token::IDENTIFIER;
  }
  return kPerfectKeywordHashTable[slot].value;
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_PARSING_KEYWORDS_GEN_H_
|
64
src/parsing/keywords.txt
Normal file
64
src/parsing/keywords.txt
Normal file
@ -0,0 +1,64 @@
|
||||
%struct-type
|
||||
%language=C++
|
||||
%global-table
|
||||
%define initializer-suffix ,Token::IDENTIFIER
|
||||
%define hash-function-name Hash
|
||||
%define lookup-function-name GetToken
|
||||
%define class-name PerfectKeywordHash
|
||||
%define word-array-name kPerfectKeywordHashTable
|
||||
%define length-table-name kPerfectKeywordLengthTable
|
||||
%7bit
|
||||
%compare-lengths
|
||||
%enum
|
||||
%readonly-tables
|
||||
%compare-strncmp
|
||||
|
||||
struct PerfectKeywordHashTableEntry { const char* name; Token::Value value; };
|
||||
%%
|
||||
async, Token::ASYNC
|
||||
await, Token::AWAIT
|
||||
break, Token::BREAK
|
||||
case, Token::CASE
|
||||
catch, Token::CATCH
|
||||
class, Token::CLASS
|
||||
const, Token::CONST
|
||||
continue, Token::CONTINUE
|
||||
debugger, Token::DEBUGGER
|
||||
default, Token::DEFAULT
|
||||
delete, Token::DELETE
|
||||
do, Token::DO
|
||||
else, Token::ELSE
|
||||
enum, Token::ENUM
|
||||
export, Token::EXPORT
|
||||
extends, Token::EXTENDS
|
||||
false, Token::FALSE_LITERAL
|
||||
finally, Token::FINALLY
|
||||
for, Token::FOR
|
||||
function, Token::FUNCTION
|
||||
if, Token::IF
|
||||
implements, Token::FUTURE_STRICT_RESERVED_WORD
|
||||
import, Token::IMPORT
|
||||
in, Token::IN
|
||||
instanceof, Token::INSTANCEOF
|
||||
interface, Token::FUTURE_STRICT_RESERVED_WORD
|
||||
let, Token::LET
|
||||
new, Token::NEW
|
||||
null, Token::NULL_LITERAL
|
||||
package, Token::FUTURE_STRICT_RESERVED_WORD
|
||||
private, Token::FUTURE_STRICT_RESERVED_WORD
|
||||
protected, Token::FUTURE_STRICT_RESERVED_WORD
|
||||
public, Token::FUTURE_STRICT_RESERVED_WORD
|
||||
return, Token::RETURN
|
||||
static, Token::STATIC
|
||||
super, Token::SUPER
|
||||
switch, Token::SWITCH
|
||||
this, Token::THIS
|
||||
throw, Token::THROW
|
||||
true, Token::TRUE_LITERAL
|
||||
try, Token::TRY
|
||||
typeof, Token::TYPEOF
|
||||
var, Token::VAR
|
||||
void, Token::VOID
|
||||
while, Token::WHILE
|
||||
with, Token::WITH
|
||||
yield, Token::YIELD
|
@ -6,6 +6,7 @@
|
||||
#define V8_PARSING_SCANNER_INL_H_
|
||||
|
||||
#include "src/char-predicates-inl.h"
|
||||
#include "src/parsing/keywords-gen.h"
|
||||
#include "src/parsing/scanner.h"
|
||||
|
||||
namespace v8 {
|
||||
@ -90,44 +91,8 @@ constexpr bool IsKeywordStart(char c) {
|
||||
V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input,
|
||||
int input_length) {
|
||||
DCHECK_GE(input_length, 1);
|
||||
const int kMinLength = 2;
|
||||
const int kMaxLength = 10;
|
||||
if (!IsInRange(input_length, kMinLength, kMaxLength)) {
|
||||
return Token::IDENTIFIER;
|
||||
}
|
||||
switch (input[0]) {
|
||||
default:
|
||||
#define KEYWORD_GROUP_CASE(ch) \
|
||||
break; \
|
||||
case ch:
|
||||
#define KEYWORD(keyword, token) \
|
||||
{ \
|
||||
/* 'keyword' is a char array, so sizeof(keyword) is */ \
|
||||
/* strlen(keyword) plus 1 for the NUL char. */ \
|
||||
const int keyword_length = sizeof(keyword) - 1; \
|
||||
STATIC_ASSERT(keyword_length >= kMinLength); \
|
||||
STATIC_ASSERT(keyword_length <= kMaxLength); \
|
||||
DCHECK_EQ(input[0], keyword[0]); \
|
||||
DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \
|
||||
0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
|
||||
if (input_length == keyword_length && input[1] == keyword[1] && \
|
||||
(keyword_length <= 2 || input[2] == keyword[2]) && \
|
||||
(keyword_length <= 3 || input[3] == keyword[3]) && \
|
||||
(keyword_length <= 4 || input[4] == keyword[4]) && \
|
||||
(keyword_length <= 5 || input[5] == keyword[5]) && \
|
||||
(keyword_length <= 6 || input[6] == keyword[6]) && \
|
||||
(keyword_length <= 7 || input[7] == keyword[7]) && \
|
||||
(keyword_length <= 8 || input[8] == keyword[8]) && \
|
||||
(keyword_length <= 9 || input[9] == keyword[9]) && \
|
||||
(keyword_length <= 10 || input[10] == keyword[10])) { \
|
||||
return token; \
|
||||
} \
|
||||
}
|
||||
KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
|
||||
}
|
||||
return Token::IDENTIFIER;
|
||||
#undef KEYWORD
|
||||
#undef KEYWORD_GROUP_CASE
|
||||
return PerfectKeywordHash::GetToken(reinterpret_cast<const char*>(input),
|
||||
input_length);
|
||||
}
|
||||
|
||||
// Recursive constexpr template magic to check if a character is in a given
|
||||
@ -269,9 +234,15 @@ static constexpr const uint8_t character_scan_flags[128] = {
|
||||
#undef CALL_GET_SCAN_FLAGS
|
||||
};
|
||||
|
||||
// Returns true when |c| indexes into character_scan_flags and CanBeKeyword()
// accepts the flags stored for it; any character beyond the table yields
// false.
inline bool CharCanBeKeyword(uc32 c) {
  const bool has_scan_flags =
      static_cast<uint32_t>(c) < arraysize(character_scan_flags);
  if (!has_scan_flags) return false;
  return CanBeKeyword(character_scan_flags[c]);
}
|
||||
|
||||
V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
|
||||
DCHECK(IsIdentifierStart(c0_));
|
||||
bool escaped = false;
|
||||
bool can_be_keyword = true;
|
||||
|
||||
STATIC_ASSERT(arraysize(character_scan_flags) == kMaxAscii + 1);
|
||||
if (V8_LIKELY(static_cast<uint32_t>(c0_) <= kMaxAscii)) {
|
||||
@ -310,6 +281,8 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
|
||||
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
|
||||
return KeywordOrIdentifierToken(chars.start(), chars.length());
|
||||
}
|
||||
|
||||
can_be_keyword = CanBeKeyword(scan_flags);
|
||||
} else {
|
||||
// Special case for escapes at the start of an identifier.
|
||||
escaped = true;
|
||||
@ -319,10 +292,11 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
|
||||
return Token::ILLEGAL;
|
||||
}
|
||||
AddLiteralChar(c);
|
||||
can_be_keyword = CharCanBeKeyword(c);
|
||||
}
|
||||
}
|
||||
|
||||
return ScanIdentifierOrKeywordInnerSlow(escaped);
|
||||
return ScanIdentifierOrKeywordInnerSlow(escaped, can_be_keyword);
|
||||
}
|
||||
|
||||
V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
|
||||
|
@ -987,7 +987,8 @@ uc32 Scanner::ScanUnicodeEscape() {
|
||||
return ScanHexNumber<capture_raw, unicode>(4);
|
||||
}
|
||||
|
||||
Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(bool escaped) {
|
||||
Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(bool escaped,
|
||||
bool can_be_keyword) {
|
||||
while (true) {
|
||||
if (c0_ == '\\') {
|
||||
escaped = true;
|
||||
@ -999,16 +1000,18 @@ Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(bool escaped) {
|
||||
if (c == '\\' || !IsIdentifierPart(c)) {
|
||||
return Token::ILLEGAL;
|
||||
}
|
||||
can_be_keyword = can_be_keyword && CharCanBeKeyword(c);
|
||||
AddLiteralChar(c);
|
||||
} else if (IsIdentifierPart(c0_) ||
|
||||
(CombineSurrogatePair() && IsIdentifierPart(c0_))) {
|
||||
can_be_keyword = can_be_keyword && CharCanBeKeyword(c0_);
|
||||
AddLiteralCharAdvance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (next().literal_chars.is_one_byte()) {
|
||||
if (can_be_keyword && next().literal_chars.is_one_byte()) {
|
||||
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
|
||||
Token::Value token =
|
||||
KeywordOrIdentifierToken(chars.start(), chars.length());
|
||||
|
@ -729,7 +729,8 @@ class Scanner {
|
||||
Token::Value ScanNumber(bool seen_period);
|
||||
V8_INLINE Token::Value ScanIdentifierOrKeyword();
|
||||
V8_INLINE Token::Value ScanIdentifierOrKeywordInner();
|
||||
Token::Value ScanIdentifierOrKeywordInnerSlow(bool escaped);
|
||||
Token::Value ScanIdentifierOrKeywordInnerSlow(bool escaped,
|
||||
bool can_be_keyword);
|
||||
|
||||
Token::Value ScanString();
|
||||
Token::Value ScanPrivateName();
|
||||
|
247
tools/gen-keywords-gen-h.py
Executable file
247
tools/gen-keywords-gen-h.py
Executable file
@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2018 the V8 project authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import re
|
||||
import math
|
||||
import datetime
|
||||
|
||||
INPUT_PATH = "src/parsing/keywords.txt"
|
||||
OUTPUT_PATH = "src/parsing/keywords-gen.h"
|
||||
|
||||
# TODO(leszeks): Trimming seems to regress performance, investigate.
|
||||
TRIM_CHAR_TABLE = False
|
||||
|
||||
|
||||
def next_power_of_2(x):
  # Returns the smallest power of two that is >= x, for a non-negative
  # integer x (0 and 1 both map to 1).
  #
  # Integer bit arithmetic is used instead of math.log(x, 2): the
  # floating-point logarithm is not exact for every power of two (for example
  # 2**29 comes out slightly above 29), so rounding it up can produce a result
  # twice as large as required.
  if x < 0:
    # math.log raised ValueError for negative input; keep rejecting it.
    raise ValueError("next_power_of_2 requires a non-negative value")
  return 1 if x == 0 else 1 << (x - 1).bit_length()
|
||||
|
||||
|
||||
def call_with_input(cmd, input_string=""):
  # Runs cmd with input_string piped to its stdin and returns whatever it
  # wrote to stdout. Raises subprocess.CalledProcessError when the command
  # exits with a non-zero status.
  proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  captured = proc.communicate(input_string)[0]
  exit_status = proc.wait()
  if exit_status == 0:
    return captured
  raise subprocess.CalledProcessError(exit_status, cmd)
|
||||
|
||||
|
||||
def checked_sub(pattern, sub, out, count=1, flags=0):
  # Replaces every match of pattern in out with sub and returns the result,
  # but only if exactly `count` replacements were made; any other number of
  # matches raises an Exception naming the pattern.
  result, replacements = re.subn(pattern, sub, out, flags=flags)
  if replacements == count:
    return result
  raise Exception("Didn't get exactly %d replacement(s) for pattern: %s" %
                  (count, pattern))
|
||||
|
||||
|
||||
def change_sizet_to_int(out):
  # The scanner hands literal buffer lengths around as int, so every size_t in
  # the gperf output becomes int. Exactly four occurrences are expected;
  # checked_sub raises if gperf's output ever differs.
  expected_occurrences = 4
  return checked_sub(r'\bsize_t\b', 'int', out, count=expected_occurrences)
|
||||
|
||||
|
||||
def trim_and_dcheck_char_table(out):
  # Potential keyword strings are known to be lowercase ascii, so give the
  # asso_values table an explicit size and DCHECK every index into it. With
  # TRIM_CHAR_TABLE set, additionally chop off the rest of the table and mask
  # out the char.

  reads_re = re.compile(
      r'asso_values\[static_cast<unsigned char>\(str\[(\d+)\]\)\]')

  # One DCHECK per character the hash function reads. The check is emitted on
  # the unsigned char value that is actually used as the index: where plain
  # char is signed, a byte >= 0x80 is negative and would always satisfy a
  # signed "< 128" comparison, so the check could never fire.
  dchecks = []
  for str_read in reads_re.finditer(out):
    dchecks.append("DCHECK_LT(static_cast<unsigned char>(str[%d]), 128);" %
                   int(str_read.group(1)))

  if TRIM_CHAR_TABLE:
    # Drop the first 96 table entries and keep the remaining 32.
    out = checked_sub(
        r'static const unsigned char asso_values\[\]\s*=\s*\{(\s*\d+\s*,){96}',
        "".join(dchecks) + r'static const unsigned char asso_values[32] = {',
        out,
        flags=re.MULTILINE)
    # Mask every read so that it indexes the shortened table.
    out = checked_sub(
        reads_re.pattern,
        r'asso_values[static_cast<unsigned char>(str[(\1)]&31)]',
        out,
        count=len(dchecks),
        flags=re.MULTILINE)
  else:
    # Keep the whole table; only spell out its size and prepend the DCHECKs.
    out = checked_sub(
        r'static const unsigned char asso_values\[\]\s*=\s*\{',
        "".join(dchecks) + r'static const unsigned char asso_values[128] = {',
        out,
        flags=re.MULTILINE)

  return out
|
||||
|
||||
|
||||
def use_isinrange(out):
  # Swaps gperf's pair of length comparisons for a single IsInRange call,
  # which is more efficient than checking min and max separately.
  gperf_length_check = (
      r'if \(len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH\)')
  isinrange_check = r'if (IsInRange(len, MIN_WORD_LENGTH, MAX_WORD_LENGTH))'
  return checked_sub(gperf_length_check, isinrange_check, out)
|
||||
|
||||
|
||||
def pad_tables(out):
  # Rounds both lookup tables up to a power-of-two length and masks the hash
  # to that length, so that the generated lookup no longer has to compare the
  # key against MAX_HASH_VALUE.

  # Work out how many filler entries each table needs.
  max_hash_match = re.search(r'MAX_HASH_VALUE\s*=\s*(\d+)', out)
  unpadded_length = int(max_hash_match.group(1)) + 1
  padded_length = next_power_of_2(unpadded_length)
  filler_count = padded_length - unpadded_length

  # Length table: keep the existing entries (group 1) and append zeros.
  length_table_re = r"""
      static\ const\ unsigned\ char\ kPerfectKeywordLengthTable\[\]\s*=\s*\{
        (
          \s*%(single_lengthtable_entry)s\s*
          (?:,\s*%(single_lengthtable_entry)s\s*)*
        )
      \}
    """ % {'single_lengthtable_entry': r'\d+'}
  length_table_sub = (
      r'static const unsigned char kPerfectKeywordLengthTable[%d] = { \1 %s }'
      % (padded_length, ',0' * filler_count))
  out = checked_sub(
      length_table_re,
      length_table_sub,
      out,
      flags=re.MULTILINE | re.VERBOSE)

  # Word list: keep the existing entries (each optionally preceded by a #line
  # directive) and append empty IDENTIFIER entries.
  wordlist_entry = r"""
      (?:\#line\ \d+\ ".*"$\s*)?
      \{\s*"[a-z]*"\s*,\s*Token::[A-Z_]+\}
    """
  wordlist_re = r"""
      static\ const\ struct\ PerfectKeywordHashTableEntry\ kPerfectKeywordHashTable\[\]\s*=\s*\{
        (
          \s*%(single_wordlist_entry)s\s*
          (?:,\s*%(single_wordlist_entry)s\s*)*
        )
      \}
    """ % {'single_wordlist_entry': wordlist_entry}
  wordlist_sub = (
      r'static const struct PerfectKeywordHashTableEntry kPerfectKeywordHashTable[%d] = {\1 %s }'
      % (padded_length, ',{"",Token::IDENTIFIER}' * filler_count))
  out = checked_sub(
      wordlist_re,
      wordlist_sub,
      out,
      flags=re.MULTILINE | re.VERBOSE)

  # Mask the hash to the padded size and turn the range check into DCHECKs.
  hash_mask = padded_length - 1
  out = checked_sub(r'Hash\s*\(\s*str,\s*len\s*\)',
                    r'Hash(str, len)&0x%x' % hash_mask, out)
  out = checked_sub(
      r'if \(key <= MAX_HASH_VALUE\)',
      r'DCHECK_LT(key, arraysize(kPerfectKeywordLengthTable));DCHECK_LT(key, arraysize(kPerfectKeywordHashTable));',
      out)

  return out
|
||||
|
||||
|
||||
def return_token(out):
  # Makes GetToken hand back the Token::Value itself instead of a pointer to
  # the table entry.

  # Rewrite the return type at both the declaration and the definition, and
  # mark the function inline while at it.
  entry_pointer_signature = (
      r'const\s*struct\s*PerfectKeywordHashTableEntry\s*\*\s*((?:PerfectKeywordHash::)?GetToken)'
  )
  out = checked_sub(
      entry_pointer_signature, r'inline Token::Value \1', out, count=2)

  # A hit returns the entry's token ...
  out = checked_sub(r'return &kPerfectKeywordHashTable\[key\];',
                    r'return kPerfectKeywordHashTable[key].value;', out)

  # ... and a miss returns IDENTIFIER rather than a null pointer.
  out = checked_sub(r'return 0;', r'return Token::IDENTIFIER;', out)

  return out
|
||||
|
||||
|
||||
def memcmp_to_while(out):
  # Replaces the memcmp-based comparison with a hand-written while loop over
  # the keyword, which is faster than calling memcmp.
  # The match is deliberately built from escaped literal text, which keeps it
  # readable but makes it fragile against changes in gperf's output.
  memcmp_check = re.escape(
      "if (*str == *s && !memcmp (str + 1, s + 1, len - 1))")
  token_return = re.escape("return kPerfectKeywordHashTable[key].value;")
  char_loop = """
      while(*s!=0) {
        if (*s++ != *str++) return Token::IDENTIFIER;
      }
      return kPerfectKeywordHashTable[key].value;
      """
  return checked_sub(
      memcmp_check + r"\s*" + token_return,
      char_loop,
      out,
      flags=re.MULTILINE)
|
||||
|
||||
|
||||
def wrap_namespace(out):
  # Wraps the munged gperf output in the finished header: licence banner,
  # "generated file" notice, include guard, the token.h include and the
  # v8::internal namespace.
  header_template = """// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is automatically generated by gen-keywords-gen-h.py and should not
// be modified manually.

#ifndef V8_PARSING_KEYWORDS_GEN_H_
#define V8_PARSING_KEYWORDS_GEN_H_

#include "src/parsing/token.h"

namespace v8 {
namespace internal {

%s

} // namespace internal
} // namespace v8

#endif // V8_PARSING_KEYWORDS_GEN_H_
"""
  return header_template % (out)
|
||||
|
||||
|
||||
def trim_character_set_warning(out):
  # gperf's #error message is too long for one line. Keep only its first
  # sentence in the string and move the bug-report hint onto a following
  # comment line.
  long_message = '"gperf generated tables don\'t work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."'
  short_message = '"gperf generated tables don\'t work with this execution character set."\\\n// If you see this error, please report a bug to <bug-gperf@gnu.org>.'
  return out.replace(long_message, short_message)
|
||||
|
||||
|
||||
def main():
  # Regenerates OUTPUT_PATH: runs gperf on INPUT_PATH, applies the munging
  # passes in order, formats the result with clang-format and writes it out.
  # Returns 0 on success, or the exit code of the external command that
  # failed.
  try:
    # The checkout root is taken to be the parent of this script's directory.
    script_dir = os.path.dirname(sys.argv[0])
    root_dir = os.path.join(script_dir, '..')

    out = subprocess.check_output(["gperf", "-m100", INPUT_PATH], cwd=root_dir)

    # And now some munging of the generated file. The passes run in this
    # order.
    munging_passes = (
        change_sizet_to_int,
        trim_and_dcheck_char_table,
        use_isinrange,
        pad_tables,
        return_token,
        memcmp_to_while,
        wrap_namespace,
        trim_character_set_warning,
    )
    for munge in munging_passes:
      out = munge(out)

    # Final formatting.
    clang_format_path = os.path.join(root_dir,
                                     'third_party/depot_tools/clang-format')
    out = call_with_input([clang_format_path], out)

    with open(os.path.join(root_dir, OUTPUT_PATH), 'w') as f:
      f.write(out)

    return 0

  except subprocess.CalledProcessError as e:
    sys.stderr.write("Error calling '{}'\n".format(" ".join(e.cmd)))
    return e.returncode
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
Loading…
Reference in New Issue
Block a user