[parser] Perfect hash for keywords
Use gperf to generate a perfect hash table for keyword lookup. Add a Python script that munges the output of gperf and applies additional cleanup and optimisations.

Change-Id: I3656a7287dbd0688917893de3a671faef9e4578a
Reviewed-on: https://chromium-review.googlesource.com/c/1349240
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57790}
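For context on the gperf scheme: the generated hash is just the keyword length plus two table lookups keyed on the first two characters, masked down to the power-of-two padded table size. For example, Hash("new", 3) = 3 + asso_values['e'] + asso_values['n'] = 3 + 0 + 0 = 3, and entry 3 of kPerfectKeywordHashTable is {"new", Token::NEW}. Below is a minimal usage sketch of the generated API; the stand-alone main() harness is illustrative only (not part of this CL) and assumes the generated keywords-gen.h is on the include path. In V8 itself the scanner simply calls PerfectKeywordHash::GetToken, as shown in the scanner-inl.h hunk further down.

// Illustrative usage sketch, not part of this CL.
#include <cstdio>
#include <cstring>

#include "src/parsing/keywords-gen.h"

int main() {
  using v8::internal::PerfectKeywordHash;
  using v8::internal::Token;

  const char* candidates[] = {"new", "function", "frobnicate", "instanceof"};
  for (const char* word : candidates) {
    // GetToken returns the keyword's token, or Token::IDENTIFIER when the
    // word is not a keyword (wrong length, hash miss, or string mismatch).
    Token::Value token =
        PerfectKeywordHash::GetToken(word, static_cast<int>(std::strlen(word)));
    std::printf("%-12s -> %s\n", word,
                token == Token::IDENTIFIER ? "IDENTIFIER" : "keyword token");
  }
  return 0;
}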
Parent: 1e85444372
Commit: ca086a497c

src/parsing/keywords-gen.h (new file, 223 lines)
@@ -0,0 +1,223 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is automatically generated by gen-keywords-gen-h.py and should not
// be modified manually.

#ifndef V8_PARSING_KEYWORDS_GEN_H_
#define V8_PARSING_KEYWORDS_GEN_H_

#include "src/parsing/token.h"

namespace v8 {
namespace internal {

/* C++ code produced by gperf version 3.1 */
/* Command-line: gperf -m100 src/parsing/keywords.txt */
/* Computed positions: -k'1-2' */

#if !( \
    (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) && ('%' == 37) && \
    ('&' == 38) && ('\'' == 39) && ('(' == 40) && (')' == 41) && \
    ('*' == 42) && ('+' == 43) && (',' == 44) && ('-' == 45) && ('.' == 46) && \
    ('/' == 47) && ('0' == 48) && ('1' == 49) && ('2' == 50) && ('3' == 51) && \
    ('4' == 52) && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) && \
    ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) && ('=' == 61) && \
    ('>' == 62) && ('?' == 63) && ('A' == 65) && ('B' == 66) && ('C' == 67) && \
    ('D' == 68) && ('E' == 69) && ('F' == 70) && ('G' == 71) && ('H' == 72) && \
    ('I' == 73) && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) && \
    ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) && ('R' == 82) && \
    ('S' == 83) && ('T' == 84) && ('U' == 85) && ('V' == 86) && ('W' == 87) && \
    ('X' == 88) && ('Y' == 89) && ('Z' == 90) && ('[' == 91) && \
    ('\\' == 92) && (']' == 93) && ('^' == 94) && ('_' == 95) && \
    ('a' == 97) && ('b' == 98) && ('c' == 99) && ('d' == 100) && \
    ('e' == 101) && ('f' == 102) && ('g' == 103) && ('h' == 104) && \
    ('i' == 105) && ('j' == 106) && ('k' == 107) && ('l' == 108) && \
    ('m' == 109) && ('n' == 110) && ('o' == 111) && ('p' == 112) && \
    ('q' == 113) && ('r' == 114) && ('s' == 115) && ('t' == 116) && \
    ('u' == 117) && ('v' == 118) && ('w' == 119) && ('x' == 120) && \
    ('y' == 121) && ('z' == 122) && ('{' == 123) && ('|' == 124) && \
    ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set."
// If you see this error, please report a bug to <bug-gperf@gnu.org>.
#endif

#line 16 "src/parsing/keywords.txt"
struct PerfectKeywordHashTableEntry {
  const char* name;
  Token::Value value;
};
enum {
  TOTAL_KEYWORDS = 47,
  MIN_WORD_LENGTH = 2,
  MAX_WORD_LENGTH = 10,
  MIN_HASH_VALUE = 2,
  MAX_HASH_VALUE = 51
};

/* maximum key range = 50, duplicates = 0 */

class PerfectKeywordHash {
 private:
  static inline unsigned int Hash(const char* str, int len);

 public:
  static inline Token::Value GetToken(const char* str, int len);
};

inline unsigned int PerfectKeywordHash::Hash(const char* str, int len) {
  static const unsigned char asso_values[] = {
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
      52, 8,  2,  6,  0,  0,  9,  52, 21, 0,  52, 52, 36, 40, 0,  3,
      6,  52, 17, 13, 16, 16, 38, 25, 6,  26, 52, 52, 52, 52, 52, 52};
  return len + asso_values[static_cast<unsigned char>(str[1])] +
         asso_values[static_cast<unsigned char>(str[0])];
}

static const unsigned char kPerfectKeywordLengthTable[64] = {
    0, 0, 2, 3, 4, 2, 6, 7, 8, 9, 10, 2, 6, 7, 5, 3, 7, 8, 4, 5, 4, 7,
    5, 6, 5, 0, 5, 0, 6, 4, 7, 5, 9, 8, 5, 6, 3, 4, 5, 3, 4, 4, 5, 0,
    6, 4, 6, 5, 6, 3, 10, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

static const struct PerfectKeywordHashTableEntry kPerfectKeywordHashTable[64] =
    {{"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
#line 41 "src/parsing/keywords.txt"
     {"in", Token::IN},
#line 45 "src/parsing/keywords.txt"
     {"new", Token::NEW},
#line 31 "src/parsing/keywords.txt"
     {"enum", Token::ENUM},
#line 29 "src/parsing/keywords.txt"
     {"do", Token::DO},
#line 28 "src/parsing/keywords.txt"
     {"delete", Token::DELETE},
#line 27 "src/parsing/keywords.txt"
     {"default", Token::DEFAULT},
#line 26 "src/parsing/keywords.txt"
     {"debugger", Token::DEBUGGER},
#line 43 "src/parsing/keywords.txt"
     {"interface", Token::FUTURE_STRICT_RESERVED_WORD},
#line 42 "src/parsing/keywords.txt"
     {"instanceof", Token::INSTANCEOF},
#line 38 "src/parsing/keywords.txt"
     {"if", Token::IF},
#line 32 "src/parsing/keywords.txt"
     {"export", Token::EXPORT},
#line 33 "src/parsing/keywords.txt"
     {"extends", Token::EXTENDS},
#line 24 "src/parsing/keywords.txt"
     {"const", Token::CONST},
#line 36 "src/parsing/keywords.txt"
     {"for", Token::FOR},
#line 35 "src/parsing/keywords.txt"
     {"finally", Token::FINALLY},
#line 25 "src/parsing/keywords.txt"
     {"continue", Token::CONTINUE},
#line 21 "src/parsing/keywords.txt"
     {"case", Token::CASE},
#line 22 "src/parsing/keywords.txt"
     {"catch", Token::CATCH},
#line 46 "src/parsing/keywords.txt"
     {"null", Token::NULL_LITERAL},
#line 47 "src/parsing/keywords.txt"
     {"package", Token::FUTURE_STRICT_RESERVED_WORD},
#line 34 "src/parsing/keywords.txt"
     {"false", Token::FALSE_LITERAL},
#line 51 "src/parsing/keywords.txt"
     {"return", Token::RETURN},
#line 20 "src/parsing/keywords.txt"
     {"break", Token::BREAK},
     {"", Token::IDENTIFIER},
#line 18 "src/parsing/keywords.txt"
     {"async", Token::ASYNC},
     {"", Token::IDENTIFIER},
#line 50 "src/parsing/keywords.txt"
     {"public", Token::FUTURE_STRICT_RESERVED_WORD},
#line 63 "src/parsing/keywords.txt"
     {"with", Token::WITH},
#line 48 "src/parsing/keywords.txt"
     {"private", Token::FUTURE_STRICT_RESERVED_WORD},
#line 64 "src/parsing/keywords.txt"
     {"yield", Token::YIELD},
#line 49 "src/parsing/keywords.txt"
     {"protected", Token::FUTURE_STRICT_RESERVED_WORD},
#line 37 "src/parsing/keywords.txt"
     {"function", Token::FUNCTION},
#line 53 "src/parsing/keywords.txt"
     {"super", Token::SUPER},
#line 52 "src/parsing/keywords.txt"
     {"static", Token::STATIC},
#line 58 "src/parsing/keywords.txt"
     {"try", Token::TRY},
#line 57 "src/parsing/keywords.txt"
     {"true", Token::TRUE_LITERAL},
#line 19 "src/parsing/keywords.txt"
     {"await", Token::AWAIT},
#line 44 "src/parsing/keywords.txt"
     {"let", Token::LET},
#line 30 "src/parsing/keywords.txt"
     {"else", Token::ELSE},
#line 55 "src/parsing/keywords.txt"
     {"this", Token::THIS},
#line 56 "src/parsing/keywords.txt"
     {"throw", Token::THROW},
     {"", Token::IDENTIFIER},
#line 54 "src/parsing/keywords.txt"
     {"switch", Token::SWITCH},
#line 61 "src/parsing/keywords.txt"
     {"void", Token::VOID},
#line 40 "src/parsing/keywords.txt"
     {"import", Token::IMPORT},
#line 23 "src/parsing/keywords.txt"
     {"class", Token::CLASS},
#line 59 "src/parsing/keywords.txt"
     {"typeof", Token::TYPEOF},
#line 60 "src/parsing/keywords.txt"
     {"var", Token::VAR},
#line 39 "src/parsing/keywords.txt"
     {"implements", Token::FUTURE_STRICT_RESERVED_WORD},
#line 62 "src/parsing/keywords.txt"
     {"while", Token::WHILE},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER},
     {"", Token::IDENTIFIER}};

inline Token::Value PerfectKeywordHash::GetToken(const char* str, int len) {
  if (IsInRange(len, MIN_WORD_LENGTH, MAX_WORD_LENGTH)) {
    unsigned int key = Hash(str, len) & 0x3f;

    DCHECK_LT(key, arraysize(kPerfectKeywordLengthTable));
    DCHECK_LT(key, arraysize(kPerfectKeywordHashTable));
    if (len == kPerfectKeywordLengthTable[key]) {
      const char* s = kPerfectKeywordHashTable[key].name;

      while (*s != 0) {
        if (*s++ != *str++) return Token::IDENTIFIER;
      }
      return kPerfectKeywordHashTable[key].value;
    }
  }
  return Token::IDENTIFIER;
}

}  // namespace internal
}  // namespace v8

#endif  // V8_PARSING_KEYWORDS_GEN_H_
src/parsing/keywords.txt (new file, 64 lines)
@@ -0,0 +1,64 @@
%struct-type
%language=C++
%global-table
%define initializer-suffix ,Token::IDENTIFIER
%define hash-function-name Hash
%define lookup-function-name GetToken
%define class-name PerfectKeywordHash
%define word-array-name kPerfectKeywordHashTable
%define length-table-name kPerfectKeywordLengthTable
%7bit
%compare-lengths
%enum
%readonly-tables
%compare-strncmp

struct PerfectKeywordHashTableEntry { const char* name; Token::Value value; };
%%
async, Token::ASYNC
await, Token::AWAIT
break, Token::BREAK
case, Token::CASE
catch, Token::CATCH
class, Token::CLASS
const, Token::CONST
continue, Token::CONTINUE
debugger, Token::DEBUGGER
default, Token::DEFAULT
delete, Token::DELETE
do, Token::DO
else, Token::ELSE
enum, Token::ENUM
export, Token::EXPORT
extends, Token::EXTENDS
false, Token::FALSE_LITERAL
finally, Token::FINALLY
for, Token::FOR
function, Token::FUNCTION
if, Token::IF
implements, Token::FUTURE_STRICT_RESERVED_WORD
import, Token::IMPORT
in, Token::IN
instanceof, Token::INSTANCEOF
interface, Token::FUTURE_STRICT_RESERVED_WORD
let, Token::LET
new, Token::NEW
null, Token::NULL_LITERAL
package, Token::FUTURE_STRICT_RESERVED_WORD
private, Token::FUTURE_STRICT_RESERVED_WORD
protected, Token::FUTURE_STRICT_RESERVED_WORD
public, Token::FUTURE_STRICT_RESERVED_WORD
return, Token::RETURN
static, Token::STATIC
super, Token::SUPER
switch, Token::SWITCH
this, Token::THIS
throw, Token::THROW
true, Token::TRUE_LITERAL
try, Token::TRY
typeof, Token::TYPEOF
var, Token::VAR
void, Token::VOID
while, Token::WHILE
with, Token::WITH
yield, Token::YIELD
src/parsing/scanner-inl.h (modified)
@@ -6,6 +6,7 @@
 #define V8_PARSING_SCANNER_INL_H_
 
 #include "src/char-predicates-inl.h"
+#include "src/parsing/keywords-gen.h"
 #include "src/parsing/scanner.h"
 
 namespace v8 {
@@ -90,44 +91,8 @@ constexpr bool IsKeywordStart(char c) {
 V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input,
                                                 int input_length) {
   DCHECK_GE(input_length, 1);
-  const int kMinLength = 2;
-  const int kMaxLength = 10;
-  if (!IsInRange(input_length, kMinLength, kMaxLength)) {
-    return Token::IDENTIFIER;
-  }
-  switch (input[0]) {
-    default:
-#define KEYWORD_GROUP_CASE(ch) \
-      break;                   \
-    case ch:
-#define KEYWORD(keyword, token) \
-  { \
-    /* 'keyword' is a char array, so sizeof(keyword) is */ \
-    /* strlen(keyword) plus 1 for the NUL char. */ \
-    const int keyword_length = sizeof(keyword) - 1; \
-    STATIC_ASSERT(keyword_length >= kMinLength); \
-    STATIC_ASSERT(keyword_length <= kMaxLength); \
-    DCHECK_EQ(input[0], keyword[0]); \
-    DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \
-           0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
-    if (input_length == keyword_length && input[1] == keyword[1] && \
-        (keyword_length <= 2 || input[2] == keyword[2]) && \
-        (keyword_length <= 3 || input[3] == keyword[3]) && \
-        (keyword_length <= 4 || input[4] == keyword[4]) && \
-        (keyword_length <= 5 || input[5] == keyword[5]) && \
-        (keyword_length <= 6 || input[6] == keyword[6]) && \
-        (keyword_length <= 7 || input[7] == keyword[7]) && \
-        (keyword_length <= 8 || input[8] == keyword[8]) && \
-        (keyword_length <= 9 || input[9] == keyword[9]) && \
-        (keyword_length <= 10 || input[10] == keyword[10])) { \
-      return token; \
-    } \
-  }
-    KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
-  }
-  return Token::IDENTIFIER;
-#undef KEYWORD
-#undef KEYWORD_GROUP_CASE
+  return PerfectKeywordHash::GetToken(reinterpret_cast<const char*>(input),
+                                      input_length);
 }
 
 // Recursive constexpr template magic to check if a character is in a given
tools/gen-keywords-gen-h.py (new executable file, 228 lines)
@@ -0,0 +1,228 @@
#!/usr/bin/env python
# Copyright 2018 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import sys
import subprocess
import re
import math
import datetime

INPUT_PATH = "src/parsing/keywords.txt"
OUTPUT_PATH = "src/parsing/keywords-gen.h"


def next_power_of_2(x):
  return 1 if x == 0 else 2**int(math.ceil(math.log(x, 2)))


def call_with_input(cmd, input_string=""):
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  stdout, _ = p.communicate(input_string)
  retcode = p.wait()
  if retcode != 0:
    raise subprocess.CalledProcessError(retcode, cmd)
  return stdout


def checked_sub(pattern, sub, out, count=1, flags=0):
  out, n = re.subn(pattern, sub, out, flags=flags)
  if n != count:
    raise Exception("Didn't get exactly %d replacement(s) for pattern: %s" %
                    (count, pattern))
  return out


def change_sizet_to_int(out):
  # Literal buffer lengths are given as ints, not size_t
  return checked_sub(r'\bsize_t\b', 'int', out, count=4)


def trim_char_table(out):
  # Potential keyword strings are known to be lowercase ascii, so chop off the
  # rest of the table and mask out the char
  out = checked_sub(
      r'static const unsigned char asso_values\[\]\s*=\s*\{(\s*\d+\s*,){96}',
      r'static const unsigned char asso_values[] = {',
      out,
      flags=re.MULTILINE)
  out = checked_sub(
      r'asso_values\[static_cast<unsigned char>\(str\[(\d+)\]\)\]',
      r'asso_values[static_cast<unsigned char>(str[(\1)]&31)]',
      out,
      flags=re.MULTILINE)
  return out


def use_isinrange(out):
  # Our IsInRange method is more efficient than checking for min/max length
  return checked_sub(r'if \(len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH\)',
                     r'if (IsInRange(len, MIN_WORD_LENGTH, MAX_WORD_LENGTH))',
                     out)


def pad_tables(out):
  # We don't want to compare against the max hash value, so pad the tables up
  # to a power of two and mask the hash.

  # First get the new size
  max_hash_value = int(re.search(r'MAX_HASH_VALUE\s*=\s*(\d+)', out).group(1))
  old_table_length = max_hash_value + 1
  new_table_length = next_power_of_2(old_table_length)
  table_padding_len = new_table_length - old_table_length

  # Pad the length table.
  single_lengthtable_entry = r'\d+'
  out = checked_sub(
      r"""
      static\ const\ unsigned\ char\ kPerfectKeywordLengthTable\[\]\s*=\s*\{
      (
        \s*%(single_lengthtable_entry)s\s*
        (?:,\s*%(single_lengthtable_entry)s\s*)*
      )
      \}
      """ % {'single_lengthtable_entry': single_lengthtable_entry},
      r'static const unsigned char kPerfectKeywordLengthTable[%d] = { \1 %s }'
      % (new_table_length, "".join([',0'] * table_padding_len)),
      out,
      flags=re.MULTILINE | re.VERBOSE)

  # Pad the word list.
  single_wordlist_entry = r"""
      (?:\#line\ \d+\ ".*"$\s*)?
      \{\s*"[a-z]*"\s*,\s*Token::[A-Z_]+\}
      """
  out = checked_sub(
      r"""
      static\ const\ struct\ PerfectKeywordHashTableEntry\ kPerfectKeywordHashTable\[\]\s*=\s*\{
      (
        \s*%(single_wordlist_entry)s\s*
        (?:,\s*%(single_wordlist_entry)s\s*)*
      )
      \}
      """ % {'single_wordlist_entry': single_wordlist_entry},
      r'static const struct PerfectKeywordHashTableEntry kPerfectKeywordHashTable[%d] = {\1 %s }'
      % (new_table_length, "".join(
          [',{"",Token::IDENTIFIER}'] * table_padding_len)),
      out,
      flags=re.MULTILINE | re.VERBOSE)

  # Mask the hash and replace the range check with DCHECKs.
  out = checked_sub(r'Hash\s*\(\s*str,\s*len\s*\)',
                    r'Hash(str, len)&0x%x' % (new_table_length - 1), out)
  out = checked_sub(
      r'if \(key <= MAX_HASH_VALUE\)',
      r'DCHECK_LT(key, arraysize(kPerfectKeywordLengthTable));DCHECK_LT(key, arraysize(kPerfectKeywordHashTable));',
      out)

  return out


def return_token(out):
  # We want to return the actual token rather than the table entry.

  # Change the return type of the function. Make it inline too.
  out = checked_sub(
      r'const\s*struct\s*PerfectKeywordHashTableEntry\s*\*\s*((?:PerfectKeywordHash::)?GetToken)',
      r'inline Token::Value \1',
      out,
      count=2)

  # Change the return value when the keyword is found
  out = checked_sub(r'return &kPerfectKeywordHashTable\[key\];',
                    r'return kPerfectKeywordHashTable[key].value;', out)

  # Change the return value when the keyword is not found
  out = checked_sub(r'return 0;', r'return Token::IDENTIFIER;', out)

  return out


def memcmp_to_while(out):
  # It's faster to loop over the keyword with a while loop than calling memcmp.
  # Careful, this replacement is quite flaky, because otherwise the regex is
  # unreadable.
  return checked_sub(
      re.escape("if (*str == *s && !memcmp (str + 1, s + 1, len - 1))") + r"\s*"
      + re.escape("return kPerfectKeywordHashTable[key].value;"),
      """
      while(*s!=0) {
        if (*s++ != *str++) return Token::IDENTIFIER;
      }
      return kPerfectKeywordHashTable[key].value;
      """,
      out,
      flags=re.MULTILINE)


def wrap_namespace(out):
  return """// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is automatically generated by gen-keywords-gen-h.py and should not
// be modified manually.

#ifndef V8_PARSING_KEYWORDS_GEN_H_
#define V8_PARSING_KEYWORDS_GEN_H_

#include "src/parsing/token.h"

namespace v8 {
namespace internal {

%s

}  // namespace internal
}  // namespace v8

#endif  // V8_PARSING_KEYWORDS_GEN_H_
""" % (out)


def trim_character_set_warning(out):
  # gperf generates an error message that is too large, trim it

  return out.replace(
      '"gperf generated tables don\'t work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."',
      '"gperf generated tables don\'t work with this execution character set."\\\n// If you see this error, please report a bug to <bug-gperf@gnu.org>.'
  )


def main():
  try:
    script_dir = os.path.dirname(sys.argv[0])
    root_dir = os.path.join(script_dir, '..')

    out = subprocess.check_output(["gperf", "-m100", INPUT_PATH], cwd=root_dir)

    # And now some munging of the generated file.
    out = change_sizet_to_int(out)
    # TODO(leszeks): This seems to regress performance, investigate.
    #out = trim_char_table(out)
    out = use_isinrange(out)
    out = pad_tables(out)
    out = return_token(out)
    out = memcmp_to_while(out)
    out = wrap_namespace(out)
    out = trim_character_set_warning(out)

    # Final formatting.
    clang_format_path = os.path.join(root_dir,
                                     'third_party/depot_tools/clang-format')
    out = call_with_input([clang_format_path], out)

    with open(os.path.join(root_dir, OUTPUT_PATH), 'w') as f:
      f.write(out)

    return 0

  except subprocess.CalledProcessError as e:
    sys.stderr.write("Error calling '{}'\n".format(" ".join(e.cmd)))
    return e.returncode


if __name__ == '__main__':
  sys.exit(main())