Changed keyword token recognition to be done inline in the identifier scanner.
Review URL: http://codereview.chromium.org/360048 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3221 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
877db0f539
commit
273e860cc1
155
src/scanner.cc
155
src/scanner.cc
@ -193,6 +193,139 @@ void TwoByteStringUTF16Buffer::SeekForward(int pos) {
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Keyword Matcher
|
||||
// Transition table for the first character fed to a KeywordMatcher. Step()
// indexes it with (input - kFirstCharRangeMin), so there is one slot per
// lower-case letter in 'b'..'w'. A slot either commits to the single keyword
// starting with that letter (KEYWORD_PREFIX plus the keyword and its token),
// enters a per-letter state that is resolved by later characters, or is
// UNMATCHABLE because no recognized keyword starts with that letter.
KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
  { "break",  KEYWORD_PREFIX, Token::BREAK },    // 'b'
  { NULL,     C,              Token::ILLEGAL },  // 'c'
  { NULL,     D,              Token::ILLEGAL },  // 'd'
  { "else",   KEYWORD_PREFIX, Token::ELSE },     // 'e'
  { NULL,     F,              Token::ILLEGAL },  // 'f'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'g'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'h'
  { NULL,     I,              Token::ILLEGAL },  // 'i'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'j'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'k'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'l'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'm'
  { NULL,     N,              Token::ILLEGAL },  // 'n'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'o'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'p'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'q'
  { "return", KEYWORD_PREFIX, Token::RETURN },   // 'r'
  { "switch", KEYWORD_PREFIX, Token::SWITCH },   // 's'
  { NULL,     T,              Token::ILLEGAL },  // 't'
  { NULL,     UNMATCHABLE,    Token::ILLEGAL },  // 'u'
  { NULL,     V,              Token::ILLEGAL },  // 'v'
  { NULL,     W,              Token::ILLEGAL }   // 'w'
};
|
||||
|
||||
|
||||
// Advances the matcher by one input character.
//
// Each case tries the transitions that are legal in the current state and
// returns as soon as one of them accepts the input. If no transition accepts
// it, control leaves the switch and the matcher becomes UNMATCHABLE.
void KeywordMatcher::Step(uc32 input) {
  switch (state_) {
    case INITIAL: {
      // The first character is the only position with significant fanout, so
      // it is dispatched through the first_states_ table, which covers the
      // lower-case letters 'b'..'w'. The subtraction is unsigned, so inputs
      // below the range wrap to a large index and fail the bounds check.
      const unsigned int index = input - kFirstCharRangeMin;
      if (index >= kFirstCharRangeLength) break;
      const FirstState& first = first_states_[index];
      state_ = first.state;
      if (first.state == KEYWORD_PREFIX) {
        // Only one keyword starts with this letter; match the rest of it
        // character by character.
        keyword_ = first.keyword;
        counter_ = 1;
        keyword_token_ = first.token;
      }
      return;
    }

    case KEYWORD_PREFIX:
      // Exactly one keyword is still possible; compare against its next
      // character.
      if (keyword_[counter_] != input) break;
      ASSERT_NE(input, '\0');
      counter_++;
      if (keyword_[counter_] == '\0') {
        // Reached the end of the keyword: publish its token.
        state_ = KEYWORD_MATCHED;
        token_ = keyword_token_;
      }
      return;

    case KEYWORD_MATCHED:
      // A character after a complete keyword turns it back into an
      // identifier.
      token_ = Token::IDENTIFIER;
      break;

    case C:
      if (MatchState(input, 'a', CA) ||
          MatchState(input, 'o', CO)) {
        return;
      }
      break;

    case CA:
      if (MatchKeywordStart(input, "case", 2, Token::CASE) ||
          MatchKeywordStart(input, "catch", 2, Token::CATCH)) {
        return;
      }
      break;

    case CO:
      if (MatchState(input, 'n', CON)) return;
      break;

    case CON:
      if (MatchKeywordStart(input, "const", 3, Token::CONST) ||
          MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) {
        return;
      }
      break;

    case D:
      if (MatchState(input, 'e', DE) ||
          MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) {
        return;
      }
      break;

    case DE:
      if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER) ||
          MatchKeywordStart(input, "default", 2, Token::DEFAULT) ||
          MatchKeywordStart(input, "delete", 2, Token::DELETE)) {
        return;
      }
      break;

    case F:
      if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL) ||
          MatchKeywordStart(input, "finally", 1, Token::FINALLY) ||
          MatchKeywordStart(input, "for", 1, Token::FOR) ||
          MatchKeywordStart(input, "function", 1, Token::FUNCTION)) {
        return;
      }
      break;

    case I:
      if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF) ||
          MatchKeyword(input, 'n', IN, Token::IN)) {
        return;
      }
      break;

    case IN:
      // "in" was a complete keyword, but another character has arrived, so
      // the current token is an identifier again unless the input goes on to
      // spell "instanceof".
      token_ = Token::IDENTIFIER;
      if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
      break;

    case N:
      if (MatchKeywordStart(input, "native", 1, Token::NATIVE) ||
          MatchKeywordStart(input, "new", 1, Token::NEW) ||
          MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) {
        return;
      }
      break;

    case T:
      if (MatchState(input, 'h', TH) ||
          MatchState(input, 'r', TR) ||
          MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) {
        return;
      }
      break;

    case TH:
      if (MatchKeywordStart(input, "this", 2, Token::THIS) ||
          MatchKeywordStart(input, "throw", 2, Token::THROW)) {
        return;
      }
      break;

    case TR:
      if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL) ||
          MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) {
        return;
      }
      break;

    case V:
      if (MatchKeywordStart(input, "var", 1, Token::VAR) ||
          MatchKeywordStart(input, "void", 1, Token::VOID)) {
        return;
      }
      break;

    case W:
      if (MatchKeywordStart(input, "while", 1, Token::WHILE) ||
          MatchKeywordStart(input, "with", 1, Token::WITH)) {
        return;
      }
      break;

    default:
      UNREACHABLE();
  }

  // No transition accepted the input: no keyword can match any more.
  state_ = UNMATCHABLE;
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Scanner
|
||||
|
||||
@ -855,48 +988,40 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
|
||||
|
||||
// Scans an identifier or keyword starting at the current character c0_ and
// returns its token.
//
// Every literally written character is fed to a KeywordMatcher as it is added
// to the literal buffer, so the keyword/identifier decision is already known
// when scanning finishes and no lookup on the completed literal is needed.
// A unicode escape puts the matcher into its failed state, because an
// identifier that contains escapes must not be resolved to a keyword; the
// matcher then reports Token::IDENTIFIER.
//
// Returns Token::ILLEGAL if an escape decodes to a character that is not a
// legal identifier start (first position) or identifier part (later
// positions).
Token::Value Scanner::ScanIdentifier() {
  ASSERT(kIsIdentifierStart.get(c0_));

  StartLiteral();
  KeywordMatcher keyword_match;

  // Scan identifier start character.
  if (c0_ == '\\') {
    uc32 c = ScanIdentifierUnicodeEscape();
    // Only allow legal identifier start characters.
    if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
    AddChar(c);
    keyword_match.Fail();
  } else {
    AddChar(c0_);
    keyword_match.AddChar(c0_);
    Advance();
  }

  // Scan the rest of the identifier characters.
  while (kIsIdentifierPart.get(c0_)) {
    if (c0_ == '\\') {
      uc32 c = ScanIdentifierUnicodeEscape();
      // Only allow legal identifier part characters.
      if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
      AddChar(c);
      keyword_match.Fail();
    } else {
      AddChar(c0_);
      keyword_match.AddChar(c0_);
      Advance();
    }
  }
  TerminateLiteral();

  // The matcher starts out as IDENTIFIER and only reports a keyword token
  // while the characters seen so far spell exactly that keyword, so escaped
  // and one-character identifiers need no special casing here.
  return keyword_match.token();
}
|
||||
|
||||
|
||||
|
115
src/scanner.h
115
src/scanner.h
@ -123,6 +123,121 @@ class TwoByteStringUTF16Buffer: public UTF16Buffer {
|
||||
};
|
||||
|
||||
|
||||
class KeywordMatcher {
|
||||
// Incrementally recognize keywords.
|
||||
//
|
||||
// Recognized keywords:
|
||||
// break case catch const* continue debugger* default delete do else
|
||||
// finally false for function if in instanceof native* new null
|
||||
// return switch this throw true try typeof var void while with
|
||||
//
|
||||
// *: Actually "future reserved keywords". These are the only ones we
|
||||
// recognized, the remaining are allowed as identifiers.
|
||||
public:
|
||||
KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {}
|
||||
|
||||
Token::Value token() { return token_; }
|
||||
|
||||
inline void AddChar(uc32 input) {
|
||||
if (state_ != UNMATCHABLE) {
|
||||
Step(input);
|
||||
}
|
||||
}
|
||||
|
||||
void Fail() {
|
||||
token_ = Token::IDENTIFIER;
|
||||
state_ = UNMATCHABLE;
|
||||
}
|
||||
|
||||
private:
|
||||
enum State {
|
||||
UNMATCHABLE,
|
||||
INITIAL,
|
||||
KEYWORD_PREFIX,
|
||||
KEYWORD_MATCHED,
|
||||
C,
|
||||
CA,
|
||||
CO,
|
||||
CON,
|
||||
D,
|
||||
DE,
|
||||
F,
|
||||
I,
|
||||
IN,
|
||||
N,
|
||||
T,
|
||||
TH,
|
||||
TR,
|
||||
V,
|
||||
W
|
||||
};
|
||||
|
||||
struct FirstState {
|
||||
const char* keyword;
|
||||
State state;
|
||||
Token::Value token;
|
||||
};
|
||||
|
||||
// Range of possible first characters of a keyword.
|
||||
static const unsigned int kFirstCharRangeMin = 'b';
|
||||
static const unsigned int kFirstCharRangeMax = 'w';
|
||||
static const unsigned int kFirstCharRangeLength =
|
||||
kFirstCharRangeMax - kFirstCharRangeMin + 1;
|
||||
// State map for first keyword character range.
|
||||
static FirstState first_states_[kFirstCharRangeLength];
|
||||
|
||||
// Current state.
|
||||
State state_;
|
||||
// Token for currently added characters.
|
||||
Token::Value token_;
|
||||
|
||||
// Matching a specific keyword string (there is only one possible valid
|
||||
// keyword with the current prefix).
|
||||
const char* keyword_;
|
||||
int counter_;
|
||||
Token::Value keyword_token_;
|
||||
|
||||
// If input equals keyword's character at position, continue matching keyword
|
||||
// from that position.
|
||||
inline bool MatchKeywordStart(uc32 input,
|
||||
const char* keyword,
|
||||
int position,
|
||||
Token::Value token_if_match) {
|
||||
if (input == keyword[position]) {
|
||||
state_ = KEYWORD_PREFIX;
|
||||
this->keyword_ = keyword;
|
||||
this->counter_ = position + 1;
|
||||
this->keyword_token_ = token_if_match;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// If input equals match character, transition to new state and return true.
|
||||
inline bool MatchState(uc32 input, char match, State new_state) {
|
||||
if (input == match) {
|
||||
state_ = new_state;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool MatchKeyword(uc32 input,
|
||||
char match,
|
||||
State new_state,
|
||||
Token::Value keyword_token) {
|
||||
if (input == match) { // Matched "do".
|
||||
state_ = new_state;
|
||||
token_ = keyword_token;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Step(uc32 input);
|
||||
};
|
||||
|
||||
|
||||
class Scanner {
|
||||
public:
|
||||
|
||||
|
@ -52,6 +52,7 @@ SOURCES = {
|
||||
'test-log.cc',
|
||||
'test-log-utils.cc',
|
||||
'test-mark-compact.cc',
|
||||
'test-parsing.cc',
|
||||
'test-regexp.cc',
|
||||
'test-serialize.cc',
|
||||
'test-sockets.cc',
|
||||
|
128
test/cctest/test-parsing.cc
Executable file
128
test/cctest/test-parsing.cc
Executable file
@ -0,0 +1,128 @@
|
||||
// Copyright 2006-2009 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "v8.h"
|
||||
|
||||
#include "token.h"
|
||||
#include "scanner.h"
|
||||
|
||||
#include "cctest.h"
|
||||
|
||||
namespace i = ::v8::internal;
|
||||
|
||||
// Exercises KeywordMatcher directly: every keyword in TOKEN_LIST must be
// recognized exactly when its last character is added, future reserved words
// must stay identifiers, and NUL characters, wrong case and Fail() must all
// prevent a match.
TEST(KeywordMatcher) {
  struct KeywordToken {
    const char* keyword;
    i::Token::Value token;
  };

  // All keywords with their tokens, terminated by a NULL keyword.
  static const KeywordToken keywords[] = {
#define KEYWORD(t, s, d) { s, i::Token::t },
#define IGNORE(t, s, d)  /* */
      TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
#undef KEYWORD
      { NULL, i::Token::IDENTIFIER }
  };

  // All future reserved words, terminated by NULL.
  static const char* future_keywords[] = {
#define FUTURE(t, s, d) s,
      TOKEN_LIST(IGNORE, IGNORE, FUTURE)
#undef FUTURE
#undef IGNORE
      NULL
  };

  for (int index = 0; keywords[index].keyword != NULL; index++) {
    const KeywordToken& entry = keywords[index];
    i::KeywordMatcher matcher;
    const int length = strlen(entry.keyword);
    for (int pos = 0; pos < length; pos++) {
      // Before the keyword is complete the matcher must report an
      // identifier. The one exception is "instanceof": after its first two
      // characters the matcher has seen "in", the only keyword that is a
      // prefix of another.
      const bool seen_in_prefix =
          entry.token == i::Token::INSTANCEOF && pos == 2;
      if (seen_in_prefix) {
        CHECK_EQ(i::Token::IN, matcher.token());
      } else {
        CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
      }
      matcher.AddChar(entry.keyword[pos]);
    }
    CHECK_EQ(entry.token, matcher.token());
    // One more character turns the keyword into an identifier.
    matcher.AddChar('z');
    CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
    // A keyword appearing later in the input does not revive the match.
    matcher.AddChar('i');
    matcher.AddChar('f');
    CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
  }

  // Future reserved words are plain identifiers.
  for (int index = 0; future_keywords[index] != NULL; index++) {
    const char* word = future_keywords[index];
    i::KeywordMatcher matcher;
    const int length = strlen(word);
    for (int pos = 0; pos < length; pos++) {
      matcher.AddChar(word[pos]);
    }
    CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
  }

  // A leading NUL character is not skipped: it prevents any match.
  i::KeywordMatcher bad_start;
  bad_start.AddChar(0);
  CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
  bad_start.AddChar('i');
  bad_start.AddChar('f');
  CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());

  // A trailing NUL character is not skipped: it undoes the match.
  i::KeywordMatcher bad_end;
  bad_end.AddChar('i');
  bad_end.AddChar('f');
  CHECK_EQ(i::Token::IF, bad_end.token());
  bad_end.AddChar(0);
  CHECK_EQ(i::Token::IDENTIFIER, bad_end.token());

  // Matching is case sensitive.
  i::KeywordMatcher bad_case;
  bad_case.AddChar('i');
  bad_case.AddChar('F');
  CHECK_EQ(i::Token::IDENTIFIER, bad_case.token());

  // After an explicit Fail(), completing the keyword has no effect.
  i::KeywordMatcher full_stop;
  full_stop.AddChar('i');
  CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
  full_stop.Fail();
  CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
  full_stop.AddChar('f');
  CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
}
|
||||
|
Loading…
Reference in New Issue
Block a user