2011-04-12 08:27:38 +00:00
|
|
|
// Copyright 2011 the V8 project authors. All rights reserved.
|
2014-04-29 06:42:26 +00:00
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
// Features shared by parsing and pre-parsing scanners.
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2013-10-10 11:58:16 +00:00
|
|
|
#include <cmath>
|
|
|
|
|
2014-06-20 08:40:11 +00:00
|
|
|
#include "src/v8.h"
|
2014-06-03 08:12:43 +00:00
|
|
|
|
|
|
|
#include "include/v8stdint.h"
|
2014-06-24 14:03:24 +00:00
|
|
|
#include "src/ast-value-factory.h"
|
2014-06-03 08:12:43 +00:00
|
|
|
#include "src/char-predicates-inl.h"
|
|
|
|
#include "src/conversions-inl.h"
|
|
|
|
#include "src/list-inl.h"
|
|
|
|
#include "src/parser.h"
|
2014-06-20 08:40:11 +00:00
|
|
|
#include "src/scanner.h"
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2009-05-25 10:05:56 +00:00
|
|
|
namespace v8 {
|
|
|
|
namespace internal {
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2014-07-02 07:01:31 +00:00
|
|
|
|
|
|
|
// Internalizes the buffered literal through the isolate's factory, picking
// the one-byte or two-byte entry point according to is_one_byte().
Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const {
  return is_one_byte()
      ? isolate->factory()->InternalizeOneByteString(one_byte_literal())
      : isolate->factory()->InternalizeTwoByteString(two_byte_literal());
}
|
|
|
|
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// ----------------------------------------------------------------------------
|
2011-11-01 07:47:15 +00:00
|
|
|
// Scanner
|
2011-09-12 07:49:01 +00:00
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Constructs a scanner bound to |unicode_cache|. The recorded octal position
// starts out invalid and every harmony feature flag starts out disabled.
Scanner::Scanner(UnicodeCache* unicode_cache)
    : unicode_cache_(unicode_cache),
      octal_pos_(Location::invalid()),
      harmony_scoping_(false),
      harmony_modules_(false),
      harmony_numeric_literals_(false) {
}
|
2011-09-12 07:49:01 +00:00
|
|
|
|
|
|
|
|
2012-03-12 12:35:28 +00:00
|
|
|
// Attaches the scanner to |source| and scans the first token.
void Scanner::Initialize(Utf16CharacterStream* source) {
  source_ = source;

  // Identifiers have to be captured so that "get" and "set" can be
  // recognized in object literals.
  Init();

  // The start of the input is treated like the position right after a
  // newline, so that an HTML comment end is allowed while skipping the
  // initial whitespace.
  has_line_terminator_before_next_ = true;
  SkipWhiteSpace();

  // Scan the first token.
  Scan();
}
|
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
|
|
|
|
// Scans exactly |expected_length| hex digits starting at the current
// character and returns their combined value. If a non-hex character is
// encountered, the digits already advanced past are pushed back and -1 is
// returned.
uc32 Scanner::ScanHexNumber(int expected_length) {
  DCHECK(expected_length <= 4);  // prevent overflow

  uc32 consumed[4] = { 0, 0, 0, 0 };
  uc32 value = 0;
  int count = 0;
  while (count < expected_length) {
    consumed[count] = c0_;
    const int digit = HexValue(c0_);
    if (digit < 0) {
      // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
      // should be illegal, but other JS VMs just return the
      // non-escaped version of the original character.

      // Push back, most recent first, the digits we have advanced past.
      while (count > 0) {
        PushBack(consumed[--count]);
      }
      return -1;
    }
    value = value * 16 + digit;
    Advance();
    count++;
  }

  return value;
}
|
2009-11-09 12:18:25 +00:00
|
|
|
|
2009-08-18 07:14:02 +00:00
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
// Ensure that tokens can be stored in a byte.
|
|
|
|
STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
|
|
|
|
|
|
|
|
// Table of one-character tokens, by character (0x00..0x7f only).
|
|
|
|
static const byte one_char_tokens[] = {
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::LPAREN, // 0x28
|
|
|
|
Token::RPAREN, // 0x29
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::COMMA, // 0x2c
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::COLON, // 0x3a
|
|
|
|
Token::SEMICOLON, // 0x3b
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::CONDITIONAL, // 0x3f
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::LBRACK, // 0x5b
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::RBRACK, // 0x5d
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::LBRACE, // 0x7b
|
|
|
|
Token::ILLEGAL,
|
|
|
|
Token::RBRACE, // 0x7d
|
|
|
|
Token::BIT_NOT, // 0x7e
|
|
|
|
Token::ILLEGAL
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Makes the lookahead token the current token, scans a new lookahead token
// and returns the (new) current token.
Token::Value Scanner::Next() {
  current_ = next_;
  has_line_terminator_before_next_ = false;
  has_multiline_comment_before_next_ = false;

  // Fast path: characters in 0x00..0x7f are looked up in one_char_tokens.
  Token::Value one_char = Token::ILLEGAL;
  if (static_cast<unsigned>(c0_) <= 0x7f) {
    one_char = static_cast<Token::Value>(one_char_tokens[c0_]);
  }

  if (one_char == Token::ILLEGAL) {
    // No table entry for this character; use the general scanner.
    Scan();
    return current_.token;
  }

  const int start = source_pos();
  next_.token = one_char;
  next_.location.beg_pos = start;
  next_.location.end_pos = start + 1;
  Advance();
  return current_.token;
}
|
|
|
|
|
|
|
|
|
2014-02-10 12:43:10 +00:00
|
|
|
// TODO(yangguo): check whether this is actually necessary.
|
|
|
|
static inline bool IsLittleEndianByteOrderMark(uc32 c) {
|
2011-09-08 13:06:44 +00:00
|
|
|
// The Unicode value U+FFFE is guaranteed never to be assigned as a
|
|
|
|
// Unicode character; this implies that in a Unicode context the
|
|
|
|
// 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
|
|
|
|
// character expressed in little-endian byte order (since it could
|
|
|
|
// not be a U+FFFE character expressed in big-endian byte
|
|
|
|
// order). Nevertheless, we check for it to be compatible with
|
|
|
|
// Spidermonkey.
|
2014-02-10 12:43:10 +00:00
|
|
|
return c == 0xFFFE;
|
2011-09-08 13:06:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
bool Scanner::SkipWhiteSpace() {
|
2011-09-08 13:06:44 +00:00
|
|
|
int start_position = source_pos();
|
|
|
|
|
|
|
|
while (true) {
|
2014-02-10 12:43:10 +00:00
|
|
|
while (true) {
|
|
|
|
// Advance as long as character is a WhiteSpace or LineTerminator.
|
|
|
|
// Remember if the latter is the case.
|
2011-09-08 13:06:44 +00:00
|
|
|
if (unicode_cache_->IsLineTerminator(c0_)) {
|
|
|
|
has_line_terminator_before_next_ = true;
|
2014-02-10 12:43:10 +00:00
|
|
|
} else if (!unicode_cache_->IsWhiteSpace(c0_) &&
|
|
|
|
!IsLittleEndianByteOrderMark(c0_)) {
|
|
|
|
break;
|
2011-09-08 13:06:44 +00:00
|
|
|
}
|
|
|
|
Advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is an HTML comment end '-->' at the beginning of a
|
|
|
|
// line (with only whitespace in front of it), we treat the rest
|
|
|
|
// of the line as a comment. This is in line with the way
|
|
|
|
// SpiderMonkey handles it.
|
|
|
|
if (c0_ == '-' && has_line_terminator_before_next_) {
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '-') {
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '>') {
|
|
|
|
// Treat the rest of the line as a comment.
|
|
|
|
SkipSingleLineComment();
|
|
|
|
// Continue skipping white space after the comment.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
PushBack('-'); // undo Advance()
|
|
|
|
}
|
|
|
|
PushBack('-'); // undo Advance()
|
|
|
|
}
|
|
|
|
// Return whether or not we skipped any characters.
|
|
|
|
return source_pos() != start_position;
|
2010-03-03 13:16:10 +00:00
|
|
|
}
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
|
2009-11-09 12:18:25 +00:00
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Advances once, then skips characters up to (but not including) the next
// line terminator or the end of input. Always returns Token::WHITESPACE.
Token::Value Scanner::SkipSingleLineComment() {
  Advance();

  // The terminating line terminator is deliberately left in the input: it
  // is not part of the single-line comment, but is recognized separately
  // by the lexical grammar and becomes part of the stream of input
  // elements for the syntactic grammar (see ECMA-262, section 7.4).
  for (; c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_); Advance()) {
  }

  return Token::WHITESPACE;
}
|
|
|
|
|
|
|
|
|
2014-07-02 07:01:31 +00:00
|
|
|
// Tries to parse a magic source URL comment, then skips whatever remains of
// the line (the line terminator itself is not consumed). Always returns
// Token::WHITESPACE.
Token::Value Scanner::SkipSourceURLComment() {
  TryToParseSourceURLComment();

  for (; c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_); Advance()) {
  }

  return Token::WHITESPACE;
}
|
|
|
|
|
|
|
|
|
|
|
|
void Scanner::TryToParseSourceURLComment() {
|
|
|
|
// Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
|
|
|
|
// function will just return if it cannot parse a magic comment.
|
|
|
|
if (!unicode_cache_->IsWhiteSpace(c0_))
|
|
|
|
return;
|
|
|
|
Advance();
|
|
|
|
LiteralBuffer name;
|
|
|
|
while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) &&
|
|
|
|
c0_ != '=') {
|
|
|
|
name.AddChar(c0_);
|
|
|
|
Advance();
|
|
|
|
}
|
|
|
|
if (!name.is_one_byte()) return;
|
|
|
|
Vector<const uint8_t> name_literal = name.one_byte_literal();
|
|
|
|
LiteralBuffer* value;
|
|
|
|
if (name_literal == STATIC_ASCII_VECTOR("sourceURL")) {
|
|
|
|
value = &source_url_;
|
|
|
|
} else if (name_literal == STATIC_ASCII_VECTOR("sourceMappingURL")) {
|
|
|
|
value = &source_mapping_url_;
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (c0_ != '=')
|
|
|
|
return;
|
|
|
|
Advance();
|
|
|
|
value->Reset();
|
|
|
|
while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) {
|
|
|
|
Advance();
|
|
|
|
}
|
|
|
|
while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
|
|
|
|
// Disallowed characters.
|
|
|
|
if (c0_ == '"' || c0_ == '\'') {
|
|
|
|
value->Reset();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (unicode_cache_->IsWhiteSpace(c0_)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
value->AddChar(c0_);
|
|
|
|
Advance();
|
|
|
|
}
|
|
|
|
// Allow whitespace at the end.
|
|
|
|
while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
|
|
|
|
if (!unicode_cache_->IsWhiteSpace(c0_)) {
|
|
|
|
value->Reset();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
Advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Skips a multi-line comment; the current character must be the '*' that
// follows the opening '/'. Returns Token::WHITESPACE once the closing "*/"
// is found, or Token::ILLEGAL if the input ends first.
Token::Value Scanner::SkipMultiLineComment() {
  DCHECK(c0_ == '*');
  Advance();

  while (c0_ >= 0) {
    const uc32 previous = c0_;
    Advance();

    // Following ECMA-262, section 7.4, a comment containing a newline
    // will make the comment count as a line-terminator.
    if (unicode_cache_->IsLineTerminator(previous)) {
      has_multiline_comment_before_next_ = true;
    }

    if (previous != '*' || c0_ != '/') continue;

    // End of the comment reached: the '/' is consumed by replacing it with
    // a whitespace. This way all multi-line comments are treated as
    // whitespace.
    c0_ = ' ';
    return Token::WHITESPACE;
  }

  // Unterminated multi-line comment.
  return Token::ILLEGAL;
}
|
|
|
|
|
2009-11-09 12:18:25 +00:00
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Checks for an HTML comment start "<!--"; the current character must be the
// '!' that follows '<'. If the comment start is present the rest of the line
// is skipped as a single-line comment; otherwise the lookahead is restored
// to '!' and Token::LT is returned.
Token::Value Scanner::ScanHtmlComment() {
  DCHECK(c0_ == '!');
  Advance();

  const bool first_dash = (c0_ == '-');
  if (first_dash) {
    Advance();
    const bool second_dash = (c0_ == '-');
    if (second_dash) {
      return SkipSingleLineComment();
    }
    PushBack('-');  // undo Advance()
  }

  PushBack('!');  // undo Advance()
  DCHECK(c0_ == '!');
  return Token::LT;
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
void Scanner::Scan() {
|
2011-09-08 13:06:44 +00:00
|
|
|
next_.literal_chars = NULL;
|
|
|
|
Token::Value token;
|
|
|
|
do {
|
|
|
|
// Remember the position of the next token
|
|
|
|
next_.location.beg_pos = source_pos();
|
|
|
|
|
|
|
|
switch (c0_) {
|
|
|
|
case ' ':
|
|
|
|
case '\t':
|
|
|
|
Advance();
|
|
|
|
token = Token::WHITESPACE;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\n':
|
|
|
|
Advance();
|
|
|
|
has_line_terminator_before_next_ = true;
|
|
|
|
token = Token::WHITESPACE;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '"': case '\'':
|
|
|
|
token = ScanString();
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '<':
|
|
|
|
// < <= << <<= <!--
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '=') {
|
|
|
|
token = Select(Token::LTE);
|
|
|
|
} else if (c0_ == '<') {
|
|
|
|
token = Select('=', Token::ASSIGN_SHL, Token::SHL);
|
|
|
|
} else if (c0_ == '!') {
|
|
|
|
token = ScanHtmlComment();
|
|
|
|
} else {
|
|
|
|
token = Token::LT;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '>':
|
|
|
|
// > >= >> >>= >>> >>>=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '=') {
|
|
|
|
token = Select(Token::GTE);
|
|
|
|
} else if (c0_ == '>') {
|
|
|
|
// >> >>= >>> >>>=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '=') {
|
|
|
|
token = Select(Token::ASSIGN_SAR);
|
|
|
|
} else if (c0_ == '>') {
|
|
|
|
token = Select('=', Token::ASSIGN_SHR, Token::SHR);
|
|
|
|
} else {
|
|
|
|
token = Token::SAR;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
token = Token::GT;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '=':
|
Implement handling of arrow functions in the parser
Arrow functions are parsed from ParseAssignmentExpression(). Handling the
parameter list is done by letting ParseConditionalExpression() parse a comma
separated list of identifiers, and it returns a tree of BinaryOperation nodes
with VariableProxy leaves, or a single VariableProxy if there is only one
parameter. When the arrow token "=>" is found, the VariableProxy nodes are
passed to ParseArrowFunctionLiteral(), which will then skip parsing the
paramaeter list. This avoids having to rewind when the arrow is found and
restart parsing the parameter list.
Note that the empty parameter list "()" is handled directly in
ParsePrimaryExpression(): after is has consumed the opening parenthesis,
if a closing parenthesis follows, then the only valid input is an arrow
function. In this case, ParsePrimaryExpression() directly calls
ParseArrowFunctionLiteral(), to avoid needing to return a sentinel value
to signal the empty parameter list. Because it will consume the body of
the arrow function, ParseAssignmentExpression() will not see the arrow
"=>" token as next, and return the already-parser expression.
The implementation is done in ParserBase, so it was needed to do some
additions to ParserBase, ParserTraits and PreParserTraits. Some of the
glue code can be removed later on when more more functionality is moved
to ParserBase.
Additionally, this adds a runtime flag "harmony_arrow_functions"
(disabled by default); enabling "harmony" will enable it as well.
BUG=v8:2700
LOG=N
R=marja@chromium.org
Review URL: https://codereview.chromium.org/383983002
Patch from Adrián Pérez de Castro <aperez@igalia.com>.
git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22366 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2014-07-14 07:55:45 +00:00
|
|
|
// = == === =>
|
2011-09-08 13:06:44 +00:00
|
|
|
Advance();
|
|
|
|
if (c0_ == '=') {
|
|
|
|
token = Select('=', Token::EQ_STRICT, Token::EQ);
|
Implement handling of arrow functions in the parser
Arrow functions are parsed from ParseAssignmentExpression(). Handling the
parameter list is done by letting ParseConditionalExpression() parse a comma
separated list of identifiers, and it returns a tree of BinaryOperation nodes
with VariableProxy leaves, or a single VariableProxy if there is only one
parameter. When the arrow token "=>" is found, the VariableProxy nodes are
passed to ParseArrowFunctionLiteral(), which will then skip parsing the
paramaeter list. This avoids having to rewind when the arrow is found and
restart parsing the parameter list.
Note that the empty parameter list "()" is handled directly in
ParsePrimaryExpression(): after is has consumed the opening parenthesis,
if a closing parenthesis follows, then the only valid input is an arrow
function. In this case, ParsePrimaryExpression() directly calls
ParseArrowFunctionLiteral(), to avoid needing to return a sentinel value
to signal the empty parameter list. Because it will consume the body of
the arrow function, ParseAssignmentExpression() will not see the arrow
"=>" token as next, and return the already-parser expression.
The implementation is done in ParserBase, so it was needed to do some
additions to ParserBase, ParserTraits and PreParserTraits. Some of the
glue code can be removed later on when more more functionality is moved
to ParserBase.
Additionally, this adds a runtime flag "harmony_arrow_functions"
(disabled by default); enabling "harmony" will enable it as well.
BUG=v8:2700
LOG=N
R=marja@chromium.org
Review URL: https://codereview.chromium.org/383983002
Patch from Adrián Pérez de Castro <aperez@igalia.com>.
git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22366 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2014-07-14 07:55:45 +00:00
|
|
|
} else if (c0_ == '>') {
|
|
|
|
token = Select(Token::ARROW);
|
2011-09-08 13:06:44 +00:00
|
|
|
} else {
|
|
|
|
token = Token::ASSIGN;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '!':
|
|
|
|
// ! != !==
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '=') {
|
|
|
|
token = Select('=', Token::NE_STRICT, Token::NE);
|
|
|
|
} else {
|
|
|
|
token = Token::NOT;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '+':
|
|
|
|
// + ++ +=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '+') {
|
|
|
|
token = Select(Token::INC);
|
|
|
|
} else if (c0_ == '=') {
|
|
|
|
token = Select(Token::ASSIGN_ADD);
|
|
|
|
} else {
|
|
|
|
token = Token::ADD;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '-':
|
|
|
|
// - -- --> -=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '-') {
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '>' && has_line_terminator_before_next_) {
|
|
|
|
// For compatibility with SpiderMonkey, we skip lines that
|
|
|
|
// start with an HTML comment end '-->'.
|
|
|
|
token = SkipSingleLineComment();
|
|
|
|
} else {
|
|
|
|
token = Token::DEC;
|
|
|
|
}
|
|
|
|
} else if (c0_ == '=') {
|
|
|
|
token = Select(Token::ASSIGN_SUB);
|
|
|
|
} else {
|
|
|
|
token = Token::SUB;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '*':
|
|
|
|
// * *=
|
|
|
|
token = Select('=', Token::ASSIGN_MUL, Token::MUL);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '%':
|
|
|
|
// % %=
|
|
|
|
token = Select('=', Token::ASSIGN_MOD, Token::MOD);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '/':
|
|
|
|
// / // /* /=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '/') {
|
2014-07-02 07:01:31 +00:00
|
|
|
Advance();
|
|
|
|
if (c0_ == '@' || c0_ == '#') {
|
|
|
|
Advance();
|
|
|
|
token = SkipSourceURLComment();
|
|
|
|
} else {
|
|
|
|
PushBack(c0_);
|
|
|
|
token = SkipSingleLineComment();
|
|
|
|
}
|
2011-09-08 13:06:44 +00:00
|
|
|
} else if (c0_ == '*') {
|
|
|
|
token = SkipMultiLineComment();
|
|
|
|
} else if (c0_ == '=') {
|
|
|
|
token = Select(Token::ASSIGN_DIV);
|
|
|
|
} else {
|
|
|
|
token = Token::DIV;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '&':
|
|
|
|
// & && &=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '&') {
|
|
|
|
token = Select(Token::AND);
|
|
|
|
} else if (c0_ == '=') {
|
|
|
|
token = Select(Token::ASSIGN_BIT_AND);
|
|
|
|
} else {
|
|
|
|
token = Token::BIT_AND;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '|':
|
|
|
|
// | || |=
|
|
|
|
Advance();
|
|
|
|
if (c0_ == '|') {
|
|
|
|
token = Select(Token::OR);
|
|
|
|
} else if (c0_ == '=') {
|
|
|
|
token = Select(Token::ASSIGN_BIT_OR);
|
|
|
|
} else {
|
|
|
|
token = Token::BIT_OR;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '^':
|
|
|
|
// ^ ^=
|
|
|
|
token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '.':
|
|
|
|
// . Number
|
|
|
|
Advance();
|
|
|
|
if (IsDecimalDigit(c0_)) {
|
|
|
|
token = ScanNumber(true);
|
|
|
|
} else {
|
|
|
|
token = Token::PERIOD;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ':':
|
|
|
|
token = Select(Token::COLON);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ';':
|
|
|
|
token = Select(Token::SEMICOLON);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ',':
|
|
|
|
token = Select(Token::COMMA);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '(':
|
|
|
|
token = Select(Token::LPAREN);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ')':
|
|
|
|
token = Select(Token::RPAREN);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '[':
|
|
|
|
token = Select(Token::LBRACK);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ']':
|
|
|
|
token = Select(Token::RBRACK);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '{':
|
|
|
|
token = Select(Token::LBRACE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '}':
|
|
|
|
token = Select(Token::RBRACE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '?':
|
|
|
|
token = Select(Token::CONDITIONAL);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '~':
|
|
|
|
token = Select(Token::BIT_NOT);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
if (unicode_cache_->IsIdentifierStart(c0_)) {
|
|
|
|
token = ScanIdentifierOrKeyword();
|
|
|
|
} else if (IsDecimalDigit(c0_)) {
|
|
|
|
token = ScanNumber(false);
|
|
|
|
} else if (SkipWhiteSpace()) {
|
|
|
|
token = Token::WHITESPACE;
|
|
|
|
} else if (c0_ < 0) {
|
|
|
|
token = Token::EOS;
|
|
|
|
} else {
|
|
|
|
token = Select(Token::ILLEGAL);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Continue scanning for tokens as long as we're just skipping
|
|
|
|
// whitespace.
|
|
|
|
} while (token == Token::WHITESPACE);
|
|
|
|
|
|
|
|
next_.location.end_pos = source_pos();
|
|
|
|
next_.token = token;
|
2010-12-07 14:03:59 +00:00
|
|
|
}
|
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
void Scanner::SeekForward(int pos) {
|
2011-09-08 13:06:44 +00:00
|
|
|
// After this call, we will have the token at the given position as
|
|
|
|
// the "next" token. The "current" token will be invalid.
|
|
|
|
if (pos == next_.location.beg_pos) return;
|
|
|
|
int current_pos = source_pos();
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK_EQ(next_.location.end_pos, current_pos);
|
2011-09-08 13:06:44 +00:00
|
|
|
// Positions inside the lookahead token aren't supported.
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(pos >= current_pos);
|
2011-09-08 13:06:44 +00:00
|
|
|
if (pos != current_pos) {
|
|
|
|
source_->SeekForward(pos - source_->pos());
|
|
|
|
Advance();
|
|
|
|
// This function is only called to seek to the location
|
|
|
|
// of the end of a function (at the "}" token). It doesn't matter
|
|
|
|
// whether there was a line terminator in the part we skip.
|
|
|
|
has_line_terminator_before_next_ = false;
|
|
|
|
has_multiline_comment_before_next_ = false;
|
|
|
|
}
|
|
|
|
Scan();
|
2010-12-07 14:03:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-04-16 15:54:02 +00:00
|
|
|
bool Scanner::ScanEscape() {
|
2011-09-08 13:06:44 +00:00
|
|
|
uc32 c = c0_;
|
|
|
|
Advance();
|
|
|
|
|
|
|
|
// Skip escaped newlines.
|
|
|
|
if (unicode_cache_->IsLineTerminator(c)) {
|
|
|
|
// Allow CR+LF newlines in multiline string literals.
|
|
|
|
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
|
|
|
|
// Allow LF+CR newlines in multiline string literals.
|
|
|
|
if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
|
2012-04-16 15:54:02 +00:00
|
|
|
return true;
|
2011-09-08 13:06:44 +00:00
|
|
|
}
|
2010-12-07 14:03:59 +00:00
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
switch (c) {
|
|
|
|
case '\'': // fall through
|
|
|
|
case '"' : // fall through
|
|
|
|
case '\\': break;
|
|
|
|
case 'b' : c = '\b'; break;
|
|
|
|
case 'f' : c = '\f'; break;
|
|
|
|
case 'n' : c = '\n'; break;
|
|
|
|
case 'r' : c = '\r'; break;
|
|
|
|
case 't' : c = '\t'; break;
|
|
|
|
case 'u' : {
|
|
|
|
c = ScanHexNumber(4);
|
2012-04-16 15:54:02 +00:00
|
|
|
if (c < 0) return false;
|
2011-09-08 13:06:44 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'v' : c = '\v'; break;
|
|
|
|
case 'x' : {
|
|
|
|
c = ScanHexNumber(2);
|
2012-04-16 15:54:02 +00:00
|
|
|
if (c < 0) return false;
|
2011-09-08 13:06:44 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case '0' : // fall through
|
|
|
|
case '1' : // fall through
|
|
|
|
case '2' : // fall through
|
|
|
|
case '3' : // fall through
|
|
|
|
case '4' : // fall through
|
|
|
|
case '5' : // fall through
|
|
|
|
case '6' : // fall through
|
|
|
|
case '7' : c = ScanOctalEscape(c, 2); break;
|
|
|
|
}
|
2010-12-07 14:03:59 +00:00
|
|
|
|
2012-04-16 15:54:02 +00:00
|
|
|
// According to ECMA-262, section 7.8.4, characters not covered by the
|
|
|
|
// above cases should be illegal, but they are commonly handled as
|
|
|
|
// non-escaped characters by JS VMs.
|
2011-09-08 13:06:44 +00:00
|
|
|
AddLiteralChar(c);
|
2012-04-16 15:54:02 +00:00
|
|
|
return true;
|
2010-12-07 14:03:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
// Octal escapes of the forms '\0xx' and '\xxx' are not a part of
|
|
|
|
// ECMA-262. Other JS VMs support them.
|
2011-11-01 07:47:15 +00:00
|
|
|
uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
|
2011-09-08 13:06:44 +00:00
|
|
|
uc32 x = c - '0';
|
|
|
|
int i = 0;
|
|
|
|
for (; i < length; i++) {
|
|
|
|
int d = c0_ - '0';
|
|
|
|
if (d < 0 || d > 7) break;
|
|
|
|
int nx = x * 8 + d;
|
|
|
|
if (nx >= 256) break;
|
|
|
|
x = nx;
|
|
|
|
Advance();
|
|
|
|
}
|
|
|
|
// Anything except '\0' is an octal escape sequence, illegal in strict mode.
|
|
|
|
// Remember the position of octal escape sequences so that an error
|
|
|
|
// can be reported later (in strict mode).
|
|
|
|
// We don't report the error immediately, because the octal escape can
|
|
|
|
// occur before the "use strict" directive.
|
|
|
|
if (c != '0' || i > 0) {
|
|
|
|
octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
|
2010-12-07 14:03:59 +00:00
|
|
|
}
|
2011-09-08 13:06:44 +00:00
|
|
|
return x;
|
2010-12-07 14:03:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Scans a string literal delimited by the quote character at the current
// position. Returns Token::STRING on success and Token::ILLEGAL if the
// string is unterminated or contains an invalid escape.
Token::Value Scanner::ScanString() {
  const uc32 quote = c0_;
  Advance();  // consume quote

  LiteralScope literal(this);
  while (c0_ != quote && c0_ >= 0 &&
         !unicode_cache_->IsLineTerminator(c0_)) {
    const uc32 ch = c0_;
    Advance();
    if (ch != '\\') {
      AddLiteralChar(ch);
      continue;
    }
    // A backslash at the very end of the input, or an escape that fails to
    // scan, makes the whole literal illegal.
    if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL;
  }

  // The loop also stops at end of input and at line terminators; only a
  // matching closing quote terminates the string properly.
  if (c0_ != quote) return Token::ILLEGAL;
  literal.Complete();

  Advance();  // consume quote
  return Token::STRING;
}
|
2010-12-07 14:03:59 +00:00
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
void Scanner::ScanDecimalDigits() {
|
2011-09-08 13:06:44 +00:00
|
|
|
while (IsDecimalDigit(c0_))
|
|
|
|
AddLiteralCharAdvance();
|
2010-12-07 14:03:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Scans a numeric literal. |seen_period| is true when a leading '.' has
// already been consumed, in which case the current character is the first
// digit of the fraction. Returns Token::NUMBER on success and Token::ILLEGAL
// for a malformed literal.
Token::Value Scanner::ScanNumber(bool seen_period) {
  DCHECK(IsDecimalDigit(c0_));  // the first digit of the number or the fraction

  enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

  LiteralScope literal(this);
  if (seen_period) {
    // we have already seen a decimal point of the float
    AddLiteralChar('.');
    ScanDecimalDigits();  // we know we have at least one digit
  } else {
    // if the first character is '0' we must check for octals and hex
    if (c0_ == '0') {
      const int start_pos = source_pos();  // For reporting octal positions.
      AddLiteralCharAdvance();

      // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
      // an octal number.
      if (c0_ == 'x' || c0_ == 'X') {
        // hex number
        kind = HEX;
        AddLiteralCharAdvance();
        // we must have at least one hex digit after 'x'/'X'
        if (!IsHexDigit(c0_)) return Token::ILLEGAL;
        do {
          AddLiteralCharAdvance();
        } while (IsHexDigit(c0_));
      } else if (harmony_numeric_literals_ && (c0_ == 'o' || c0_ == 'O')) {
        kind = OCTAL;
        AddLiteralCharAdvance();
        // we must have at least one octal digit after 'o'/'O'
        if (!IsOctalDigit(c0_)) return Token::ILLEGAL;
        do {
          AddLiteralCharAdvance();
        } while (IsOctalDigit(c0_));
      } else if (harmony_numeric_literals_ && (c0_ == 'b' || c0_ == 'B')) {
        kind = BINARY;
        AddLiteralCharAdvance();
        // we must have at least one binary digit after 'b'/'B'
        if (!IsBinaryDigit(c0_)) return Token::ILLEGAL;
        do {
          AddLiteralCharAdvance();
        } while (IsBinaryDigit(c0_));
      } else if ('0' <= c0_ && c0_ <= '7') {
        // (possible) octal number
        kind = IMPLICIT_OCTAL;
        while ('0' <= c0_ && c0_ <= '7') {
          AddLiteralCharAdvance();
        }
        if (c0_ == '8' || c0_ == '9') {
          // Not an octal literal after all; continue as a decimal number.
          kind = DECIMAL;
        } else {
          // Octal literal finished.
          octal_pos_ = Location(start_pos, source_pos());
        }
      }
    }

    // Parse decimal digits and allow trailing fractional part.
    if (kind == DECIMAL) {
      ScanDecimalDigits();  // optional
      if (c0_ == '.') {
        AddLiteralCharAdvance();
        ScanDecimalDigits();  // optional
      }
    }
  }

  // scan exponent, if any
  if (c0_ == 'e' || c0_ == 'E') {
    DCHECK(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number
    if (kind != DECIMAL) return Token::ILLEGAL;
    // scan exponent
    AddLiteralCharAdvance();
    if (c0_ == '+' || c0_ == '-') {
      AddLiteralCharAdvance();
    }
    // we must have at least one decimal digit after 'e'/'E'
    if (!IsDecimalDigit(c0_)) return Token::ILLEGAL;
    ScanDecimalDigits();
  }

  // The source character immediately following a numeric literal must
  // not be an identifier start or a decimal digit; see ECMA-262
  // section 7.8.3, page 17 (note that we read only one decimal digit
  // if the value is 0).
  if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) {
    return Token::ILLEGAL;
  }

  literal.Complete();

  return Token::NUMBER;
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Scans the "uXXXX" tail of a unicode escape inside an identifier. The
// callers in this file invoke it only while c0_ is the backslash.
//
// Returns the value produced by ScanHexNumber(4), or -1 when the character
// after the backslash is not 'u'. A negative ScanHexNumber result is passed
// through unchanged after the 'u' has been pushed back.
uc32 Scanner::ScanIdentifierUnicodeEscape() {
  Advance();  // Step over the backslash.
  if (c0_ == 'u') {
    Advance();  // Step over the 'u'.
    const uc32 code_unit = ScanHexNumber(4);
    if (code_unit < 0) {
      // The hex run was rejected: restore the 'u' for the caller.
      PushBack('u');
    }
    return code_unit;
  }
  return -1;
}
|
|
|
|
|
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Keyword Matcher
|
|
|
|
|
|
|
|
// X-macro table of the reserved words known to the scanner, grouped by their
// first character. KEYWORD_GROUP(ch) introduces the group of words starting
// with ch; KEYWORD(text, token) declares one word and the token expression it
// maps to. The "export", "import" and "let" entries refer to the identifiers
// harmony_modules and harmony_scoping, which therefore have to be visible
// wherever the token argument of this table is expanded.
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
  KEYWORD_GROUP('b') \
  KEYWORD("break", Token::BREAK) \
  KEYWORD_GROUP('c') \
  KEYWORD("case", Token::CASE) \
  KEYWORD("catch", Token::CATCH) \
  KEYWORD("class", Token::FUTURE_RESERVED_WORD) \
  KEYWORD("const", Token::CONST) \
  KEYWORD("continue", Token::CONTINUE) \
  KEYWORD_GROUP('d') \
  KEYWORD("debugger", Token::DEBUGGER) \
  KEYWORD("default", Token::DEFAULT) \
  KEYWORD("delete", Token::DELETE) \
  KEYWORD("do", Token::DO) \
  KEYWORD_GROUP('e') \
  KEYWORD("else", Token::ELSE) \
  KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \
  KEYWORD("export", \
          harmony_modules ? Token::EXPORT : Token::FUTURE_RESERVED_WORD) \
  KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \
  KEYWORD_GROUP('f') \
  KEYWORD("false", Token::FALSE_LITERAL) \
  KEYWORD("finally", Token::FINALLY) \
  KEYWORD("for", Token::FOR) \
  KEYWORD("function", Token::FUNCTION) \
  KEYWORD_GROUP('i') \
  KEYWORD("if", Token::IF) \
  KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("import", \
          harmony_modules ? Token::IMPORT : Token::FUTURE_RESERVED_WORD) \
  KEYWORD("in", Token::IN) \
  KEYWORD("instanceof", Token::INSTANCEOF) \
  KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD_GROUP('l') \
  KEYWORD("let", \
          harmony_scoping ? Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD_GROUP('n') \
  KEYWORD("new", Token::NEW) \
  KEYWORD("null", Token::NULL_LITERAL) \
  KEYWORD_GROUP('p') \
  KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD_GROUP('r') \
  KEYWORD("return", Token::RETURN) \
  KEYWORD_GROUP('s') \
  KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("super", Token::FUTURE_RESERVED_WORD) \
  KEYWORD("switch", Token::SWITCH) \
  KEYWORD_GROUP('t') \
  KEYWORD("this", Token::THIS) \
  KEYWORD("throw", Token::THROW) \
  KEYWORD("true", Token::TRUE_LITERAL) \
  KEYWORD("try", Token::TRY) \
  KEYWORD("typeof", Token::TYPEOF) \
  KEYWORD_GROUP('v') \
  KEYWORD("var", Token::VAR) \
  KEYWORD("void", Token::VOID) \
  KEYWORD_GROUP('w') \
  KEYWORD("while", Token::WHILE) \
  KEYWORD("with", Token::WITH) \
  KEYWORD_GROUP('y') \
  KEYWORD("yield", Token::YIELD)
|
2011-09-08 13:06:44 +00:00
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Classifies a one-byte, identifier-shaped character sequence as a keyword
// token or as Token::IDENTIFIER.
//
//   input           - characters to classify; only input_length of them are
//                     read.
//   input_length    - number of characters in input; must be at least 1.
//   harmony_scoping - selects the token returned for "let" (see KEYWORDS).
//   harmony_modules - selects the token returned for "export" and "import"
//                     (see KEYWORDS).
//
// Inputs shorter than kMinLength or longer than kMaxLength cannot be keywords
// and are answered immediately. Otherwise the KEYWORDS table is expanded into
// a switch on the first character, and every word of the matching group is
// compared position by position against the remaining characters.
static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
                                             int input_length,
                                             bool harmony_scoping,
                                             bool harmony_modules) {
  DCHECK(input_length >= 1);
  const int kMinLength = 2;
  const int kMaxLength = 10;
  if (input_length < kMinLength || input_length > kMaxLength) {
    return Token::IDENTIFIER;
  }
  switch (input[0]) {
    default:
// Each group closes the previous case with a break and opens its own.
#define KEYWORD_GROUP_CASE(ch) \
      break; \
    case ch:
// input[0] is already known to match through the enclosing case label, so
// the comparison starts at index 1. The length guards are compile-time
// constants per keyword, so positions past the keyword's end are never read.
#define KEYWORD(keyword, token) \
    { \
      /* 'keyword' is a char array, so sizeof(keyword) is */ \
      /* strlen(keyword) plus 1 for the NUL char. */ \
      const int keyword_length = sizeof(keyword) - 1; \
      STATIC_ASSERT(keyword_length >= kMinLength); \
      STATIC_ASSERT(keyword_length <= kMaxLength); \
      if (input_length == keyword_length && \
          input[1] == keyword[1] && \
          (keyword_length <= 2 || input[2] == keyword[2]) && \
          (keyword_length <= 3 || input[3] == keyword[3]) && \
          (keyword_length <= 4 || input[4] == keyword[4]) && \
          (keyword_length <= 5 || input[5] == keyword[5]) && \
          (keyword_length <= 6 || input[6] == keyword[6]) && \
          (keyword_length <= 7 || input[7] == keyword[7]) && \
          (keyword_length <= 8 || input[8] == keyword[8]) && \
          (keyword_length <= 9 || input[9] == keyword[9])) { \
        return token; \
      } \
    }
    KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
// Keep the expansion helpers local to this function instead of leaking them
// into the remainder of the translation unit.
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
  }
  return Token::IDENTIFIER;
}
|
|
|
|
|
|
|
|
|
Implement handling of arrow functions in the parser
Arrow functions are parsed from ParseAssignmentExpression(). Handling the
parameter list is done by letting ParseConditionalExpression() parse a comma
separated list of identifiers, and it returns a tree of BinaryOperation nodes
with VariableProxy leaves, or a single VariableProxy if there is only one
parameter. When the arrow token "=>" is found, the VariableProxy nodes are
passed to ParseArrowFunctionLiteral(), which will then skip parsing the
parameter list. This avoids having to rewind when the arrow is found and
restart parsing the parameter list.
Note that the empty parameter list "()" is handled directly in
ParsePrimaryExpression(): after it has consumed the opening parenthesis,
if a closing parenthesis follows, then the only valid input is an arrow
function. In this case, ParsePrimaryExpression() directly calls
ParseArrowFunctionLiteral(), to avoid needing to return a sentinel value
to signal the empty parameter list. Because it will consume the body of
the arrow function, ParseAssignmentExpression() will not see the arrow
"=>" token as next, and return the already-parsed expression.
The implementation is done in ParserBase, so some additions were needed to
ParserBase, ParserTraits and PreParserTraits. Some of the
glue code can be removed later on when more functionality is moved
to ParserBase.
Additionally, this adds a runtime flag "harmony_arrow_functions"
(disabled by default); enabling "harmony" will enable it as well.
BUG=v8:2700
LOG=N
R=marja@chromium.org
Review URL: https://codereview.chromium.org/383983002
Patch from Adrián Pérez de Castro <aperez@igalia.com>.
git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22366 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2014-07-14 07:55:45 +00:00
|
|
|
bool Scanner::IdentifierIsFutureStrictReserved(
|
|
|
|
const AstRawString* string) const {
|
|
|
|
// Keywords are always 1-byte strings.
|
|
|
|
return string->is_one_byte() &&
|
|
|
|
Token::FUTURE_STRICT_RESERVED_WORD ==
|
|
|
|
KeywordOrIdentifierToken(string->raw_data(), string->length(),
|
|
|
|
harmony_scoping_, harmony_modules_);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Scans an identifier starting at c0_ and returns its token.
//
// While no unicode escape is encountered, the characters are collected and
// the finished one-byte literal is classified by KeywordOrIdentifierToken.
// As soon as an escape shows up, the remainder is handed to
// ScanIdentifierSuffix, which only ever yields Token::IDENTIFIER or
// Token::ILLEGAL. Token::ILLEGAL is returned for an escape that does not
// decode to a legal identifier start character.
Token::Value Scanner::ScanIdentifierOrKeyword() {
  DCHECK(unicode_cache_->IsIdentifierStart(c0_));
  LiteralScope literal(this);

  // First character given as an escape.
  if (c0_ == '\\') {
    const uc32 escaped = ScanIdentifierUnicodeEscape();
    // A decoded backslash is refused: no recursive escapes.
    const bool legal_start = escaped >= 0 && escaped != '\\' &&
                             unicode_cache_->IsIdentifierStart(escaped);
    if (!legal_start) return Token::ILLEGAL;
    AddLiteralChar(escaped);
    return ScanIdentifierSuffix(&literal);
  }

  // Plain first character.
  {
    const uc32 head = c0_;
    Advance();
    AddLiteralChar(head);
  }

  // Remaining identifier characters.
  while (unicode_cache_->IsIdentifierPart(c0_)) {
    if (c0_ == '\\') {
      // Escape inside the identifier: let the suffix scanner finish it.
      return ScanIdentifierSuffix(&literal);
    }
    const uc32 current = c0_;
    Advance();
    AddLiteralChar(current);
  }

  literal.Complete();

  // Only one-byte literals are looked up in the keyword table.
  if (!next_.literal_chars->is_one_byte()) return Token::IDENTIFIER;

  Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
  return KeywordOrIdentifierToken(chars.start(), chars.length(),
                                  harmony_scoping_, harmony_modules_);
}
|
|
|
|
|
2009-11-09 12:18:25 +00:00
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
// Scans the rest of an identifier whose beginning has already been added to
// the literal owned by |literal|, decoding unicode escapes along the way.
//
// Returns Token::IDENTIFIER after completing the literal, or Token::ILLEGAL
// (without completing it) when an escape does not decode to a legal
// identifier part character.
Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
  while (unicode_cache_->IsIdentifierPart(c0_)) {
    if (c0_ != '\\') {
      // Ordinary character: record it and move on.
      AddLiteralChar(c0_);
      Advance();
      continue;
    }
    const uc32 escaped = ScanIdentifierUnicodeEscape();
    // Only legal identifier part characters are accepted; a decoded
    // backslash is refused.
    const bool legal_part = escaped >= 0 && escaped != '\\' &&
                            unicode_cache_->IsIdentifierPart(escaped);
    if (!legal_part) return Token::ILLEGAL;
    AddLiteralChar(escaped);
  }
  literal->Complete();

  return Token::IDENTIFIER;
}
|
|
|
|
|
2009-11-09 12:18:25 +00:00
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
bool Scanner::ScanRegExpPattern(bool seen_equal) {
|
2011-09-08 13:06:44 +00:00
|
|
|
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
|
|
|
|
bool in_character_class = false;
|
|
|
|
|
|
|
|
// Previous token is either '/' or '/=', in the second case, the
|
|
|
|
// pattern starts at =.
|
|
|
|
next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
|
|
|
|
next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
|
2010-08-24 12:29:50 +00:00
|
|
|
|
2011-09-08 13:06:44 +00:00
|
|
|
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
|
|
|
|
// the scanner should pass uninterpreted bodies to the RegExp
|
|
|
|
// constructor.
|
|
|
|
LiteralScope literal(this);
|
|
|
|
if (seen_equal) {
|
|
|
|
AddLiteralChar('=');
|
|
|
|
}
|
|
|
|
|
|
|
|
while (c0_ != '/' || in_character_class) {
|
|
|
|
if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;
|
|
|
|
if (c0_ == '\\') { // Escape sequence.
|
|
|
|
AddLiteralCharAdvance();
|
|
|
|
if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;
|
|
|
|
AddLiteralCharAdvance();
|
|
|
|
// If the escape allows more characters, i.e., \x??, \u????, or \c?,
|
|
|
|
// only "safe" characters are allowed (letters, digits, underscore),
|
|
|
|
// otherwise the escape isn't valid and the invalid character has
|
|
|
|
// its normal meaning. I.e., we can just continue scanning without
|
|
|
|
// worrying whether the following characters are part of the escape
|
|
|
|
// or not, since any '/', '\\' or '[' is guaranteed to not be part
|
|
|
|
// of the escape sequence.
|
|
|
|
|
|
|
|
// TODO(896): At some point, parse RegExps more throughly to capture
|
|
|
|
// octal esacpes in strict mode.
|
|
|
|
} else { // Unescaped character.
|
|
|
|
if (c0_ == '[') in_character_class = true;
|
|
|
|
if (c0_ == ']') in_character_class = false;
|
|
|
|
AddLiteralCharAdvance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Advance(); // consume '/'
|
|
|
|
|
|
|
|
literal.Complete();
|
|
|
|
|
|
|
|
return true;
|
2010-08-24 12:29:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
bool Scanner::ScanLiteralUnicodeEscape() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(c0_ == '\\');
|
2011-09-08 13:06:44 +00:00
|
|
|
uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
|
|
|
|
Advance();
|
|
|
|
int i = 1;
|
|
|
|
if (c0_ == 'u') {
|
|
|
|
i++;
|
|
|
|
while (i < 6) {
|
|
|
|
Advance();
|
|
|
|
if (!IsHexDigit(c0_)) break;
|
|
|
|
chars_read[i] = c0_;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i < 6) {
|
|
|
|
// Incomplete escape. Undo all advances and return false.
|
|
|
|
while (i > 0) {
|
|
|
|
i--;
|
|
|
|
PushBack(chars_read[i]);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// Complete escape. Add all chars to current literal buffer.
|
|
|
|
for (int i = 0; i < 6; i++) {
|
|
|
|
AddLiteralChar(chars_read[i]);
|
|
|
|
}
|
|
|
|
return true;
|
2010-08-24 12:29:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-01 07:47:15 +00:00
|
|
|
bool Scanner::ScanRegExpFlags() {
|
2011-09-08 13:06:44 +00:00
|
|
|
// Scan regular expression flags.
|
|
|
|
LiteralScope literal(this);
|
|
|
|
while (unicode_cache_->IsIdentifierPart(c0_)) {
|
|
|
|
if (c0_ != '\\') {
|
|
|
|
AddLiteralCharAdvance();
|
|
|
|
} else {
|
|
|
|
if (!ScanLiteralUnicodeEscape()) {
|
|
|
|
break;
|
|
|
|
}
|
2012-07-06 14:04:15 +00:00
|
|
|
Advance();
|
2011-09-08 13:06:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
literal.Complete();
|
|
|
|
|
|
|
|
next_.location.end_pos = source_pos() - 1;
|
|
|
|
return true;
|
2010-08-24 12:29:50 +00:00
|
|
|
}
|
|
|
|
|
2013-10-10 11:58:16 +00:00
|
|
|
|
2014-06-24 14:03:24 +00:00
|
|
|
// Returns the string that |ast_value_factory| provides for the current
// literal, using the one-byte or two-byte lookup according to
// is_literal_one_byte().
const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
  if (!is_literal_one_byte()) {
    return ast_value_factory->GetTwoByteString(literal_two_byte_string());
  }
  return ast_value_factory->GetOneByteString(literal_one_byte_string());
}
|
|
|
|
|
|
|
|
|
2014-06-24 14:03:24 +00:00
|
|
|
// Returns the string that |ast_value_factory| provides for the next literal,
// using the one-byte or two-byte lookup according to
// is_next_literal_one_byte().
const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
  if (!is_next_literal_one_byte()) {
    return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
  }
  return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
}
|
|
|
|
|
|
|
|
|
|
|
|
double Scanner::DoubleValue() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(is_literal_one_byte());
|
2014-03-12 14:03:25 +00:00
|
|
|
return StringToDouble(
|
2014-04-11 07:27:25 +00:00
|
|
|
unicode_cache_,
|
|
|
|
literal_one_byte_string(),
|
2014-03-12 14:03:25 +00:00
|
|
|
ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-03-13 08:29:31 +00:00
|
|
|
// Registers the current one-byte literal as a number key in |finder| with
// |value| and returns whatever DuplicateFinder::AddNumber returns.
int Scanner::FindNumber(DuplicateFinder* finder, int value) {
  Vector<const uint8_t> digits = literal_one_byte_string();
  return finder->AddNumber(digits, value);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Registers the current literal as a symbol key in |finder| with |value|,
// choosing the one-byte or two-byte entry point according to
// is_literal_one_byte(), and returns that call's result.
int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
  if (!is_literal_one_byte()) {
    return finder->AddTwoByteSymbol(literal_two_byte_string(), value);
  }
  return finder->AddOneByteSymbol(literal_one_byte_string(), value);
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Adds a one-byte key with |value|; forwards to AddSymbol with the one-byte
// marker set and returns its result.
int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
  const bool kIsOneByte = true;
  return AddSymbol(key, kIsOneByte, value);
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Adds a two-byte key with |value|; the key is reinterpreted as a byte vector
// and forwarded to AddSymbol with the one-byte marker cleared.
int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
  Vector<const uint8_t> raw_bytes = Vector<const uint8_t>::cast(key);
  const bool kIsOneByte = false;
  return AddSymbol(raw_bytes, kIsOneByte, value);
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
|
2014-03-12 14:03:25 +00:00
|
|
|
bool is_one_byte,
|
2013-10-10 11:58:16 +00:00
|
|
|
int value) {
|
2014-03-12 14:03:25 +00:00
|
|
|
uint32_t hash = Hash(key, is_one_byte);
|
|
|
|
byte* encoding = BackupKey(key, is_one_byte);
|
2013-10-10 11:58:16 +00:00
|
|
|
HashMap::Entry* entry = map_.Lookup(encoding, hash, true);
|
|
|
|
int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
|
|
|
|
entry->value =
|
|
|
|
reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));
|
|
|
|
return old_value;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Adds a numeric literal as a key, so that different spellings of the same
// number end up under the same key. |key| must not be empty.
//
// A literal that IsNumberCanonical accepts is stored as written. Any other
// literal is converted to a double and stored under the text produced by
// DoubleToCString, or under "Infinity" when the double is not finite.
// Returns the result of the underlying AddSymbol call.
int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
  DCHECK(key.length() > 0);
  // Quick check for already being in canonical form.
  if (IsNumberCanonical(key)) {
    return AddOneByteSymbol(key, value);
  }

  const int accepted_notations =
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
  const double numeric_value =
      StringToDouble(unicode_constants_, key, accepted_notations, 0.0);

  const char* text;
  int text_length;
  if (std::isfinite(numeric_value)) {
    // The text is rendered using number_buffer_ as scratch space.
    text = DoubleToCString(numeric_value,
                           Vector<char>(number_buffer_, kBufferSize));
    text_length = StrLength(text);
  } else {
    text = "Infinity";
    text_length = 8;  // strlen("Infinity");
  }
  Vector<const byte> canonical_key(reinterpret_cast<const byte*>(text),
                                   text_length);
  return AddSymbol(canonical_key, true, value);
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Tests for a safe approximation of number literals that are already in
// canonical form: at most 15 characters, no leading zeroes except an integer
// part that is a single zero, at least one integer digit, and no trailing
// zeros below the decimal point.
//
// Returning false is always safe: AddNumber then derives the key from the
// literal's double value instead of from its spelling. Returning true for a
// non-canonical spelling is not safe, because two spellings of the same
// number would be stored under different keys.
bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
  int pos = 0;
  int length = number.length();
  // An empty key has no first character to inspect; treat it as
  // non-canonical instead of indexing past the end.
  if (length == 0 || length > 15) return false;
  if (number[pos] == '0') {
    pos++;
  } else {
    while (pos < length &&
           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;
    // A literal without integer digits (e.g. ".5") is not canonical: the
    // canonical spelling is "0.5", and accepting ".5" as-is would store the
    // two spellings under different keys.
    if (pos == 0) return false;
  }
  if (length == pos) return true;
  if (number[pos] != '.') return false;
  pos++;
  // Stays true when no fraction digits follow the '.', and otherwise tracks
  // whether the most recently seen fraction digit is a zero.
  bool invalid_last_digit = true;
  while (pos < length) {
    // Characters below '0' wrap around to large values and are rejected by
    // the same comparison as characters above '9'.
    uint8_t digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    invalid_last_digit = (digit == 0);
    pos++;
  }
  return !invalid_last_digit;
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Primitive hash over the bytes of |key|. The initial value is built from the
// key length (shifted left by one) and the one-byte flag in the low bit, so
// both take part in the result.
uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
  const int byte_count = key.length();
  uint32_t running = byte_count << 1;
  if (is_one_byte) running |= 1;
  for (int index = 0; index < byte_count; index++) {
    running += key[index];
    running *= 1025;
    running ^= (running >> 6);
  }
  return running;
}
|
|
|
|
|
|
|
|
|
|
|
|
bool DuplicateFinder::Match(void* first, void* second) {
|
|
|
|
// Decode lengths.
|
|
|
|
// Length + ASCII-bit is encoded as base 128, most significant heptet first,
|
|
|
|
// with a 8th bit being non-zero while there are more heptets.
|
|
|
|
// The value encodes the number of bytes following, and whether the original
|
|
|
|
// was ASCII.
|
|
|
|
byte* s1 = reinterpret_cast<byte*>(first);
|
|
|
|
byte* s2 = reinterpret_cast<byte*>(second);
|
2014-03-12 14:03:25 +00:00
|
|
|
uint32_t length_one_byte_field = 0;
|
2013-10-10 11:58:16 +00:00
|
|
|
byte c1;
|
|
|
|
do {
|
|
|
|
c1 = *s1;
|
|
|
|
if (c1 != *s2) return false;
|
2014-03-12 14:03:25 +00:00
|
|
|
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
|
2013-10-10 11:58:16 +00:00
|
|
|
s1++;
|
|
|
|
s2++;
|
|
|
|
} while ((c1 & 0x80) != 0);
|
2014-03-12 14:03:25 +00:00
|
|
|
int length = static_cast<int>(length_one_byte_field >> 1);
|
2013-10-10 11:58:16 +00:00
|
|
|
return memcmp(s1, s2, length) == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-03-13 09:15:14 +00:00
|
|
|
// Copies |bytes| into backing_store_ as one sequence, preceded by a header,
// and returns a pointer to the start of that sequence.
//
// The header value is (byte length << 1) | one-byte flag, written in base
// 128 with the most significant group first. Every header byte except the
// last, least significant, one has its 0x80 bit set.
byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
                                 bool is_one_byte) {
  const uint32_t header = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
  backing_store_.StartSequence();
  // Emit the continuation bytes for the 7-bit groups at bit offsets 28, 21,
  // 14 and 7, skipping leading groups the header value does not reach.
  for (int shift = 28; shift > 0; shift -= 7) {
    if (header >= (1u << shift)) {
      backing_store_.Add(static_cast<uint8_t>((header >> shift) | 0x80u));
    }
  }
  // Final group, without the continuation bit.
  backing_store_.Add(static_cast<uint8_t>(header & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
|
|
|
|
|
2009-11-09 12:18:25 +00:00
|
|
|
} } // namespace v8::internal
|