Remove scanner abstraction layer from JSON parsing.

Review URL: http://codereview.chromium.org/7020018

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8147 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
sandholm@chromium.org 2011-06-01 14:06:30 +00:00
parent ff76d1ab0c
commit 3ed8c2f520
2 changed files with 218 additions and 315 deletions

View File

@ -53,203 +53,19 @@ Handle<Object> JsonParser::ParseJson(Handle<String> source) {
// Set initial position right before the string.
position_ = -1;
// Advance to the first character (possibly EOS)
Advance();
Next();
AdvanceSkipWhitespace();
Handle<Object> result = ParseJsonValue();
if (result.is_null() || Next() != Token::EOS) {
// Parse failed. Scanner's current token is the unexpected token.
Token::Value token = current_.token;
if (result.is_null() || c0_ != kEndOfString) {
// Parse failed. Current character is the unexpected token.
const char* message;
const char* name_opt = NULL;
switch (token) {
case Token::EOS:
message = "unexpected_eos";
break;
case Token::NUMBER:
message = "unexpected_token_number";
break;
case Token::STRING:
message = "unexpected_token_string";
break;
case Token::IDENTIFIER:
case Token::FUTURE_RESERVED_WORD:
message = "unexpected_token_identifier";
break;
default:
message = "unexpected_token";
name_opt = Token::String(token);
ASSERT(name_opt != NULL);
break;
}
Factory* factory = isolate()->factory();
MessageLocation location(factory->NewScript(source),
current_.beg_pos,
current_.end_pos);
Handle<JSArray> array;
if (name_opt == NULL) {
array = factory->NewJSArray(0);
} else {
Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt));
Handle<FixedArray> element = factory->NewFixedArray(1);
element->set(0, *name);
array = factory->NewJSArrayWithElements(element);
}
Handle<Object> result = factory->NewSyntaxError(message, array);
isolate()->Throw(*result, &location);
return Handle<Object>::null();
}
return result;
}
// Parses a single JSON value of any kind. The token returned by Next()
// selects which kind of value is produced; a token that cannot start a
// value is reported through ReportUnexpectedToken().
Handle<Object> JsonParser::ParseJsonValue() {
  const Token::Value first = Next();
  if (first == Token::STRING) {
    return GetString(false);
  }
  if (first == Token::NUMBER) {
    return isolate()->factory()->NewNumber(number_);
  }
  if (first == Token::FALSE_LITERAL) {
    return isolate()->factory()->false_value();
  }
  if (first == Token::TRUE_LITERAL) {
    return isolate()->factory()->true_value();
  }
  if (first == Token::NULL_LITERAL) {
    return isolate()->factory()->null_value();
  }
  if (first == Token::LBRACE) {
    return ParseJsonObject();
  }
  if (first == Token::LBRACK) {
    return ParseJsonArray();
  }
  return ReportUnexpectedToken();
}
// Parses the members of a JSON object into a freshly allocated JSObject.
// The scanner must be positioned right after the '{' token. Returns a null
// handle when a member value fails to parse or an unexpected token is seen.
Handle<Object> JsonParser::ParseJsonObject() {
  Handle<JSFunction> constructor(
      isolate()->global_context()->object_function());
  Handle<JSObject> object = isolate()->factory()->NewJSObject(constructor);

  // Empty object: consume the '}' and finish.
  if (Peek() == Token::RBRACE) {
    Next();
    return object;
  }

  for (;;) {
    // Each member has the shape: STRING ':' value.
    if (Next() != Token::STRING) {
      return ReportUnexpectedToken();
    }
    Handle<String> name = GetString(true);
    if (Next() != Token::COLON) {
      return ReportUnexpectedToken();
    }
    Handle<Object> member = ParseJsonValue();
    if (member.is_null()) return Handle<Object>::null();

    // Array-index keys become elements, the Proto_symbol key sets the
    // prototype, and every other key becomes a plain local property.
    uint32_t element_index;
    if (name->AsArrayIndex(&element_index)) {
      SetOwnElement(object, element_index, member, kNonStrictMode);
    } else if (name->Equals(isolate()->heap()->Proto_symbol())) {
      SetPrototype(object, member);
    } else {
      SetLocalPropertyIgnoreAttributes(object, name, member, NONE);
    }

    // Keep going only while members are separated by commas.
    if (Next() != Token::COMMA) break;
  }

  // The token that ended the member list must be the closing brace.
  if (current_.token != Token::RBRACE) {
    return ReportUnexpectedToken();
  }
  return object;
}
// Parses the elements of a JSON array and returns them as a JSArray.
// The scanner must be positioned right after the '[' token. Returns a null
// handle when an element fails to parse or an unexpected token is seen.
Handle<Object> JsonParser::ParseJsonArray() {
  ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
  ZoneList<Handle<Object> > parsed(4);

  Token::Value last = Peek();
  if (last != Token::RBRACK) {
    for (;;) {
      Handle<Object> item = ParseJsonValue();
      if (item.is_null()) return Handle<Object>::null();
      parsed.Add(item);
      last = Next();
      if (last != Token::COMMA) break;
    }
    // The token that ended the element list must be the closing bracket.
    if (last != Token::RBRACK) {
      return ReportUnexpectedToken();
    }
  } else {
    // Empty array: consume the ']'.
    Next();
  }

  // Copy the collected elements into a fixed array backing the result.
  const int count = parsed.length();
  Handle<FixedArray> backing = isolate()->factory()->NewFixedArray(count);
  for (int i = 0; i < count; i++) {
    backing->set(i, *parsed[i]);
  }
  return isolate()->factory()->NewJSArrayWithElements(backing);
}
// Advances the token stream by one token: the previous look-ahead token
// becomes the current token, ScanJson() is run to refill the look-ahead,
// and the kind of the (new) current token is returned.
Token::Value JsonParser::Next() {
// Promote the look-ahead first; ScanJson() overwrites next_.
current_ = next_;
ScanJson();
return current_.token;
}
void JsonParser::ScanJson() {
if (source_->IsSeqAsciiString()) {
is_sequential_ascii_ = true;
} else {
is_sequential_ascii_ = false;
}
Token::Value token;
do {
// Remember the position of the next token
next_.beg_pos = position_;
switch (c0_) {
case '\t':
case '\r':
case '\n':
case ' ':
Advance();
token = Token::WHITESPACE;
break;
case '{':
Advance();
token = Token::LBRACE;
break;
case '}':
Advance();
token = Token::RBRACE;
break;
case '[':
Advance();
token = Token::LBRACK;
break;
case ']':
Advance();
token = Token::RBRACK;
break;
case ':':
Advance();
token = Token::COLON;
break;
case ',':
Advance();
token = Token::COMMA;
break;
case '"':
token = ScanJsonString();
case kEndOfString:
message = "unexpected_eos";
array = factory->NewJSArray(0);
break;
case '-':
case '0':
@ -262,47 +78,149 @@ void JsonParser::ScanJson() {
case '7':
case '8':
case '9':
token = ScanJsonNumber();
message = "unexpected_token_number";
array = factory->NewJSArray(0);
break;
case 't':
token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
break;
case 'f':
token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
break;
case 'n':
token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
case '"':
message = "unexpected_token_string";
array = factory->NewJSArray(0);
break;
default:
if (c0_ < 0) {
Advance();
token = Token::EOS;
} else {
Advance();
token = Token::ILLEGAL;
}
message = "unexpected_token";
Handle<Object> name = LookupSingleCharacterStringFromCode(c0_);
Handle<FixedArray> element = factory->NewFixedArray(1);
element->set(0, *name);
array = factory->NewJSArrayWithElements(element);
break;
}
} while (token == Token::WHITESPACE);
next_.end_pos = position_;
next_.token = token;
}
Token::Value JsonParser::ScanJsonIdentifier(const char* text,
Token::Value token) {
while (*text != '\0') {
if (c0_ != *text) return Token::ILLEGAL;
Advance();
text++;
MessageLocation location(factory->NewScript(source),
position_,
position_ + 1);
Handle<Object> result = factory->NewSyntaxError(message, array);
isolate()->Throw(*result, &location);
return Handle<Object>::null();
}
return token;
return result;
}
Token::Value JsonParser::ScanJsonNumber() {
bool negative = false;
// Parses a single JSON value of any kind, selected by the current
// character c0_. The literals "false", "true" and "null" are matched one
// character at a time; any mismatch, or a character that cannot start a
// value, is reported through ReportUnexpectedCharacter().
Handle<Object> JsonParser::ParseJsonValue() {
  switch (c0_) {
    case '"':
      return ParseJsonString();
    case '{':
      return ParseJsonObject();
    case '[':
      return ParseJsonArray();
    case '-': case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return ParseJsonNumber();
    case 'f':
      // Short-circuit evaluation stops advancing at the first mismatch.
      if (AdvanceGetChar() != 'a' || AdvanceGetChar() != 'l' ||
          AdvanceGetChar() != 's' || AdvanceGetChar() != 'e') {
        return ReportUnexpectedCharacter();
      }
      AdvanceSkipWhitespace();
      return isolate()->factory()->false_value();
    case 't':
      if (AdvanceGetChar() != 'r' || AdvanceGetChar() != 'u' ||
          AdvanceGetChar() != 'e') {
        return ReportUnexpectedCharacter();
      }
      AdvanceSkipWhitespace();
      return isolate()->factory()->true_value();
    case 'n':
      if (AdvanceGetChar() != 'u' || AdvanceGetChar() != 'l' ||
          AdvanceGetChar() != 'l') {
        return ReportUnexpectedCharacter();
      }
      AdvanceSkipWhitespace();
      return isolate()->factory()->null_value();
    default:
      return ReportUnexpectedCharacter();
  }
}
// Parses a JSON object into a freshly allocated JSObject. The current
// character must be '{'. On success the closing '}' and any whitespace
// after it have been consumed; on failure the result of
// ReportUnexpectedCharacter() is returned.
Handle<Object> JsonParser::ParseJsonObject() {
  Handle<JSFunction> constructor(
      isolate()->global_context()->object_function());
  Handle<JSObject> object = isolate()->factory()->NewJSObject(constructor);
  ASSERT_EQ(c0_, '{');

  AdvanceSkipWhitespace();
  if (c0_ != '}') {
    for (;;) {
      // Each member has the shape: string ':' value.
      // NOTE(review): ParseJsonSymbol() is declared to return
      // Handle<Object>; this relies on the handle conversion to
      // Handle<String> -- confirm it is intended.
      Handle<String> name = ParseJsonSymbol();
      if (name.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
      AdvanceSkipWhitespace();
      Handle<Object> member = ParseJsonValue();
      if (member.is_null()) return ReportUnexpectedCharacter();

      // Array-index keys become elements, the Proto_symbol key sets the
      // prototype, and every other key becomes a plain local property.
      uint32_t element_index;
      if (name->AsArrayIndex(&element_index)) {
        SetOwnElement(object, element_index, member, kNonStrictMode);
      } else if (name->Equals(isolate()->heap()->Proto_symbol())) {
        SetPrototype(object, member);
      } else {
        SetLocalPropertyIgnoreAttributes(object, name, member, NONE);
      }

      // A comma (consumed together with following whitespace) introduces
      // another member; anything else ends the member list.
      if (!MatchSkipWhiteSpace(',')) break;
    }
    if (c0_ != '}') {
      return ReportUnexpectedCharacter();
    }
  }
  // Step past the closing '}'.
  AdvanceSkipWhitespace();
  return object;
}
// Parses a JSON array and returns its elements as a JSArray. The current
// character must be '['. On success the closing ']' and any whitespace
// after it have been consumed; on failure the result of
// ReportUnexpectedCharacter() is returned.
Handle<Object> JsonParser::ParseJsonArray() {
  ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
  ZoneList<Handle<Object> > parsed(4);
  ASSERT_EQ(c0_, '[');

  AdvanceSkipWhitespace();
  if (c0_ != ']') {
    for (;;) {
      Handle<Object> item = ParseJsonValue();
      if (item.is_null()) return ReportUnexpectedCharacter();
      parsed.Add(item);
      // A comma (consumed together with following whitespace) introduces
      // another element; anything else ends the element list.
      if (!MatchSkipWhiteSpace(',')) break;
    }
    if (c0_ != ']') {
      return ReportUnexpectedCharacter();
    }
  }
  // Step past the closing ']'.
  AdvanceSkipWhitespace();

  // Copy the collected elements into a fixed array backing the result.
  const int count = parsed.length();
  Handle<FixedArray> backing = isolate()->factory()->NewFixedArray(count);
  for (int i = 0; i < count; i++) {
    backing->set(i, *parsed[i]);
  }
  return isolate()->factory()->NewJSArrayWithElements(backing);
}
Handle<Object> JsonParser::ParseJsonNumber() {
bool negative = false;
beg_pos_ = position_;
if (c0_ == '-') {
Advance();
negative = true;
@ -311,11 +229,11 @@ Token::Value JsonParser::ScanJsonNumber() {
Advance();
// Prefix zero is only allowed if it's the only digit before
// a decimal point or exponent.
if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
} else {
int i = 0;
int digits = 0;
if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
do {
i = i * 10 + c0_ - '0';
digits++;
@ -323,12 +241,13 @@ Token::Value JsonParser::ScanJsonNumber() {
} while (c0_ >= '0' && c0_ <= '9');
if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
number_ = (negative ? -i : i);
return Token::NUMBER;
SkipWhitespace();
return isolate()->factory()->NewNumber(number_);
}
}
if (c0_ == '.') {
Advance();
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
do {
Advance();
} while (c0_ >= '0' && c0_ <= '9');
@ -336,37 +255,38 @@ Token::Value JsonParser::ScanJsonNumber() {
if (AsciiAlphaToLower(c0_) == 'e') {
Advance();
if (c0_ == '-' || c0_ == '+') Advance();
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
do {
Advance();
} while (c0_ >= '0' && c0_ <= '9');
}
int length = position_ - beg_pos_;
if (is_sequential_ascii_) {
Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos,
position_ - next_.beg_pos);
Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length);
number_ = StringToDouble(isolate()->unicode_cache(),
chars,
NO_FLAGS, // Hex, octal or trailing junk.
OS::nan_value());
} else {
Vector<char> buffer = Vector<char>::New(position_ - next_.beg_pos);
String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_);
Vector<char> buffer = Vector<char>::New(length);
String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_);
Vector<const char> result =
Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
position_ - next_.beg_pos);
length);
number_ = StringToDouble(isolate()->unicode_cache(),
result,
NO_FLAGS, // Hex, octal or trailing junk.
0.0);
buffer.Dispose();
}
return Token::NUMBER;
SkipWhitespace();
return isolate()->factory()->NewNumber(number_);
}
Token::Value JsonParser::SlowScanJsonString() {
Handle<Object> JsonParser::SlowScanJsonString() {
// The currently scanned ascii characters.
Handle<String> ascii(isolate()->factory()->NewSubString(source_,
next_.beg_pos + 1,
beg_pos_,
position_));
Handle<String> two_byte =
isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
@ -392,7 +312,7 @@ Token::Value JsonParser::SlowScanJsonString() {
}
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL;
if (c0_ < 0x20) return ReportUnexpectedCharacter();
if (c0_ != '\\') {
seq_two_byte->SeqTwoByteStringSet(count++, c0_);
Advance();
@ -425,7 +345,7 @@ Token::Value JsonParser::SlowScanJsonString() {
Advance();
int digit = HexValue(c0_);
if (digit < 0) {
return Token::ILLEGAL;
return ReportUnexpectedCharacter();
}
value = value * 16 + digit;
}
@ -433,14 +353,14 @@ Token::Value JsonParser::SlowScanJsonString() {
break;
}
default:
return Token::ILLEGAL;
return ReportUnexpectedCharacter();
}
Advance();
}
}
// Advance past the last '"'.
ASSERT_EQ('"', c0_);
Advance();
AdvanceSkipWhitespace();
// Shrink the string to our length.
if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
@ -456,21 +376,19 @@ Token::Value JsonParser::SlowScanJsonString() {
seq_two_byte->set_length(count);
isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
}
string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte);
return Token::STRING;
return isolate()->factory()->NewConsString(ascii, seq_two_byte);
}
Token::Value JsonParser::ScanJsonString() {
template <bool is_symbol>
Handle<Object> JsonParser::ScanJsonString() {
ASSERT_EQ('"', c0_);
// Set string_val to null. If string_val is not set we assume an
// ascii string beginning at next_.beg_pos + 1 to next_.end_pos - 1.
string_val_ = Handle<String>::null();
Advance();
beg_pos_ = position_;
// Fast case for ascii only without escape characters.
while (c0_ != '"') {
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL;
if (c0_ < 0x20) return ReportUnexpectedCharacter();
if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) {
Advance();
} else {
@ -478,36 +396,16 @@ Token::Value JsonParser::ScanJsonString() {
}
}
ASSERT_EQ('"', c0_);
end_pos_ = position_;
// Advance past the last '"'.
Advance();
return Token::STRING;
}
// Convenience overload: forwards to GetString(bool) with hint_symbol set
// to false.
Handle<String> JsonParser::GetString() {
return GetString(false);
}
// Returns the current string as a symbol. The string is fetched with the
// symbol hint set; if what comes back is not already a symbol it is looked
// up through the factory.
Handle<String> JsonParser::GetSymbol() {
  Handle<String> text = GetString(true);
  if (!text->IsSymbol()) {
    return isolate()->factory()->LookupSymbol(text);
  }
  return text;
}
Handle<String> JsonParser::GetString(bool hint_symbol) {
// We have a non ascii string, return that.
if (!string_val_.is_null()) return string_val_;
if (is_sequential_ascii_ && hint_symbol) {
Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_);
// The current token includes the '"' in both ends.
int length = current_.end_pos - current_.beg_pos - 2;
AdvanceSkipWhitespace();
if (is_sequential_ascii_ && is_symbol) {
return isolate()->factory()->LookupAsciiSymbol(seq_source_,
current_.beg_pos + 1,
length);
beg_pos_,
end_pos_ - beg_pos_);
} else {
return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_);
}
// The current token includes the '"' in both ends.
return isolate()->factory()->NewSubString(
source_, current_.beg_pos + 1, current_.end_pos - 1);
}
} } // namespace v8::internal

View File

@ -47,41 +47,62 @@ class JsonParser BASE_EMBEDDED {
Handle<Object> ParseJson(Handle<String> source);
inline void Advance() {
if (position_ >= source_length_) {
position_++;
position_++;
if (position_ > source_length_) {
c0_ = kEndOfString;
} else if (is_sequential_ascii_) {
position_++;
c0_ = seq_source_->SeqAsciiStringGet(position_);
} else {
position_++;
c0_ = source_->Get(position_);
}
}
inline Isolate* isolate() { return isolate_; }
// The JSON lexical grammar is specified in the ECMAScript 5 standard,
// section 15.12.1.1. The only allowed whitespace characters between tokens
// are tab, carriage-return, newline and space.
// Get the string for the current string token.
Handle<String> GetString(bool hint_symbol);
Handle<String> GetString();
Handle<String> GetSymbol();
// Unconditionally consumes the current character, then keeps advancing
// while the current character is a tab, carriage return, newline or space.
inline void AdvanceSkipWhitespace() {
  Advance();
  while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') {
    Advance();
  }
}
// Scan a single JSON token. The JSON lexical grammar is specified in the
// ECMAScript 5 standard, section 15.12.1.1.
// Recognizes all of the single-character tokens directly, or calls a function
// to scan a number, string or identifier literal.
// The only allowed whitespace characters between tokens are tab,
// carriage-return, newline and space.
void ScanJson();
// Advances past any run of tab, carriage return, newline or space
// characters starting at the current character. Does nothing when the
// current character is not one of those.
inline void SkipWhitespace() {
  for (;;) {
    switch (c0_) {
      case '\t':
      case '\r':
      case '\n':
      case ' ':
        Advance();
        continue;
      default:
        return;
    }
  }
}
// Moves to the next input character via Advance() and returns the new
// current character c0_.
inline uc32 AdvanceGetChar() {
Advance();
return c0_;
}
// Tests whether the current character is c. On a match, c and any
// whitespace following it are consumed and true is returned; otherwise
// the position is left untouched and false is returned.
inline bool MatchSkipWhiteSpace(uc32 c) {
  if (c0_ != c) return false;
  AdvanceSkipWhitespace();
  return true;
}
// A JSON string (production JSONString) is subset of valid JavaScript string
// literals. The string must only be double-quoted (not single-quoted), and
// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
Token::Value ScanJsonString();
// Parses a JSON string by running ScanJsonString with is_symbol = false.
Handle<Object> ParseJsonString() {
return ScanJsonString<false>();
}
// Parses a JSON string by running ScanJsonString with is_symbol = true.
// NOTE(review): with is_symbol set, the fast path of ScanJsonString appears
// to look the text up via LookupAsciiSymbol when the source is a sequential
// ASCII string -- confirm against the implementation.
Handle<Object> ParseJsonSymbol() {
return ScanJsonString<true>();
}
template <bool is_symbol>
Handle<Object> ScanJsonString();
// Slow version for unicode support, uses the first ascii_count characters,
// as first part of a ConsString
Token::Value SlowScanJsonString();
Handle<Object> SlowScanJsonString();
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
@ -89,12 +110,7 @@ class JsonParser BASE_EMBEDDED {
// digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed.
Token::Value ScanJsonNumber();
// Used to recognizes one of the literals "true", "false", or "null". These
// are the only valid JSON identifiers (productions JSONBooleanLiteral,
// JSONNullLiteral).
Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
Handle<Object> ParseJsonNumber();
// Parse a single JSON value from input (grammar production JSONValue).
// A JSON value is either a (double-quoted) string literal, a number literal,
@ -119,21 +135,11 @@ class JsonParser BASE_EMBEDDED {
// Mark that a parsing error has happened at the current token, and
// return a null handle. Primarily for readability: the body does nothing
// except produce the null handle that callers propagate.
Handle<Object> ReportUnexpectedToken() { return Handle<Object>::null(); }
// Returns a null handle. Parsing functions return this to signal a failure
// at the current character; nothing else is recorded or thrown here.
inline Handle<Object> ReportUnexpectedCharacter() {
return Handle<Object>::null();
}
// Peek at the next token: returns the kind of the look-ahead token stored
// in next_ without scanning or changing any state.
Token::Value Peek() { return next_.token; }
// Scan the next token and return the token scanned on the last call.
Token::Value Next();
// Describes one scanned token: its kind together with the source positions
// recorded before (beg_pos) and after (end_pos) it was scanned.
struct TokenInfo {
// Starts out as an ILLEGAL token with both positions at 0.
TokenInfo() : token(Token::ILLEGAL),
beg_pos(0),
end_pos(0) { }
// Kind of the token.
Token::Value token;
// Source position where the token begins.
int beg_pos;
// Source position reached once the token has been scanned.
int end_pos;
};
inline Isolate* isolate() { return isolate_; }
static const int kInitialSpecialStringSize = 1024;
@ -144,15 +150,14 @@ class JsonParser BASE_EMBEDDED {
Handle<SeqAsciiString> seq_source_;
bool is_sequential_ascii_;
// Current and next token
TokenInfo current_;
TokenInfo next_;
// begin and end position of scanned string or number
int beg_pos_;
int end_pos_;
Isolate* isolate_;
uc32 c0_;
int position_;
Handle<String> string_val_;
double number_;
};