Create stand-alone json parser (including scanner).

The current json parser and scanner inherit from the normal scanners and parsers,
which are more complicated than we need for parsing json.

The supplied scanner works directly on the supplied string and has a
fast-case mode for scanning only ascii characters (it will simply
create a substring or a symbol directly from the existing string). To
allow for creating symbols from a substring I have added a
SubStringAsciiSymbolKey that computes the hash from our string
without extracting the sub-string. In case we need to add the symbol,
it simply creates the symbol directly from the characters inside the
given string.
Review URL: http://codereview.chromium.org/7039037

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8029 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
ricow@chromium.org 2011-05-24 12:16:23 +00:00
parent 0e76bfeae7
commit 3c7e1d7015
16 changed files with 818 additions and 549 deletions

View File

@ -85,6 +85,7 @@ SOURCES = {
inspector.cc
interpreter-irregexp.cc
isolate.cc
json-parser.cc
jsregexp.cc
lithium-allocator.cc
lithium.cc

View File

@ -734,6 +734,15 @@ double StringToDouble(UnicodeCache* unicode_cache,
empty_string_val);
}
// Converts the two-byte character sequence |str| to a double by delegating
// to InternalStringToDouble over the range [start, start + length).
// |flags| and |empty_string_val| are forwarded unchanged.
double StringToDouble(UnicodeCache* unicode_cache,
                      Vector<const uc16> str,
                      int flags,
                      double empty_string_val) {
  const uc16* past_last = str.start() + str.length();
  return InternalStringToDouble(unicode_cache,
                                str.start(),
                                past_last,
                                flags,
                                empty_string_val);
}
const char* DoubleToCString(double v, Vector<char> buffer) {
switch (fpclassify(v)) {

View File

@ -101,6 +101,10 @@ double StringToDouble(UnicodeCache* unicode_cache,
Vector<const char> str,
int flags,
double empty_string_val = 0);
// Overload operating on a vector of two-byte (uc16) characters.
// |empty_string_val| defaults to 0 when omitted.
double StringToDouble(UnicodeCache* unicode_cache,
Vector<const uc16> str,
int flags,
double empty_string_val = 0);
// This version expects a zero-terminated character array.
double StringToDouble(UnicodeCache* unicode_cache,
const char* str,

View File

@ -111,12 +111,31 @@ Handle<String> Factory::LookupSymbol(Vector<const char> string) {
String);
}
// Symbols are created in the old generation (data space).
// Handle-based wrapper: calls Heap::LookupSymbol on the raw string behind
// |string| through CALL_HEAP_FUNCTION and yields the result as a String.
// NOTE(review): CALL_HEAP_FUNCTION is not visible here; presumably it deals
// with allocation failure of the wrapped call -- confirm.
Handle<String> Factory::LookupSymbol(Handle<String> string) {
CALL_HEAP_FUNCTION(isolate(),
isolate()->heap()->LookupSymbol(*string),
String);
}
// Handle-based wrapper: calls Heap::LookupAsciiSymbol with the character
// vector |string| through CALL_HEAP_FUNCTION and yields the result as a
// String.
Handle<String> Factory::LookupAsciiSymbol(Vector<const char> string) {
CALL_HEAP_FUNCTION(isolate(),
isolate()->heap()->LookupAsciiSymbol(string),
String);
}
// Handle-based wrapper for the sub-string variant: calls
// Heap::LookupAsciiSymbol(string, from, length) through CALL_HEAP_FUNCTION
// and yields the result as a String.  |from| is the index of the first
// character inside |string| and |length| the number of characters.
Handle<String> Factory::LookupAsciiSymbol(Handle<SeqAsciiString> string,
int from,
int length) {
CALL_HEAP_FUNCTION(isolate(),
isolate()->heap()->LookupAsciiSymbol(string,
from,
length),
String);
}
Handle<String> Factory::LookupTwoByteSymbol(Vector<const uc16> string) {
CALL_HEAP_FUNCTION(isolate(),
isolate()->heap()->LookupTwoByteSymbol(string),

View File

@ -62,7 +62,11 @@ class Factory {
PretenureFlag pretenure);
Handle<String> LookupSymbol(Vector<const char> str);
Handle<String> LookupSymbol(Handle<String> str);
Handle<String> LookupAsciiSymbol(Vector<const char> str);
// Looks up the symbol for the |length| characters of |str| that start at
// index |from|, without requiring the caller to extract the sub-string.
Handle<String> LookupAsciiSymbol(Handle<SeqAsciiString> str,
                                 int from,
                                 int length);
Handle<String> LookupTwoByteSymbol(Vector<const uc16> str);
Handle<String> LookupAsciiSymbol(const char* str) {
return LookupSymbol(CStrVector(str));

View File

@ -4179,6 +4179,26 @@ MaybeObject* Heap::LookupAsciiSymbol(Vector<const char> string) {
}
// Looks up the symbol matching the |length| characters of |string| that
// begin at index |from|, using SymbolTable::LookupSubStringAsciiSymbol.
// When the lookup result converts to an object, that object is stored as
// the symbol table root and the symbol is returned; otherwise the lookup
// result is handed back to the caller unchanged.
MaybeObject* Heap::LookupAsciiSymbol(Handle<SeqAsciiString> string,
                                     int from,
                                     int length) {
  Object* found_symbol = NULL;
  MaybeObject* lookup_result =
      symbol_table()->LookupSubStringAsciiSymbol(string,
                                                 from,
                                                 length,
                                                 &found_symbol);
  Object* updated_table;
  if (!lookup_result->ToObject(&updated_table)) return lookup_result;

  // Can't use set_symbol_table because SymbolTable::cast knows that
  // SymbolTable is a singleton and checks for identity.
  roots_[kSymbolTableRootIndex] = updated_table;
  ASSERT(found_symbol != NULL);
  return found_symbol;
}
MaybeObject* Heap::LookupTwoByteSymbol(Vector<const uc16> string) {
Object* symbol = NULL;
Object* new_table;

View File

@ -790,6 +790,10 @@ class Heap {
return LookupSymbol(CStrVector(str));
}
MUST_USE_RESULT MaybeObject* LookupSymbol(String* str);
// Sub-string variant: looks up the symbol for the |length| characters of
// |string| starting at index |from|.
MUST_USE_RESULT MaybeObject* LookupAsciiSymbol(Handle<SeqAsciiString> string,
int from,
int length);
bool LookupSymbolIfExists(String* str, String** symbol);
bool LookupTwoCharsSymbolIfExists(String* str, String** symbol);

504
src/json-parser.cc Normal file
View File

@ -0,0 +1,504 @@
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "v8.h"
#include "char-predicates-inl.h"
#include "conversions.h"
#include "json-parser.h"
#include "messages.h"
#include "spaces.h"
namespace v8 {
namespace internal {
// Parses |source| as a single JSON value.  Returns the parsed value on
// success.  If the text is not exactly one JSON value followed by
// end-of-string, a SyntaxError describing the unexpected token is thrown on
// the isolate and a null handle is returned.
Handle<Object> JsonParser::ParseJson(Handle<String> source) {
  isolate_ = source->map()->isolate();
  source_ = Handle<String>(source->TryFlattenGetString());
  // Index of the last character; Advance() reports end-of-string beyond it.
  source_length_ = source_->length() - 1;

  // Optimized fast case where we only have ascii characters.
  is_sequential_ascii_ = source_->IsSeqAsciiString();
  if (is_sequential_ascii_) {
    seq_source_ = Handle<SeqAsciiString>::cast(source_);
  }

  // Start right before the string, load the first character (possibly
  // end-of-string) and scan the first token into next_.
  position_ = -1;
  Advance();
  Next();

  Handle<Object> result = ParseJsonValue();
  if (!result.is_null() && Next() == Token::EOS) return result;

  // Parse failed. Scanner's current token is the unexpected token.
  const Token::Value unexpected = current_.token;
  const char* message;
  const char* name_opt = NULL;
  switch (unexpected) {
    case Token::EOS:
      message = "unexpected_eos";
      break;
    case Token::NUMBER:
      message = "unexpected_token_number";
      break;
    case Token::STRING:
      message = "unexpected_token_string";
      break;
    case Token::IDENTIFIER:
    case Token::FUTURE_RESERVED_WORD:
      message = "unexpected_token_identifier";
      break;
    default:
      message = "unexpected_token";
      name_opt = Token::String(unexpected);
      ASSERT(name_opt != NULL);
      break;
  }

  Factory* factory = isolate()->factory();
  MessageLocation location(factory->NewScript(source),
                           current_.beg_pos,
                           current_.end_pos);

  // The error arguments hold the token's name when one is available and
  // are empty otherwise.
  Handle<JSArray> arguments;
  if (name_opt != NULL) {
    Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt));
    Handle<FixedArray> element = factory->NewFixedArray(1);
    element->set(0, *name);
    arguments = factory->NewJSArrayWithElements(element);
  } else {
    arguments = factory->NewJSArray(0);
  }

  Handle<Object> error = factory->NewSyntaxError(message, arguments);
  isolate()->Throw(*error, &location);
  return Handle<Object>::null();
}
// Parse any JSON value.
// Consumes the next token and turns it into a JSON value: strings, numbers
// and the three literals are produced directly, '{' and '[' hand over to the
// object and array parsers.  Any other token yields a null handle.
Handle<Object> JsonParser::ParseJsonValue() {
  Factory* const factory = isolate()->factory();
  switch (Next()) {
    case Token::STRING:
      return GetString(false);
    case Token::NUMBER:
      return factory->NewNumber(number_);
    case Token::FALSE_LITERAL:
      return factory->false_value();
    case Token::TRUE_LITERAL:
      return factory->true_value();
    case Token::NULL_LITERAL:
      return factory->null_value();
    case Token::LBRACE:
      return ParseJsonObject();
    case Token::LBRACK:
      return ParseJsonArray();
    default:
      return ReportUnexpectedToken();
  }
}
// Parse a JSON object. Scanner must be right after '{' token.
// Parses the members of a JSON object; the '{' token has already been
// consumed.  Returns the populated object, or a null handle when a member is
// malformed or the closing '}' is missing.
// NOTE(review): nested values recurse through ParseJsonValue and no
// stack-limit check is performed here -- confirm that deeply nested input is
// guarded elsewhere.
Handle<Object> JsonParser::ParseJsonObject() {
  Handle<JSFunction> object_constructor(
      isolate()->global_context()->object_function());
  Handle<JSObject> json_object =
      isolate()->factory()->NewJSObject(object_constructor);

  // Empty object: just consume the '}'.
  if (Peek() == Token::RBRACE) {
    Next();
    return json_object;
  }

  Token::Value separator;
  do {
    // Each member is: string key, ':', value.
    if (Next() != Token::STRING) return ReportUnexpectedToken();
    Handle<String> key = GetString(true);
    if (Next() != Token::COLON) return ReportUnexpectedToken();

    Handle<Object> value = ParseJsonValue();
    if (value.is_null()) return Handle<Object>::null();

    uint32_t index;
    if (key->AsArrayIndex(&index)) {
      // Keys that are array indices are stored as elements.
      SetOwnElement(json_object, index, value, kNonStrictMode);
    } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
      // A key equal to the Proto symbol sets the prototype of the object
      // instead of adding a property.
      SetPrototype(json_object, value);
    } else {
      SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);
    }

    separator = Next();
  } while (separator == Token::COMMA);

  // The token that ended the member list must be the closing brace.
  if (separator != Token::RBRACE) return ReportUnexpectedToken();
  return json_object;
}
// Parse a JSON array. Scanner must be right after '[' token.
// Parses the elements of a JSON array; the '[' token has already been
// consumed.  Elements are collected in a zone list and then copied into a
// fixed array backing the resulting JSArray.  Returns a null handle when an
// element fails to parse or the closing ']' is missing.
Handle<Object> JsonParser::ParseJsonArray() {
  ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
  ZoneList<Handle<Object> > elements(4);

  if (Peek() == Token::RBRACK) {
    // Empty array: just consume the ']'.
    Next();
  } else {
    Token::Value separator;
    do {
      Handle<Object> element = ParseJsonValue();
      if (element.is_null()) return Handle<Object>::null();
      elements.Add(element);
      separator = Next();
    } while (separator == Token::COMMA);
    if (separator != Token::RBRACK) return ReportUnexpectedToken();
  }

  // Allocate a fixed array with all the elements.
  Factory* factory = isolate()->factory();
  const int element_count = elements.length();
  Handle<FixedArray> fast_elements = factory->NewFixedArray(element_count);
  for (int index = 0; index < element_count; index++) {
    fast_elements->set(index, *elements[index]);
  }
  return factory->NewJSArrayWithElements(fast_elements);
}
// Makes the previously scanned look-ahead token the current token, scans a
// new look-ahead token into next_, and returns the (new) current token.
Token::Value JsonParser::Next() {
// Promote the look-ahead before ScanJson() overwrites next_.
current_ = next_;
ScanJson();
return current_.token;
}
// Scans one token starting at the current character and records it, together
// with its start and end positions, in next_.  Tab, carriage-return, newline
// and space are skipped.  A negative current character (end of string)
// produces EOS; any character that starts no token produces ILLEGAL.
void JsonParser::ScanJson() {
  is_sequential_ascii_ = source_->IsSeqAsciiString();

  // Seeded with WHITESPACE so that the loop body runs at least once.
  Token::Value token = Token::WHITESPACE;
  while (token == Token::WHITESPACE) {
    // Remember the position of the next token
    next_.beg_pos = position_;
    switch (c0_) {
      case '\t':
      case '\r':
      case '\n':
      case ' ':
        Advance();
        token = Token::WHITESPACE;
        break;

      // Single-character punctuators.
      case '{':
        Advance();
        token = Token::LBRACE;
        break;
      case '}':
        Advance();
        token = Token::RBRACE;
        break;
      case '[':
        Advance();
        token = Token::LBRACK;
        break;
      case ']':
        Advance();
        token = Token::RBRACK;
        break;
      case ':':
        Advance();
        token = Token::COLON;
        break;
      case ',':
        Advance();
        token = Token::COMMA;
        break;

      case '"':
        token = ScanJsonString();
        break;

      case '-':
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        token = ScanJsonNumber();
        break;

      // The only identifiers JSON knows about.
      case 't':
        token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
        break;
      case 'f':
        token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
        break;
      case 'n':
        token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
        break;

      default:
        // Classify before advancing: Advance() replaces c0_.
        token = (c0_ < 0) ? Token::EOS : Token::ILLEGAL;
        Advance();
        break;
    }
  }

  next_.end_pos = position_;
  next_.token = token;
}
// Matches the input, character by character, against the zero-terminated
// literal |text|.  Returns |token| when every character matched, and ILLEGAL
// at the first mismatch (the mismatching character is not consumed).
Token::Value JsonParser::ScanJsonIdentifier(const char* text,
                                            Token::Value token) {
  for (const char* expected = text; *expected != '\0'; ++expected) {
    if (c0_ != *expected) return Token::ILLEGAL;
    Advance();
  }
  return token;
}
// Scans a JSON number starting at the current character and stores its value
// in number_.  Returns Token::NUMBER on success and Token::ILLEGAL when the
// text does not follow the JSON number grammar (for example a leading zero
// followed by another digit, or a '.' / exponent without digits).
//
// Short plain integers are converted directly; everything else (a leading
// zero, fractions, exponents, long integers) is converted with
// StringToDouble over the scanned characters.
Token::Value JsonParser::ScanJsonNumber() {
  // Largest digit count whose value always fits in an int (999999999 <
  // 2^31).  Accumulation stops after this many digits so that the fast path
  // can never overflow a signed int, whatever the length of the digit run.
  static const int kMaxFastIntegerDigits = 9;

  bool negative = false;
  if (c0_ == '-') {
    Advance();
    negative = true;
  }
  if (c0_ == '0') {
    Advance();
    // Prefix zero is only allowed if it's the only digit before
    // a decimal point or exponent.
    if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
  } else {
    int i = 0;
    int digits = 0;
    if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
    do {
      // Longer runs are still consumed, but their value is computed by
      // StringToDouble below rather than by this accumulator.
      if (digits < kMaxFastIntegerDigits) {
        i = i * 10 + (c0_ - '0');
      }
      digits++;
      Advance();
    } while (c0_ >= '0' && c0_ <= '9');
    if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' &&
        digits <= kMaxFastIntegerDigits) {
      number_ = (negative ? -i : i);
      return Token::NUMBER;
    }
  }
  if (c0_ == '.') {
    Advance();
    // At least one digit is required after the decimal point.
    if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
    do {
      Advance();
    } while (c0_ >= '0' && c0_ <= '9');
  }
  if (AsciiAlphaToLower(c0_) == 'e') {
    Advance();
    if (c0_ == '-' || c0_ == '+') Advance();
    // At least one digit is required in the exponent.
    if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
    do {
      Advance();
    } while (c0_ >= '0' && c0_ <= '9');
  }
  if (is_sequential_ascii_) {
    // Convert straight from the characters of the source string.
    Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos,
                             position_ - next_.beg_pos);
    number_ = StringToDouble(isolate()->unicode_cache(),
                             chars,
                             NO_FLAGS,  // Hex, octal or trailing junk.
                             OS::nan_value());
  } else {
    // Copy the scanned characters into a temporary buffer first.
    Vector<char> buffer = Vector<char>::New(position_ - next_.beg_pos);
    String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_);
    Vector<const char> result =
        Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
                           position_ - next_.beg_pos);
    number_ = StringToDouble(isolate()->unicode_cache(),
                             result,
                             NO_FLAGS,  // Hex, octal or trailing junk.
                             0.0);
    buffer.Dispose();
  }
  return Token::NUMBER;
}
// Slow path of string scanning, entered from ScanJsonString() when a
// backslash or a character outside the fast-path range is met.  The
// characters scanned so far are kept as a sub-string of the source; the rest
// of the literal is decoded (escapes included) into a two-byte string.  On
// success string_val_ is set to the concatenation of both parts and
// Token::STRING is returned.  Token::ILLEGAL is returned for a control
// character, an unterminated string or an invalid escape.
Token::Value JsonParser::SlowScanJsonString() {
// The currently scanned ascii characters.
Handle<String> ascii(isolate()->factory()->NewSubString(source_,
next_.beg_pos + 1,
position_));
Handle<String> two_byte =
isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
NOT_TENURED);
Handle<SeqTwoByteString> seq_two_byte =
Handle<SeqTwoByteString>::cast(two_byte);
// Capacity of the buffer is allocation_count * kInitialSpecialStringSize;
// count is the number of characters written to it so far.
int allocation_count = 1;
int count = 0;
while (c0_ != '"') {
// The buffer is full: allocate a larger raw two-byte string and copy the
// characters collected so far into it.
if (count >= kInitialSpecialStringSize * allocation_count) {
allocation_count++;
int new_size = allocation_count * kInitialSpecialStringSize;
Handle<String> new_two_byte =
isolate()->factory()->NewRawTwoByteString(new_size,
NOT_TENURED);
uc16* char_start =
Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
String::WriteToFlat(*seq_two_byte, char_start, 0, count);
seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
}
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL;
if (c0_ != '\\') {
// Ordinary character: copy it verbatim.
seq_two_byte->SeqTwoByteStringSet(count++, c0_);
Advance();
} else {
// Escape sequence: step over the backslash and decode what follows.
Advance();
switch (c0_) {
case '"':
case '\\':
case '/':
seq_two_byte->SeqTwoByteStringSet(count++, c0_);
break;
case 'b':
seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
break;
case 'f':
seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');
break;
case 'n':
seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');
break;
case 'r':
seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');
break;
case 't':
seq_two_byte->SeqTwoByteStringSet(count++, '\x09');
break;
case 'u': {
// Exactly four hexadecimal digits follow the 'u'.
uc32 value = 0;
for (int i = 0; i < 4; i++) {
Advance();
int digit = HexValue(c0_);
if (digit < 0) {
return Token::ILLEGAL;
}
value = value * 16 + digit;
}
seq_two_byte->SeqTwoByteStringSet(count++, value);
break;
}
default:
return Token::ILLEGAL;
}
// Step over the last character of the escape sequence.
Advance();
}
}
// Advance past the last '"'.
ASSERT_EQ('"', c0_);
Advance();
// Shrink the string to our length.
isolate()->heap()->
new_space()->
ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte,
count);
string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte);
return Token::STRING;
}
// Scans a string literal; the current character must be the opening '"'.
// As long as only plain characters below kMaxAsciiCharCode are seen, nothing
// is copied: string_val_ stays null and the token positions alone describe
// the string.  A backslash or any other character hands over to
// SlowScanJsonString().  Returns ILLEGAL for a control character or an
// unterminated string.
Token::Value JsonParser::ScanJsonString() {
  ASSERT_EQ('"', c0_);
  // Set string_val to null. If string_val is not set we assume an
  // ascii string beginning at next_.beg_pos + 1 to next_.end_pos - 1.
  string_val_ = Handle<String>::null();
  Advance();

  // Fast case for ascii only without escape characters.
  for (; c0_ != '"'; Advance()) {
    // Check for control character (0x00-0x1f) or unterminated string (<0).
    if (c0_ < 0x20) return Token::ILLEGAL;
    if (c0_ == '\\' || c0_ >= kMaxAsciiCharCode) {
      return SlowScanJsonString();
    }
  }

  ASSERT_EQ('"', c0_);
  // Advance past the last '"'.
  Advance();
  return Token::STRING;
}
// Returns the string for the current string token without asking for a
// symbol (forwards to GetString(false)).
Handle<String> JsonParser::GetString() {
return GetString(false);
}
// Returns the current string token as a symbol.  The string is first
// obtained with the symbol hint; if what comes back is not already a symbol
// it is looked up in the symbol table.
Handle<String> JsonParser::GetSymbol() {
  Handle<String> candidate = GetString(true);
  if (!candidate->IsSymbol()) {
    return isolate()->factory()->LookupSymbol(candidate);
  }
  return candidate;
}
// Returns the string for the current string token.
//
// If the scanner had to build the string itself (string_val_ is set), that
// string is returned as is, whatever |hint_symbol| says.  Otherwise the
// string is the part of the source between the two quotes of the token:
// with |hint_symbol| set and a sequential ascii source it is looked up as a
// symbol directly from the source characters, in all other cases a
// sub-string of the source is created.
Handle<String> JsonParser::GetString(bool hint_symbol) {
  // We have a non ascii string, return that.
  if (!string_val_.is_null()) return string_val_;

  if (is_sequential_ascii_ && hint_symbol) {
    // The current token includes the '"' in both ends.
    int length = current_.end_pos - current_.beg_pos - 2;
    return isolate()->factory()->LookupAsciiSymbol(seq_source_,
                                                   current_.beg_pos + 1,
                                                   length);
  }

  // The current token includes the '"' in both ends.
  return isolate()->factory()->NewSubString(
      source_, current_.beg_pos + 1, current_.end_pos - 1);
}
} } // namespace v8::internal

161
src/json-parser.h Normal file
View File

@ -0,0 +1,161 @@
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_JSON_PARSER_H_
#define V8_JSON_PARSER_H_
#include "token.h"
namespace v8 {
namespace internal {
// A simple json parser.
// A simple, stand-alone JSON parser with its own scanner.  It works directly
// on the characters of the source string.
class JsonParser BASE_EMBEDDED {
 public:
  // Parses |source| as a single JSON value using a temporary parser.
  static Handle<Object> Parse(Handle<String> source) {
    return JsonParser().ParseJson(source);
  }

  // Value of c0_ once the scanner has run past the end of the source.
  static const int kEndOfString = -1;

 private:
  // Parse a string containing a single JSON value.
  Handle<Object> ParseJson(Handle<String> source);

  // Moves to the next character of the source and loads it into c0_.
  // position_ is always incremented; past the last character c0_ becomes
  // kEndOfString.  Sequential ascii sources are read through seq_source_,
  // all other sources through source_.
  inline void Advance() {
    const bool past_end = position_ >= source_length_;
    position_++;
    if (past_end) {
      c0_ = kEndOfString;
    } else if (is_sequential_ascii_) {
      c0_ = seq_source_->SeqAsciiStringGet(position_);
    } else {
      c0_ = source_->Get(position_);
    }
  }

  inline Isolate* isolate() { return isolate_; }

  // Get the string for the current string token.
  Handle<String> GetString(bool hint_symbol);
  Handle<String> GetString();
  Handle<String> GetSymbol();

  // Scan a single JSON token. The JSON lexical grammar is specified in the
  // ECMAScript 5 standard, section 15.12.1.1.
  // Recognizes all of the single-character tokens directly, or calls a
  // function to scan a number, string or identifier literal.
  // The only allowed whitespace characters between tokens are tab,
  // carriage-return, newline and space.
  void ScanJson();

  // A JSON string (production JSONString) is a subset of valid JavaScript
  // string literals. The string must only be double-quoted (not
  // single-quoted), and the only allowed backslash-escapes are ", /, \, b, f,
  // n, r, t and four-digit hex escapes (uXXXX). Any other use of backslashes
  // is invalid.
  Token::Value ScanJsonString();

  // Slow version for unicode and escape support. The characters scanned so
  // far become the first part of a ConsString, the remainder of the literal
  // the second part.
  Token::Value SlowScanJsonString();

  // A JSON number (production JSONNumber) is a subset of the valid JavaScript
  // decimal number literals.
  // It includes an optional minus sign, must have at least one
  // digit before and after a decimal point, may not have prefixed zeros
  // (unless the integer part is zero), and may include an exponent part
  // (e.g., "e-10"). Hexadecimal and octal numbers are not allowed.
  Token::Value ScanJsonNumber();

  // Used to recognize one of the literals "true", "false", or "null". These
  // are the only valid JSON identifiers (productions JSONBooleanLiteral,
  // JSONNullLiteral).
  Token::Value ScanJsonIdentifier(const char* text, Token::Value token);

  // Parse a single JSON value from input (grammar production JSONValue).
  // A JSON value is either a (double-quoted) string literal, a number
  // literal, one of "true", "false", or "null", or an object or array
  // literal.
  Handle<Object> ParseJsonValue();

  // Parse a JSON object literal (grammar production JSONObject).
  // An object literal is a squiggly-braced and comma separated sequence
  // (possibly empty) of key/value pairs, where the key is a JSON string
  // literal, the value is a JSON value, and the two are separated by a colon.
  // Unlike a JavaScript object literal, a JSON object doesn't allow numbers
  // and identifiers as keys.
  Handle<Object> ParseJsonObject();

  // Parses a JSON array literal (grammar production JSONArray). An array
  // literal is a square-bracketed and comma separated sequence (possibly
  // empty) of JSON values.
  // A JSON array doesn't allow leaving out values from the sequence, nor does
  // it allow a terminal comma, like a JavaScript array does.
  Handle<Object> ParseJsonArray();

  // Mark that a parsing error has happened at the current token, and
  // return a null handle. Primarily for readability.
  Handle<Object> ReportUnexpectedToken() { return Handle<Object>::null(); }

  // Peek at the next token.
  Token::Value Peek() { return next_.token; }

  // Scan the next token and return the token scanned on the last call.
  Token::Value Next();

  // A token together with the source positions it spans.
  struct TokenInfo {
    TokenInfo() : token(Token::ILLEGAL), beg_pos(0), end_pos(0) { }
    Token::Value token;
    int beg_pos;
    int end_pos;
  };

  // Size, and growth step, of the two-byte buffer used by
  // SlowScanJsonString().
  static const int kInitialSpecialStringSize = 100;

  Handle<String> source_;
  int source_length_;
  Handle<SeqAsciiString> seq_source_;
  bool is_sequential_ascii_;

  // Current and next token
  TokenInfo current_;
  TokenInfo next_;

  Isolate* isolate_;
  uc32 c0_;
  int position_;

  Handle<String> string_val_;
  double number_;
};
} } // namespace v8::internal
#endif // V8_JSON_PARSER_H_

View File

@ -5509,8 +5509,16 @@ bool String::IsEqualTo(Vector<const char> str) {
bool String::IsAsciiEqualTo(Vector<const char> str) {
int slen = length();
if (str.length() != slen) return false;
for (int i = 0; i < slen; i++) {
if (Get(i) != static_cast<uint16_t>(str[i])) return false;
if (this->IsSeqAsciiString()) {
SeqAsciiString* seq = SeqAsciiString::cast(this);
char* ch = seq->GetChars();
for (int i = 0; i < slen; i++, ch++) {
if (*ch != str[i]) return false;
}
} else {
for (int i = 0; i < slen; i++) {
if (Get(i) != static_cast<uint16_t>(str[i])) return false;
}
}
return true;
}
@ -8760,6 +8768,71 @@ class AsciiSymbolKey : public SequentialSymbolKey<char> {
};
// Hash table key describing |length| characters of a sequential ascii string
// starting at index |from|.  It allows a symbol to be looked up, and created
// if necessary, without first extracting the sub-string.
class SubStringAsciiSymbolKey : public HashTableKey {
 public:
  // hash_field_ starts out as 0, which AsObject() treats as "not yet
  // computed".
  explicit SubStringAsciiSymbolKey(Handle<SeqAsciiString> string,
                                   int from,
                                   int length)
      : string_(string), from_(from), length_(length), hash_field_(0) { }

  // Computes the hash over the described characters, caches the full hash
  // field in hash_field_ and returns the hash value.
  uint32_t Hash() {
    ASSERT(length_ >= 0);
    ASSERT(from_ + length_ <= string_->length());
    StringHasher hasher(length_);

    // Very long strings have a trivial hash that doesn't inspect the
    // string contents.
    if (hasher.has_trivial_hash()) {
      hash_field_ = hasher.GetHashField();
    } else {
      int i = 0;
      // Do the iterative array index computation as long as there is a
      // chance this is an array index.
      while (i < length_ && hasher.is_array_index()) {
        hasher.AddCharacter(static_cast<uc32>(
            string_->SeqAsciiStringGet(i + from_)));
        i++;
      }

      // Process the remaining characters without updating the array
      // index.
      while (i < length_) {
        hasher.AddCharacterNoIndex(static_cast<uc32>(
            string_->SeqAsciiStringGet(i + from_)));
        i++;
      }
      hash_field_ = hasher.GetHashField();
    }

    uint32_t result = hash_field_ >> String::kHashShift;
    ASSERT(result != 0);  // Ensure that the hash value of 0 is never computed.
    return result;
  }

  // Hash of an object already stored in the table.
  uint32_t HashForObject(Object* other) {
    return String::cast(other)->Hash();
  }

  // Compares the described characters with the stored string |string|.
  bool IsMatch(Object* string) {
    Vector<const char> chars(string_->GetChars() + from_, length_);
    return String::cast(string)->IsAsciiEqualTo(chars);
  }

  // Allocates an ascii symbol holding the described characters, computing
  // the hash first if that has not happened yet.
  MaybeObject* AsObject() {
    if (hash_field_ == 0) Hash();
    Vector<const char> chars(string_->GetChars() + from_, length_);
    return HEAP->AllocateAsciiSymbol(chars, hash_field_);
  }

 private:
  Handle<SeqAsciiString> string_;
  int from_;
  int length_;
  uint32_t hash_field_;
};
class TwoByteSymbolKey : public SequentialSymbolKey<uc16> {
public:
explicit TwoByteSymbolKey(Vector<const uc16> str)
@ -9554,6 +9627,15 @@ MaybeObject* SymbolTable::LookupAsciiSymbol(Vector<const char> str,
}
// Looks up the symbol for the |length| characters of |str| starting at index
// |from|.  Builds a SubStringAsciiSymbolKey for that range and delegates to
// LookupKey, passing |s| through as its output parameter.
MaybeObject* SymbolTable::LookupSubStringAsciiSymbol(Handle<SeqAsciiString> str,
int from,
int length,
Object** s) {
SubStringAsciiSymbolKey key(str, from, length);
return LookupKey(&key, s);
}
MaybeObject* SymbolTable::LookupTwoByteSymbol(Vector<const uc16> str,
Object** s) {
TwoByteSymbolKey key(str);

View File

@ -2451,6 +2451,8 @@ class SymbolTableShape {
static const int kEntrySize = 1;
};
class SeqAsciiString;
// SymbolTable.
//
// No special elements in the prefix and the element size is 1
@ -2464,6 +2466,11 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> {
MUST_USE_RESULT MaybeObject* LookupSymbol(Vector<const char> str, Object** s);
MUST_USE_RESULT MaybeObject* LookupAsciiSymbol(Vector<const char> str,
Object** s);
// Variant of LookupAsciiSymbol for the |length| characters of |str| that
// start at index |from|; |s| is the output parameter for the symbol.
MUST_USE_RESULT MaybeObject* LookupSubStringAsciiSymbol(
Handle<SeqAsciiString> str,
int from,
int length,
Object** s);
MUST_USE_RESULT MaybeObject* LookupTwoByteSymbol(Vector<const uc16> str,
Object** s);
MUST_USE_RESULT MaybeObject* LookupString(String* key, Object** s);

View File

@ -4017,201 +4017,6 @@ Expression* Parser::NewThrowError(Handle<String> constructor,
scanner().location().beg_pos);
}
// ----------------------------------------------------------------------------
// JSON
// Parses the characters delivered by |source| as a single JSON value;
// |script| is the string the error location refers to.  Returns the parsed
// value on success.  On a stack overflow Isolate::StackOverflow() is called;
// on any other failure a SyntaxError naming the unexpected token is thrown
// and a null handle is returned.
Handle<Object> JsonParser::ParseJson(Handle<String> script,
                                     UC16CharacterStream* source) {
  scanner_.Initialize(source);
  stack_overflow_ = false;

  Handle<Object> result = ParseJsonValue();
  if (!result.is_null() && scanner_.Next() == Token::EOS) return result;

  if (stack_overflow_) {
    // Scanner failed.
    isolate()->StackOverflow();
    return result;
  }

  // Parse failed. Scanner's current token is the unexpected token.
  const Token::Value unexpected = scanner_.current_token();
  const char* message;
  const char* name_opt = NULL;
  switch (unexpected) {
    case Token::EOS:
      message = "unexpected_eos";
      break;
    case Token::NUMBER:
      message = "unexpected_token_number";
      break;
    case Token::STRING:
      message = "unexpected_token_string";
      break;
    case Token::IDENTIFIER:
    case Token::FUTURE_RESERVED_WORD:
      message = "unexpected_token_identifier";
      break;
    default:
      message = "unexpected_token";
      name_opt = Token::String(unexpected);
      ASSERT(name_opt != NULL);
      break;
  }

  Scanner::Location source_location = scanner_.location();
  Factory* factory = isolate()->factory();
  MessageLocation location(factory->NewScript(script),
                           source_location.beg_pos,
                           source_location.end_pos);

  // The error arguments hold the token's name when one is available and
  // are empty otherwise.
  Handle<JSArray> arguments;
  if (name_opt != NULL) {
    Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt));
    Handle<FixedArray> element = factory->NewFixedArray(1);
    element->set(0, *name);
    arguments = factory->NewJSArrayWithElements(element);
  } else {
    arguments = factory->NewJSArray(0);
  }

  Handle<Object> error = factory->NewSyntaxError(message, arguments);
  isolate()->Throw(*error, &location);
  return Handle<Object>::null();
}
// Creates a string from the scanner's current literal: the empty string for
// an empty literal, otherwise a new ascii or two-byte string depending on
// how the scanner stored the literal.
Handle<String> JsonParser::GetString() {
  Factory* factory = isolate()->factory();
  if (scanner_.literal_length() == 0) return factory->empty_string();
  if (!scanner_.is_literal_ascii()) {
    return factory->NewStringFromTwoByte(scanner_.literal_uc16_string());
  }
  return factory->NewStringFromAscii(scanner_.literal_ascii_string());
}
// Looks up the symbol for the scanner's current literal: the empty string
// for an empty literal, otherwise an ascii or two-byte symbol depending on
// how the scanner stored the literal.
Handle<String> JsonParser::GetSymbol() {
  Factory* factory = isolate()->factory();
  if (scanner_.literal_length() == 0) return factory->empty_string();
  if (!scanner_.is_literal_ascii()) {
    return factory->LookupTwoByteSymbol(scanner_.literal_uc16_string());
  }
  return factory->LookupAsciiSymbol(scanner_.literal_ascii_string());
}
// Parse any JSON value.
// Consumes the next scanner token and turns it into a JSON value: strings,
// numbers and the three literals are produced directly, '{' and '[' hand
// over to the object and array parsers.  Any other token yields a null
// handle.
Handle<Object> JsonParser::ParseJsonValue() {
  Factory* const factory = isolate()->factory();
  switch (scanner_.Next()) {
    case Token::STRING:
      return GetString();
    case Token::NUMBER:
      return factory->NewNumber(scanner_.number());
    case Token::FALSE_LITERAL:
      return factory->false_value();
    case Token::TRUE_LITERAL:
      return factory->true_value();
    case Token::NULL_LITERAL:
      return factory->null_value();
    case Token::LBRACE:
      return ParseJsonObject();
    case Token::LBRACK:
      return ParseJsonArray();
    default:
      return ReportUnexpectedToken();
  }
}
// Parse a JSON object. Scanner must be right after '{' token.
// Parses the members of a JSON object; the '{' token has already been
// consumed.  Returns the populated object, or a null handle when a member is
// malformed, the closing '}' is missing, or the stack limit has been reached
// (in which case stack_overflow_ is set).
Handle<Object> JsonParser::ParseJsonObject() {
  Handle<JSFunction> object_constructor(
      isolate()->global_context()->object_function());
  Handle<JSObject> json_object =
      isolate()->factory()->NewJSObject(object_constructor);

  // Empty object: just consume the '}'.
  if (scanner_.peek() == Token::RBRACE) {
    scanner_.Next();
    return json_object;
  }

  if (StackLimitCheck(isolate()).HasOverflowed()) {
    stack_overflow_ = true;
    return Handle<Object>::null();
  }

  do {
    // Each member is: string key, ':', value.
    if (scanner_.Next() != Token::STRING) return ReportUnexpectedToken();
    Handle<String> key = GetSymbol();
    if (scanner_.Next() != Token::COLON) return ReportUnexpectedToken();

    Handle<Object> value = ParseJsonValue();
    if (value.is_null()) return Handle<Object>::null();

    uint32_t index;
    if (key->AsArrayIndex(&index)) {
      SetOwnElement(json_object, index, value, kNonStrictMode);
    } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
      // We can't remove the __proto__ accessor since it's hardcoded
      // in several places. Instead go along and add the value as
      // the prototype of the created object if possible.
      SetPrototype(json_object, value);
    } else {
      SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);
    }
  } while (scanner_.Next() == Token::COMMA);

  // The token that ended the member list must be the closing brace.
  if (scanner_.current_token() != Token::RBRACE) {
    return ReportUnexpectedToken();
  }
  return json_object;
}
// Parse a JSON array. Scanner must be right after '[' token.
// Parses the elements of a JSON array; the '[' token has already been
// consumed.  Elements are collected in a zone list and then copied into a
// fixed array backing the resulting JSArray.  Returns a null handle when an
// element fails to parse, the closing ']' is missing, or the stack limit has
// been reached (in which case stack_overflow_ is set).
Handle<Object> JsonParser::ParseJsonArray() {
  ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
  ZoneList<Handle<Object> > elements(4);

  if (scanner_.peek() == Token::RBRACK) {
    // Empty array: just consume the ']'.
    scanner_.Next();
  } else {
    if (StackLimitCheck(isolate()).HasOverflowed()) {
      stack_overflow_ = true;
      return Handle<Object>::null();
    }
    Token::Value separator;
    do {
      Handle<Object> element = ParseJsonValue();
      if (element.is_null()) return Handle<Object>::null();
      elements.Add(element);
      separator = scanner_.Next();
    } while (separator == Token::COMMA);
    if (separator != Token::RBRACK) return ReportUnexpectedToken();
  }

  // Allocate a fixed array with all the elements.
  Factory* factory = isolate()->factory();
  const int element_count = elements.length();
  Handle<FixedArray> fast_elements = factory->NewFixedArray(element_count);
  for (int index = 0; index < element_count; index++) {
    fast_elements->set(index, *elements[index]);
  }
  return factory->NewJSArrayWithElements(fast_elements);
}
// ----------------------------------------------------------------------------
// Regular expressions

View File

@ -760,68 +760,6 @@ class CompileTimeValue: public AllStatic {
DISALLOW_IMPLICIT_CONSTRUCTORS(CompileTimeValue);
};
// ----------------------------------------------------------------------------
// JSON PARSING
// JSON is a subset of JavaScript, as specified in, e.g., the ECMAScript 5
// specification section 15.12.1 (and appendix A.8).
// The grammar is given in section 15.12.1.2 (and appendix A.8.2).
class JsonParser BASE_EMBEDDED {
 public:
  // Parse JSON input as a single JSON value.
  // Returns null handle and sets exception if parsing failed.
  // A character stream matching the string representation is created on the
  // stack and handed to a temporary parser instance.
  static Handle<Object> Parse(Handle<String> source) {
    if (source->IsExternalTwoByteString()) {
      ExternalTwoByteStringUC16CharacterStream stream(
          Handle<ExternalTwoByteString>::cast(source), 0, source->length());
      return JsonParser().ParseJson(source, &stream);
    } else {
      GenericStringUC16CharacterStream stream(source, 0, source->length());
      return JsonParser().ParseJson(source, &stream);
    }
  }

 private:
  // Members are initialized in declaration order. stack_overflow_ starts out
  // false so that it never holds an indeterminate value before the first
  // parse; ParseJsonArray only ever sets it to true.
  JsonParser()
      : isolate_(Isolate::Current()),
        scanner_(isolate_->unicode_cache()),
        stack_overflow_(false) { }
  ~JsonParser() { }

  Isolate* isolate() { return isolate_; }

  // Parse a string containing a single JSON value.
  Handle<Object> ParseJson(Handle<String> script, UC16CharacterStream* source);

  // Parse a single JSON value from input (grammar production JSONValue).
  // A JSON value is either a (double-quoted) string literal, a number literal,
  // one of "true", "false", or "null", or an object or array literal.
  Handle<Object> ParseJsonValue();

  // Parse a JSON object literal (grammar production JSONObject).
  // An object literal is a squiggly-braced and comma separated sequence
  // (possibly empty) of key/value pairs, where the key is a JSON string
  // literal, the value is a JSON value, and the two are separated by a colon.
  // A JSON object doesn't allow numbers and identifiers as keys, like a
  // JavaScript object does.
  Handle<Object> ParseJsonObject();

  // Parses a JSON array literal (grammar production JSONArray). An array
  // literal is a square-bracketed and comma separated sequence (possibly empty)
  // of JSON values.
  // A JSON array doesn't allow leaving out values from the sequence, nor does
  // it allow a terminal comma, like a JavaScript array does.
  Handle<Object> ParseJsonArray();

  // Mark that a parsing error has happened at the current token, and
  // return a null handle. Primarily for readability.
  Handle<Object> ReportUnexpectedToken() { return Handle<Object>::null(); }

  // Converts the currently parsed literal to a JavaScript String.
  Handle<String> GetString();

  // Converts the currently parsed literal to a JavaScript Symbol String.
  Handle<String> GetSymbol();

  Isolate* isolate_;
  JsonScanner scanner_;
  // Set to true when a nested parse aborts because the stack limit was hit.
  bool stack_overflow_;
};
} } // namespace v8::internal
#endif // V8_PARSER_H_

View File

@ -42,6 +42,7 @@
#include "execution.h"
#include "global-handles.h"
#include "jsregexp.h"
#include "json-parser.h"
#include "liveedit.h"
#include "liveobjectlist-inl.h"
#include "parser.h"

View File

@ -342,244 +342,4 @@ void V8JavaScriptScanner::Initialize(UC16CharacterStream* source) {
}
// ----------------------------------------------------------------------------
// JsonScanner
// Constructs a JSON scanner, forwarding |unicode_cache| to the Scanner base.
// Only the base class is initialized here; number_ is assigned later by
// ScanJsonNumber() when a number token is scanned.
JsonScanner::JsonScanner(UnicodeCache* unicode_cache)
: Scanner(unicode_cache) { }
// Attaches the scanner to |source| and primes it so that the first token of
// the input is available as look-ahead before the first call to Next().
void JsonScanner::Initialize(UC16CharacterStream* source) {
source_ = source;
// Init() is defined outside this section; presumably it resets the scanner
// state and loads the first character into c0_ -- TODO confirm.
Init();
// Skip initial whitespace.
SkipJsonWhiteSpace();
// Preload first token as look-ahead.
ScanJson();
}
// Advances the scanner by one token: the previously scanned look-ahead token
// becomes the current token, a new look-ahead token is scanned, and the
// (new) current token's value is returned.
Token::Value JsonScanner::Next() {
// NOTE(review): earlier comments here referred to BUG 1215673 (finding a
// thread safe way to set a stack limit in pre-parse mode) and to checking
// for stack overflow before returning tokens. No stack check is performed
// in this function, so those remarks do not describe this code.
current_ = next_;
// Scan the next look-ahead token into next_.
ScanJson();
return current_.token;
}
// Consumes a run of JSON whitespace (space, tab, newline, carriage-return)
// starting at the current character. Returns true if the source position
// changed, i.e. if any whitespace was skipped.
bool JsonScanner::SkipJsonWhiteSpace() {
  const int initial_position = source_pos();
  for (;;) {
    const bool is_json_whitespace =
        (c0_ == ' ') || (c0_ == '\n') || (c0_ == '\r') || (c0_ == '\t');
    if (!is_json_whitespace) break;
    Advance();
  }
  return initial_position != source_pos();
}
void JsonScanner::ScanJson() {
next_.literal_chars = NULL;
Token::Value token;
do {
// Remember the position of the next token
next_.location.beg_pos = source_pos();
switch (c0_) {
case '\t':
case '\r':
case '\n':
case ' ':
Advance();
token = Token::WHITESPACE;
break;
case '{':
Advance();
token = Token::LBRACE;
break;
case '}':
Advance();
token = Token::RBRACE;
break;
case '[':
Advance();
token = Token::LBRACK;
break;
case ']':
Advance();
token = Token::RBRACK;
break;
case ':':
Advance();
token = Token::COLON;
break;
case ',':
Advance();
token = Token::COMMA;
break;
case '"':
token = ScanJsonString();
break;
case '-':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
token = ScanJsonNumber();
break;
case 't':
token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
break;
case 'f':
token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
break;
case 'n':
token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
break;
default:
if (c0_ < 0) {
Advance();
token = Token::EOS;
} else {
Advance();
token = Select(Token::ILLEGAL);
}
}
} while (token == Token::WHITESPACE);
next_.location.end_pos = source_pos();
next_.token = token;
}
// Scans a double-quoted JSON string starting at the opening quote and
// collects its decoded characters as the token literal. Returns
// Token::STRING on success, or Token::ILLEGAL for a control character, an
// unterminated string, or an escape sequence JSON does not allow.
Token::Value JsonScanner::ScanJsonString() {
  ASSERT_EQ('"', c0_);
  Advance();
  LiteralScope literal(this);
  while (c0_ != '"') {
    // Control characters (0x00-0x1f) are not allowed unescaped; a negative
    // value means the input ended before the closing quote.
    if (c0_ < 0x20) return Token::ILLEGAL;

    if (c0_ != '\\') {
      // Ordinary character: copy it verbatim.
      AddLiteralCharAdvance();
      continue;
    }

    // Escape sequence: step over the backslash and decode what follows.
    Advance();
    switch (c0_) {
      case '"':
      case '\\':
      case '/':
        AddLiteralChar(c0_);
        break;
      case 'b':
        AddLiteralChar('\b');
        break;
      case 'f':
        AddLiteralChar('\f');
        break;
      case 'n':
        AddLiteralChar('\n');
        break;
      case 'r':
        AddLiteralChar('\r');
        break;
      case 't':
        AddLiteralChar('\t');
        break;
      case 'u': {
        // Exactly four hex digits follow the 'u'.
        uc32 code_unit = 0;
        for (int remaining = 4; remaining > 0; remaining--) {
          Advance();
          int nibble = HexValue(c0_);
          if (nibble < 0) return Token::ILLEGAL;
          code_unit = code_unit * 16 + nibble;
        }
        AddLiteralChar(code_unit);
        break;
      }
      default:
        return Token::ILLEGAL;
    }
    // Step over the last character of the escape sequence.
    Advance();
  }
  literal.Complete();
  // Consume the closing quote.
  Advance();
  return Token::STRING;
}
// Scans a JSON number (optional '-', integer part without superfluous
// leading zeros, optional fraction, optional exponent) and stores its value
// in number_. Returns Token::NUMBER on success and Token::ILLEGAL if the
// characters do not form a valid JSON number.
//
// Plain integers with fewer than ten digits take a fast path that computes
// the value directly; everything else is converted with StringToDouble from
// the collected literal characters.
Token::Value JsonScanner::ScanJsonNumber() {
  // Largest digit count whose value always fits in an int
  // (999999999 < 2^31 - 1).
  static const int kMaxFastPathDigits = 9;

  LiteralScope literal(this);
  bool negative = false;
  if (c0_ == '-') {
    AddLiteralCharAdvance();
    negative = true;
  }
  if (c0_ == '0') {
    AddLiteralCharAdvance();
    // Prefix zero is only allowed if it's the only digit before
    // a decimal point or exponent.
    if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
  } else {
    int i = 0;
    int digits = 0;
    if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
    do {
      // Only accumulate while the value is guaranteed to fit. Longer digit
      // sequences never use |i|, and accumulating them would overflow a
      // signed int, which is undefined behavior.
      if (digits < kMaxFastPathDigits) i = i * 10 + (c0_ - '0');
      digits++;
      AddLiteralCharAdvance();
    } while (c0_ >= '0' && c0_ <= '9');
    if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' &&
        digits <= kMaxFastPathDigits) {
      // Fast path: small integer without fraction or exponent. Note that
      // this returns without calling literal.Complete().
      number_ = (negative ? -i : i);
      return Token::NUMBER;
    }
  }
  if (c0_ == '.') {
    AddLiteralCharAdvance();
    // At least one digit is required after the decimal point.
    if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
    do {
      AddLiteralCharAdvance();
    } while (c0_ >= '0' && c0_ <= '9');
  }
  if (AsciiAlphaToLower(c0_) == 'e') {
    AddLiteralCharAdvance();
    if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
    // At least one digit is required in the exponent.
    if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
    do {
      AddLiteralCharAdvance();
    } while (c0_ >= '0' && c0_ <= '9');
  }
  literal.Complete();
  ASSERT_NOT_NULL(next_.literal_chars);
  number_ = StringToDouble(unicode_cache_,
                           next_.literal_chars->ascii_literal(),
                           NO_FLAGS,  // Hex, octal or trailing junk.
                           OS::nan_value());
  return Token::NUMBER;
}
// Matches the input against the keyword |text| character by character.
// Returns |token| if the whole keyword matches and is not directly followed
// by another identifier character; returns Token::ILLEGAL otherwise.
Token::Value JsonScanner::ScanJsonIdentifier(const char* text,
                                             Token::Value token) {
  LiteralScope literal(this);
  for (const char* expected = text; *expected != '\0'; ++expected) {
    if (c0_ != *expected) return Token::ILLEGAL;
    Advance();
  }
  // Reject longer identifiers that merely start with the keyword.
  if (unicode_cache_->IsIdentifierPart(c0_)) return Token::ILLEGAL;
  literal.Complete();
  return token;
}
} } // namespace v8::internal

View File

@ -141,56 +141,6 @@ class V8JavaScriptScanner : public JavaScriptScanner {
};
// Scanner for JSON text. Produces one token of look-ahead from a
// UC16CharacterStream and exposes the value of scanned number tokens.
class JsonScanner : public Scanner {
public:
explicit JsonScanner(UnicodeCache* unicode_cache);
// Attaches the scanner to |source|, skips initial whitespace and preloads
// the first token as look-ahead.
void Initialize(UC16CharacterStream* source);
// Returns the next token.
Token::Value Next();
// Returns the value of a number token.
// The value is only assigned when a number token is scanned.
double number() {
return number_;
}
protected:
// Skip past JSON whitespace (only space, tab, newline and carriage-return).
// Returns true if any whitespace was skipped.
bool SkipJsonWhiteSpace();
// Scan a single JSON token. The JSON lexical grammar is specified in the
// ECMAScript 5 standard, section 15.12.1.1.
// Recognizes all of the single-character tokens directly, or calls a function
// to scan a number, string or identifier literal.
// The only allowed whitespace characters between tokens are tab,
// carriage-return, newline and space.
void ScanJson();
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
// It includes an optional minus sign, must have at least one
// digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed.
Token::Value ScanJsonNumber();
// A JSON string (production JSONString) is a subset of valid JavaScript
// string literals. The string must only be double-quoted (not single-quoted),
// and the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
Token::Value ScanJsonString();
// Recognizes one of the literals "true", "false", or "null". These
// are the only valid JSON identifiers (productions JSONBooleanLiteral,
// JSONNullLiteral).
Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
// Holds the value of a scanned number token.
double number_;
};
} } // namespace v8::internal
#endif // V8_SCANNER_H_