[json] Make json parsing iterative

This avoids the need to throw range errors when we run out of stack, limiting
us only by available memory.

The main parser loop is implemented by two subloops.

The first subloop finishes whenever it generates primitive values, empty
arrays, or empty objects. If a non-empty object or array is started, the loop
continues to parse its first member.

The second subloop consumes produced values and either adds them to the parent
array or object, or returns it. The second loop finishes whenever a next value
needs to be produced. When the loop itself produces a finished array or object,
the loop continues.

Exceptions are handled by moving the cursor to end-of-input. Upon end-of-input,
the first loop sets the continuation to "kFail". That causes the second loop to
tear down the continuation stack and the related handle scopes, resulting in an
empty handle.

The CL additionally buffers all named properties and elements so we can
immediately allocate a correctly shaped object. For object elements we'll take
flat array or dictionary encoding depending on what is more efficient.

This means that element handles are now allocated in their parent HandleScope,
rather than in a local HandleScope per property (of big objects), which is
why I've adjusted the handle-count test to allocate fewer properties. In
the future it would be nice to not have to allocate (as many) handles, since
almost everything in the JSON graph will survive JSON parsing...

Bug: chromium:710383
Change-Id: Ia3a7fd0ac260fb1c0e5f929276792b2f8e5fc0ca
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1609802
Reviewed-by: Hannes Payer <hpayer@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61533}
This commit is contained in:
Toon Verwaest 2019-05-15 14:43:03 +02:00 committed by Commit Bot
parent c39cabbcbe
commit 4b60b40aa7
9 changed files with 775 additions and 522 deletions

View File

@ -29,14 +29,6 @@ const Handle<T> Handle<T>::cast(Handle<S> that) {
return Handle<T>(that.location_);
}
// Opens a handle scope on |isolate|: remembers the isolate, snapshots the
// current next/limit pointers of its HandleScopeData, and increments the
// scope data's level counter.
HandleScope::HandleScope(Isolate* isolate) {
  isolate_ = isolate;
  HandleScopeData* const scope_data = isolate->handle_scope_data();
  // Snapshot of the allocation state in effect before this scope was opened.
  prev_limit_ = scope_data->limit;
  prev_next_ = scope_data->next;
  ++scope_data->level;
}
template <typename T>
Handle<T>::Handle(T object, Isolate* isolate)
: HandleBase(object.ptr(), isolate) {}
@ -51,24 +43,45 @@ inline std::ostream& operator<<(std::ostream& os, Handle<T> handle) {
return os << Brief(*handle);
}
// Opens a handle scope on |isolate|: remembers the isolate, snapshots the
// current next/limit pointers of its HandleScopeData, and increments the
// scope data's level counter.
HandleScope::HandleScope(Isolate* isolate) {
  isolate_ = isolate;
  HandleScopeData* const scope_data = isolate->handle_scope_data();
  // Snapshot of the allocation state in effect before this scope was opened.
  prev_limit_ = scope_data->limit;
  prev_next_ = scope_data->next;
  ++scope_data->level;
}
// Move constructor: takes over the isolate and the saved next/limit pointers
// of |other|, then nulls other's isolate_ to mark it as moved-from.
HandleScope::HandleScope(HandleScope&& other) V8_NOEXCEPT {
  isolate_ = other.isolate_;
  prev_next_ = other.prev_next_;
  prev_limit_ = other.prev_limit_;
  // A null isolate_ is the moved-from marker.
  other.isolate_ = nullptr;
}
// Destructor. Closes the scope via CloseScope() using the saved next/limit
// pointers. A scope whose isolate_ is null (the state the move constructor and
// move assignment leave their source in) returns early without closing.
// NOTE(review): the DEBUG-only FLAG_check_handle_count lines below look like
// the previous implementation interleaved with the new body by the diff
// rendering -- confirm against the checked-in file.
HandleScope::~HandleScope() {
#ifdef DEBUG
if (FLAG_check_handle_count) {
int before = NumberOfHandles(isolate_);
CloseScope(isolate_, prev_next_, prev_limit_);
int after = NumberOfHandles(isolate_);
DCHECK_LT(after - before, kCheckHandleThreshold);
DCHECK_LT(before, kCheckHandleThreshold);
// Moved-from scopes have no isolate and nothing to close.
if (isolate_ == nullptr) return;
CloseScope(isolate_, prev_next_, prev_limit_);
}
// Move assignment. If this scope currently has no isolate, it adopts other's
// isolate; otherwise both scopes are expected to share one isolate (DCHECK)
// and this scope is closed first. In either case other's saved next/limit
// pointers are taken over and other's isolate_ is nulled to mark it as
// moved-from. Returns *this.
// NOTE(review): the `#endif` / `#ifdef DEBUG` lines inside the body look like
// remnants of the previous destructor interleaved by the diff rendering --
// confirm against the checked-in file.
HandleScope& HandleScope::operator=(HandleScope&& other) V8_NOEXCEPT {
if (isolate_ == nullptr) {
isolate_ = other.isolate_;
} else {
#endif // DEBUG
DCHECK_EQ(isolate_, other.isolate_);
// Release the scope currently held before taking over other's state.
CloseScope(isolate_, prev_next_, prev_limit_);
#ifdef DEBUG
}
#endif // DEBUG
prev_next_ = other.prev_next_;
prev_limit_ = other.prev_limit_;
other.isolate_ = nullptr;
return *this;
}
void HandleScope::CloseScope(Isolate* isolate, Address* prev_next,
Address* prev_limit) {
#ifdef DEBUG
int before = FLAG_check_handle_count ? NumberOfHandles(isolate) : 0;
#endif
DCHECK_NOT_NULL(isolate);
HandleScopeData* current = isolate->handle_scope_data();
std::swap(current->next, prev_next);
@ -86,6 +99,11 @@ void HandleScope::CloseScope(Isolate* isolate, Address* prev_next,
current->next,
static_cast<size_t>(reinterpret_cast<Address>(limit) -
reinterpret_cast<Address>(current->next)));
#ifdef DEBUG
int after = FLAG_check_handle_count ? NumberOfHandles(isolate) : 0;
DCHECK_LT(after - before, kCheckHandleThreshold);
DCHECK_LT(before, kCheckHandleThreshold);
#endif
}
template <typename T>

View File

@ -187,9 +187,12 @@ inline std::ostream& operator<<(std::ostream& os, Handle<T> handle);
class HandleScope {
public:
explicit inline HandleScope(Isolate* isolate);
inline HandleScope(HandleScope&& other) V8_NOEXCEPT;
inline ~HandleScope();
inline HandleScope& operator=(HandleScope&& other) V8_NOEXCEPT;
// Counts the number of allocated handles.
V8_EXPORT_PRIVATE static int NumberOfHandles(Isolate* isolate);

View File

@ -2917,7 +2917,7 @@ Handle<JSObject> Factory::NewJSObjectFromMap(
DCHECK(js_obj->HasFastElements() || js_obj->HasFixedTypedArrayElements() ||
js_obj->HasFastStringWrapperElements() ||
js_obj->HasFastArgumentsElements());
js_obj->HasFastArgumentsElements() || js_obj->HasDictionaryElements());
return js_obj;
}

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,6 @@
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects.h"
#include "src/parsing/literal-buffer.h"
#include "src/zone/zone-containers.h"
namespace v8 {
@ -16,6 +15,88 @@ namespace internal {
enum ParseElementResult { kElementFound, kElementNotFound };
// Compact description of a string seen by the JSON parser. It holds either a
// (start, length) pair plus scanning flags, or -- when is_index() is true --
// a uint32_t index that shares storage with the start value. The non-index
// accessors DCHECK that the string is not an index, and index() DCHECKs that
// it is.
class JsonString final {
 public:
  // Empty non-index string: start 0, length 0, every flag cleared.
  JsonString()
      : start_(0),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(false) {}

  // String that stands for the index |index|; length and flags are zeroed.
  explicit JsonString(uint32_t index)
      : index_(index),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(true) {}

  // Non-index string described by |start| and |length|. Internalization is
  // requested either explicitly through |needs_internalization| or implicitly
  // when |length| does not exceed kMaxInternalizedStringValueLength.
  JsonString(int start, int length, bool needs_conversion,
             bool needs_internalization, bool has_escape)
      : start_(start),
        length_(length),
        needs_conversion_(needs_conversion),
        internalize_(needs_internalization ||
                     length <= kMaxInternalizedStringValueLength),
        has_escape_(has_escape),
        is_index_(false) {}

  // True when this object carries an index instead of a (start, length) pair.
  bool is_index() const { return is_index_; }

  // The stored index; only valid when is_index() is true.
  uint32_t index() const {
    DCHECK(is_index());
    return index_;
  }

  // The accessors below are only valid when is_index() is false.
  int start() const {
    DCHECK(!is_index());
    return start_;
  }

  int length() const {
    DCHECK(!is_index());
    return length_;
  }

  bool needs_conversion() const {
    DCHECK(!is_index());
    return needs_conversion_;
  }

  bool has_escape() const {
    DCHECK(!is_index());
    return has_escape_;
  }

  bool internalize() const {
    DCHECK(!is_index());
    return internalize_;
  }

 private:
  // Strings of at most this length are always marked for internalization.
  static const int kMaxInternalizedStringValueLength = 25;

  // start_ and index_ share storage; is_index_ says which one is active.
  union {
    const int start_;
    const uint32_t index_;
  };
  const int length_;
  const bool needs_conversion_ : 1;
  const bool internalize_ : 1;
  const bool has_escape_ : 1;
  const bool is_index_ : 1;
};
// A buffered property: the scanned key together with a handle to its value.
struct JsonProperty {
  // Must never run; reaching this constructor hits UNREACHABLE().
  JsonProperty() { UNREACHABLE(); }

  // Starts a property for |key|; |value| is left default-constructed.
  explicit JsonProperty(const JsonString& key) : string(key) {}

  JsonString string;
  Handle<Object> value;
};
class JsonParseInternalizer {
public:
static MaybeHandle<Object> Internalize(Isolate* isolate,
@ -37,7 +118,6 @@ class JsonParseInternalizer {
enum class JsonToken : uint8_t {
NUMBER,
NEGATIVE_NUMBER,
STRING,
LBRACE,
RBRACE,
@ -74,12 +154,25 @@ class JsonParser final {
static const int kEndOfString = -1;
private:
template <typename LiteralChar>
Handle<String> MakeString(bool requires_internalization,
const Vector<const LiteralChar>& chars);
// One entry of the parser's continuation state. The Type tag names three
// cases: return, object property, or array element. Each continuation also
// owns a HandleScope opened on the isolate passed to the constructor.
struct JsonContinuation {
enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
// Opens |scope| on |isolate| and records |type| and |index|. |index| is
// narrowed to uint32_t and then stored in a 30-bit field, so larger values
// are truncated. max_index and elements start at 0.
JsonContinuation(Isolate* isolate, Type type, size_t index)
: scope(isolate),
type_(type),
index(static_cast<uint32_t>(index)),
max_index(0),
elements(0) {}
// NOTE(review): this MakeString declaration looks like a line of the previous
// JsonParser interface interleaved here by the diff rendering -- confirm
// against the checked-in header.
Handle<String> MakeString(bool requires_internalization, int offset,
int length);
// The tag is stored in a 2-bit integer field, so it is cast on the way in and
// out.
Type type() const { return static_cast<Type>(type_); }
void set_type(Type type) { type_ = static_cast<uint8_t>(type); }
HandleScope scope;
// Unfortunately GCC doesn't like packing Type in two bits.
uint32_t type_ : 2;
uint32_t index : 30;
// NOTE(review): presumably the largest element index seen and the number of
// elements buffered so far -- confirm against json-parser.cc.
uint32_t max_index;
uint32_t elements;
};
JsonParser(Isolate* isolate, Handle<String> source);
~JsonParser();
@ -164,9 +257,20 @@ class JsonParser final {
// literals. The string must only be double-quoted (not single-quoted), and
// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
Handle<String> ParseJsonString(bool requires_internalization,
Handle<String> expected = Handle<String>());
JsonString ScanJsonString(bool needs_internalization);
JsonString ScanJsonPropertyKey(JsonContinuation* cont);
uc32 ScanUnicodeCharacter();
Handle<String> MakeString(const JsonString& string,
Handle<String> hint = Handle<String>());
template <typename SinkChar>
void DecodeString(SinkChar* sink, int start, int length);
template <typename SinkChar>
Handle<String> DecodeString(
const JsonString& string,
Handle<typename CharTraits<SinkChar>::String> intermediate,
Handle<String> hint);
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
@ -174,32 +278,19 @@ class JsonParser final {
// digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed.
Handle<Object> ParseJsonNumber(int sign, const Char* start);
Handle<Object> ParseJsonNumber();
// Parse a single JSON value from input (grammar production JSONValue).
// A JSON value is either a (double-quoted) string literal, a number literal,
// one of "true", "false", or "null", or an object or array literal.
Handle<Object> ParseJsonValue();
MaybeHandle<Object> ParseJsonValue();
// Parse a JSON object literal (grammar production JSONObject).
// An object literal is a squiggly-braced and comma separated sequence
// (possibly empty) of key/value pairs, where the key is a JSON string
// literal, the value is a JSON value, and the two are separated by a colon.
// A JSON object doesn't allow numbers and identifiers as keys, unlike a
// JavaScript object literal.
Handle<Object> ParseJsonObject();
// Helper for ParseJsonObject. Parses the form "123": obj, which is recorded
// as an element, not a property. Returns false if we should retry parsing the
// key as a non-element. (Returns true if it's an index or hits EOS).
bool ParseElement(Handle<JSObject> json_object);
// Parses a JSON array literal (grammar production JSONArray). An array
// literal is a square-bracketed and comma separated sequence (possibly empty)
// of JSON values.
// A JSON array doesn't allow leaving out values from the sequence, nor does
// it allow a terminal comma, like a JavaScript array does.
Handle<Object> ParseJsonArray();
Handle<Object> BuildJsonObject(
const JsonContinuation& cont,
const std::vector<JsonProperty>& property_stack);
Handle<Object> BuildJsonArray(
const JsonContinuation& cont,
const std::vector<Handle<Object>>& element_stack);
// Mark that a parsing error has happened at the current character.
void ReportUnexpectedCharacter(uc32 c);
@ -231,14 +322,6 @@ class JsonParser final {
private:
static const bool kIsOneByte = sizeof(Char) == 1;
static const int kMaxInternalizedStringValueLength = 25;
// Casts |c| to uc32 avoiding LiteralBuffer::AddChar(char) in one-byte-strings
// with escapes that can result in two-byte strings.
void AddLiteralChar(uc32 c) { literal_buffer_.AddChar(c); }
void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
const Vector<const Handle<Object>>& properties);
bool is_at_end() const {
DCHECK_LE(cursor_, end_);
@ -248,7 +331,6 @@ class JsonParser final {
int position() const { return static_cast<int>(cursor_ - chars_); }
Isolate* isolate_;
Zone zone_;
const uint64_t hash_seed_;
JsonToken next_;
// Indicates whether the bytes underneath source_ can relocate during GC.
@ -265,11 +347,6 @@ class JsonParser final {
const Char* cursor_;
const Char* end_;
const Char* chars_;
LiteralBuffer literal_buffer_;
// Property handles are stored here inside ParseJsonObject.
ZoneVector<Handle<Object>> properties_;
};
// Explicit instantiation declarations.

View File

@ -63,7 +63,7 @@ LookupIterator LookupIterator::PropertyOrElement(
it.name_ = name;
return it;
}
return LookupIterator(receiver, name, holder, configuration);
return LookupIterator(isolate, receiver, name, holder, configuration);
}
LookupIterator LookupIterator::PropertyOrElement(

View File

@ -1008,6 +1008,17 @@ ACCESSORS(JSAsyncFromSyncIterator, next, Object, kNextOffset)
ACCESSORS(JSStringIterator, string, String, kStringOffset)
SMI_ACCESSORS(JSStringIterator, index, kNextIndexOffset)
// Returns true when a fast backing store of |new_capacity| is at least
// kPreferFastElementsSizeFactor times as large as the dictionary storage
// (capacity for |used_elements| times the entry size), in which case the
// object should use slow (dictionary) elements.
// static
static inline bool ShouldConvertToSlowElements(uint32_t used_elements,
                                               uint32_t new_capacity) {
  const uint32_t dictionary_budget =
      NumberDictionary::kPreferFastElementsSizeFactor *
      NumberDictionary::ComputeCapacity(used_elements) *
      NumberDictionary::kEntrySize;
  return new_capacity >= dictionary_budget;
}
static inline bool ShouldConvertToSlowElements(JSObject object,
uint32_t capacity,
uint32_t index,
@ -1027,13 +1038,8 @@ static inline bool ShouldConvertToSlowElements(JSObject object,
ObjectInYoungGeneration(object))) {
return false;
}
// If the fast-case backing storage takes up much more memory than a
// dictionary backing storage would, the object should have slow elements.
int used_elements = object->GetFastElementsUsage();
uint32_t size_threshold = NumberDictionary::kPreferFastElementsSizeFactor *
NumberDictionary::ComputeCapacity(used_elements) *
NumberDictionary::kEntrySize;
return size_threshold <= *new_capacity;
return ShouldConvertToSlowElements(object->GetFastElementsUsage(),
*new_capacity);
}
} // namespace internal

View File

@ -30,4 +30,5 @@ for (var i = 0; i < 100000; i++) {
str = "[1," + str + "]";
}
assertThrows(function() { JSON.parse(str); }, RangeError);
// Make sure we don't overflow on very deeply nested JSON objects.
JSON.parse(str);

View File

@ -72,7 +72,7 @@ function generate(n) {
print("generating");
var str = generate(50000);
var str = generate(30000);
print("parsing " + str.length);
JSON.parse(str);