[json] Speed up two-byte JSON substring internalization
Inputs to JSON can be two-byte because payload strings can contain two-byte characters, even though none of the property keys actually needs two-byte characters. Rather than eagerly converting the string to one-byte, we can perform a string-table lookup with a two-byte string key, only converting the result to one-byte if it's a new key. This speeds up JSON parsing of two-byte JSON from the YouTube benchmark by 20%. Change-Id: If6d4a37d331724f48b008aef8ec3e28d366cd038 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1619866 Commit-Queue: Toon Verwaest <verwaest@chromium.org> Reviewed-by: Igor Sheludko <ishell@chromium.org> Reviewed-by: Ulan Degenbaev <ulan@chromium.org> Cr-Commit-Position: refs/heads/master@{#61680}
This commit is contained in:
parent
eca505afcf
commit
84841783e8
@ -736,8 +736,16 @@ Handle<String> Factory::InternalizeOneByteString(
|
||||
}
|
||||
|
||||
Handle<String> Factory::InternalizeTwoByteString(
|
||||
const Vector<const uc16>& string) {
|
||||
TwoByteStringKey key(string, HashSeed(isolate()));
|
||||
Handle<SeqTwoByteString> string, int from, int length,
|
||||
bool convert_to_one_byte) {
|
||||
SeqTwoByteSubStringKey key(isolate(), string, from, length,
|
||||
convert_to_one_byte);
|
||||
return InternalizeStringWithKey(&key);
|
||||
}
|
||||
|
||||
Handle<String> Factory::InternalizeTwoByteString(
|
||||
const Vector<const uc16>& string, bool convert_to_one_byte) {
|
||||
TwoByteStringKey key(string, HashSeed(isolate()), convert_to_one_byte);
|
||||
return InternalizeStringWithKey(&key);
|
||||
}
|
||||
|
||||
@ -924,22 +932,29 @@ Handle<SeqOneByteString> Factory::AllocateRawOneByteInternalizedString(
|
||||
|
||||
Handle<String> Factory::AllocateTwoByteInternalizedString(
|
||||
const Vector<const uc16>& str, uint32_t hash_field) {
|
||||
CHECK_GE(String::kMaxLength, str.length());
|
||||
DCHECK_NE(0, str.length()); // Use Heap::empty_string() instead.
|
||||
|
||||
Map map = *internalized_string_map();
|
||||
int size = SeqTwoByteString::SizeFor(str.length());
|
||||
HeapObject result =
|
||||
AllocateRawWithImmortalMap(size, AllocationType::kOld, map);
|
||||
Handle<SeqTwoByteString> answer(SeqTwoByteString::cast(result), isolate());
|
||||
answer->set_length(str.length());
|
||||
answer->set_hash_field(hash_field);
|
||||
DCHECK_EQ(size, answer->Size());
|
||||
Handle<SeqTwoByteString> result =
|
||||
AllocateRawTwoByteInternalizedString(str.length(), hash_field);
|
||||
DisallowHeapAllocation no_gc;
|
||||
|
||||
// Fill in the characters.
|
||||
MemCopy(answer->GetChars(no_gc), str.begin(), str.length() * kUC16Size);
|
||||
MemCopy(result->GetChars(no_gc), str.begin(), str.length() * kUC16Size);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Handle<SeqTwoByteString> Factory::AllocateRawTwoByteInternalizedString(
|
||||
int length, uint32_t hash_field) {
|
||||
CHECK_GE(String::kMaxLength, length);
|
||||
DCHECK_NE(0, length); // Use Heap::empty_string() instead.
|
||||
|
||||
Map map = *internalized_string_map();
|
||||
int size = SeqTwoByteString::SizeFor(length);
|
||||
HeapObject result =
|
||||
AllocateRawWithImmortalMap(size, AllocationType::kOld, map);
|
||||
Handle<SeqTwoByteString> answer(SeqTwoByteString::cast(result), isolate());
|
||||
answer->set_length(length);
|
||||
answer->set_hash_field(hash_field);
|
||||
DCHECK_EQ(size, result.Size());
|
||||
return answer;
|
||||
}
|
||||
|
||||
@ -991,17 +1006,6 @@ Handle<String> Factory::NewOneByteInternalizedString(
|
||||
return result;
|
||||
}
|
||||
|
||||
Handle<String> Factory::NewOneByteInternalizedSubString(
|
||||
Handle<SeqOneByteString> string, int offset, int length,
|
||||
uint32_t hash_field) {
|
||||
Handle<SeqOneByteString> result =
|
||||
AllocateRawOneByteInternalizedString(length, hash_field);
|
||||
DisallowHeapAllocation no_allocation;
|
||||
MemCopy(result->GetChars(no_allocation),
|
||||
string->GetChars(no_allocation) + offset, length);
|
||||
return result;
|
||||
}
|
||||
|
||||
Handle<String> Factory::NewTwoByteInternalizedString(
|
||||
const Vector<const uc16>& str, uint32_t hash_field) {
|
||||
return AllocateTwoByteInternalizedString(str, hash_field);
|
||||
|
@ -245,8 +245,12 @@ class V8_EXPORT_PRIVATE Factory {
|
||||
Handle<String> InternalizeOneByteString(const Vector<const uint8_t>& str);
|
||||
Handle<String> InternalizeOneByteString(Handle<SeqOneByteString>, int from,
|
||||
int length);
|
||||
Handle<String> InternalizeTwoByteString(Handle<SeqTwoByteString>, int from,
|
||||
int length,
|
||||
bool convert_to_one_byte = false);
|
||||
|
||||
Handle<String> InternalizeTwoByteString(const Vector<const uc16>& str);
|
||||
Handle<String> InternalizeTwoByteString(const Vector<const uc16>& str,
|
||||
bool convert_to_one_byte = false);
|
||||
|
||||
template <class StringTableKey>
|
||||
Handle<String> InternalizeStringWithKey(StringTableKey* key);
|
||||
@ -318,13 +322,15 @@ class V8_EXPORT_PRIVATE Factory {
|
||||
Handle<String> NewOneByteInternalizedString(const Vector<const uint8_t>& str,
|
||||
uint32_t hash_field);
|
||||
|
||||
Handle<String> NewOneByteInternalizedSubString(
|
||||
Handle<SeqOneByteString> string, int offset, int length,
|
||||
uint32_t hash_field);
|
||||
Handle<SeqOneByteString> AllocateRawOneByteInternalizedString(
|
||||
int length, uint32_t hash_field);
|
||||
|
||||
Handle<String> NewTwoByteInternalizedString(const Vector<const uc16>& str,
|
||||
uint32_t hash_field);
|
||||
|
||||
Handle<SeqTwoByteString> AllocateRawTwoByteInternalizedString(
|
||||
int length, uint32_t hash_field);
|
||||
|
||||
Handle<String> NewInternalizedStringImpl(Handle<String> string, int chars,
|
||||
uint32_t hash_field);
|
||||
|
||||
@ -1068,9 +1074,6 @@ class V8_EXPORT_PRIVATE Factory {
|
||||
Handle<String> AllocateInternalizedStringImpl(T t, int chars,
|
||||
uint32_t hash_field);
|
||||
|
||||
Handle<SeqOneByteString> AllocateRawOneByteInternalizedString(
|
||||
int length, uint32_t hash_field);
|
||||
|
||||
Handle<String> AllocateTwoByteInternalizedString(
|
||||
const Vector<const uc16>& str, uint32_t hash_field);
|
||||
|
||||
|
@ -964,15 +964,16 @@ namespace {
|
||||
|
||||
template <typename Char>
|
||||
bool Matches(const Vector<const Char>& chars, Handle<String> string) {
|
||||
if (string.is_null()) return false;
|
||||
DCHECK(!string.is_null());
|
||||
|
||||
// Only supports internalized strings in their canonical representation (one
|
||||
// byte encoded as two-byte will return false here).
|
||||
if ((sizeof(Char) == 1) != string->IsOneByteRepresentation()) return false;
|
||||
if (chars.length() != string->length()) return false;
|
||||
|
||||
DisallowHeapAllocation no_gc;
|
||||
const Char* string_data = string->GetChars<Char>(no_gc);
|
||||
if (string->IsOneByteRepresentation()) {
|
||||
const uint8_t* string_data = string->GetChars<uint8_t>(no_gc);
|
||||
return CompareChars(chars.begin(), string_data, chars.length()) == 0;
|
||||
}
|
||||
const uint16_t* string_data = string->GetChars<uint16_t>(no_gc);
|
||||
return CompareChars(chars.begin(), string_data, chars.length()) == 0;
|
||||
}
|
||||
|
||||
@ -991,7 +992,7 @@ Handle<String> JsonParser<Char>::DecodeString(
|
||||
DecodeString(dest, string.start(), string.length());
|
||||
} else {
|
||||
DCHECK_IMPLIES(string.internalize(), string.needs_conversion());
|
||||
i::CopyChars(dest, chars_ + string.start(), string.length());
|
||||
CopyChars(dest, chars_ + string.start(), string.length());
|
||||
}
|
||||
|
||||
Vector<const SinkChar> data(dest, string.length());
|
||||
@ -1013,13 +1014,6 @@ Handle<String> JsonParser<Char>::MakeString(const JsonString& string,
|
||||
if (string.length() == 0) return factory()->empty_string();
|
||||
|
||||
if (sizeof(Char) == 1) {
|
||||
if (V8_UNLIKELY(string.needs_conversion())) {
|
||||
DCHECK(string.has_escape());
|
||||
Handle<SeqTwoByteString> intermediate =
|
||||
factory()->NewRawTwoByteString(string.length()).ToHandleChecked();
|
||||
return DecodeString<uint16_t>(string, intermediate, hint);
|
||||
}
|
||||
|
||||
if (string.internalize() && !string.has_escape()) {
|
||||
if (!hint.is_null()) {
|
||||
Vector<const Char> data(chars_ + string.start(), string.length());
|
||||
@ -1035,12 +1029,13 @@ Handle<String> JsonParser<Char>::MakeString(const JsonString& string,
|
||||
Vector<const uint8_t>::cast(chars));
|
||||
}
|
||||
|
||||
Handle<SeqOneByteString> intermediate =
|
||||
factory()->NewRawOneByteString(string.length()).ToHandleChecked();
|
||||
return DecodeString<uint8_t>(string, intermediate, hint);
|
||||
}
|
||||
if (V8_UNLIKELY(string.needs_conversion())) {
|
||||
DCHECK(string.has_escape());
|
||||
Handle<SeqTwoByteString> intermediate =
|
||||
factory()->NewRawTwoByteString(string.length()).ToHandleChecked();
|
||||
return DecodeString<uint16_t>(string, intermediate, hint);
|
||||
}
|
||||
|
||||
if (string.needs_conversion()) {
|
||||
Handle<SeqOneByteString> intermediate =
|
||||
factory()->NewRawOneByteString(string.length()).ToHandleChecked();
|
||||
return DecodeString<uint8_t>(string, intermediate, hint);
|
||||
@ -1052,13 +1047,19 @@ Handle<String> JsonParser<Char>::MakeString(const JsonString& string,
|
||||
if (Matches(data, hint)) return hint;
|
||||
}
|
||||
if (chars_may_relocate_) {
|
||||
Handle<String> substring = factory()->NewProperSubString(
|
||||
source_, string.start(), string.start() + string.length());
|
||||
return factory()->InternalizeString(substring);
|
||||
return factory()->InternalizeTwoByteString(
|
||||
Handle<SeqTwoByteString>::cast(source_), string.start(),
|
||||
string.length(), string.needs_conversion());
|
||||
}
|
||||
Vector<const Char> chars(chars_ + string.start(), string.length());
|
||||
return factory()->InternalizeTwoByteString(
|
||||
Vector<const uint16_t>::cast(chars));
|
||||
Vector<const uint16_t>::cast(chars), string.needs_conversion());
|
||||
}
|
||||
|
||||
if (string.needs_conversion()) {
|
||||
Handle<SeqOneByteString> intermediate =
|
||||
factory()->NewRawOneByteString(string.length()).ToHandleChecked();
|
||||
return DecodeString<uint8_t>(string, intermediate, hint);
|
||||
}
|
||||
|
||||
Handle<SeqTwoByteString> intermediate =
|
||||
|
@ -6790,6 +6790,8 @@ template Handle<String> StringTable::LookupKey(Isolate* isolate,
|
||||
TwoByteStringKey* key);
|
||||
template Handle<String> StringTable::LookupKey(Isolate* isolate,
|
||||
SeqOneByteSubStringKey* key);
|
||||
template Handle<String> StringTable::LookupKey(Isolate* isolate,
|
||||
SeqTwoByteSubStringKey* key);
|
||||
|
||||
Handle<String> StringTable::AddKeyNoResize(Isolate* isolate,
|
||||
StringTableKey* key) {
|
||||
|
@ -197,13 +197,17 @@ Char FlatStringReader::Get(int index) {
|
||||
template <typename Char>
|
||||
class SequentialStringKey final : public StringTableKey {
|
||||
public:
|
||||
SequentialStringKey(const Vector<const Char>& chars, uint64_t seed)
|
||||
SequentialStringKey(const Vector<const Char>& chars, uint64_t seed,
|
||||
bool convert = false)
|
||||
: SequentialStringKey(StringHasher::HashSequentialString<Char>(
|
||||
chars.begin(), chars.length(), seed),
|
||||
chars) {}
|
||||
chars, convert) {}
|
||||
|
||||
SequentialStringKey(int hash, const Vector<const Char>& chars)
|
||||
: StringTableKey(hash, chars.length()), chars_(chars) {}
|
||||
SequentialStringKey(int hash, const Vector<const Char>& chars,
|
||||
bool convert = false)
|
||||
: StringTableKey(hash, chars.length()),
|
||||
chars_(chars),
|
||||
convert_(convert) {}
|
||||
|
||||
bool IsMatch(String s) override {
|
||||
DisallowHeapAllocation no_gc;
|
||||
@ -226,12 +230,14 @@ class SequentialStringKey final : public StringTableKey {
|
||||
|
||||
private:
|
||||
Vector<const Char> chars_;
|
||||
bool convert_;
|
||||
};
|
||||
|
||||
using OneByteStringKey = SequentialStringKey<uint8_t>;
|
||||
using TwoByteStringKey = SequentialStringKey<uint16_t>;
|
||||
|
||||
class SeqOneByteSubStringKey final : public StringTableKey {
|
||||
template <typename Char>
|
||||
class SeqSubStringKey final : public StringTableKey {
|
||||
public:
|
||||
// VS 2017 on official builds gives this spurious warning:
|
||||
// warning C4789: buffer 'key' of size 16 bytes will be overrun; 4 bytes will
|
||||
@ -241,9 +247,13 @@ class SeqOneByteSubStringKey final : public StringTableKey {
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4789)
|
||||
#endif
|
||||
SeqOneByteSubStringKey(Isolate* isolate, Handle<SeqOneByteString> string,
|
||||
int from, int len)
|
||||
: StringTableKey(0, len), string_(string), from_(from) {
|
||||
SeqSubStringKey(Isolate* isolate,
|
||||
Handle<typename CharTraits<Char>::String> string, int from,
|
||||
int len, bool convert = false)
|
||||
: StringTableKey(0, len),
|
||||
string_(string),
|
||||
from_(from),
|
||||
convert_(convert) {
|
||||
// We have to set the hash later.
|
||||
DisallowHeapAllocation no_gc;
|
||||
uint32_t hash = StringHasher::HashSequentialString(
|
||||
@ -252,7 +262,8 @@ class SeqOneByteSubStringKey final : public StringTableKey {
|
||||
|
||||
DCHECK_LE(0, length());
|
||||
DCHECK_LE(from_ + length(), string_->length());
|
||||
DCHECK(string_->IsSeqOneByteString());
|
||||
DCHECK_EQ(string_->IsSeqOneByteString(), sizeof(Char) == 1);
|
||||
DCHECK_EQ(string_->IsSeqTwoByteString(), sizeof(Char) == 2);
|
||||
}
|
||||
#if defined(V8_CC_MSVC)
|
||||
#pragma warning(pop)
|
||||
@ -270,15 +281,33 @@ class SeqOneByteSubStringKey final : public StringTableKey {
|
||||
}
|
||||
|
||||
Handle<String> AsHandle(Isolate* isolate) override {
|
||||
return isolate->factory()->NewOneByteInternalizedSubString(
|
||||
string_, from_, length(), hash_field());
|
||||
if (sizeof(Char) == 1 || (sizeof(Char) == 2 && convert_)) {
|
||||
Handle<SeqOneByteString> result =
|
||||
isolate->factory()->AllocateRawOneByteInternalizedString(
|
||||
length(), hash_field());
|
||||
DisallowHeapAllocation no_gc;
|
||||
CopyChars(result->GetChars(no_gc), string_->GetChars(no_gc) + from_,
|
||||
length());
|
||||
return result;
|
||||
}
|
||||
Handle<SeqTwoByteString> result =
|
||||
isolate->factory()->AllocateRawTwoByteInternalizedString(length(),
|
||||
hash_field());
|
||||
DisallowHeapAllocation no_gc;
|
||||
CopyChars(result->GetChars(no_gc), string_->GetChars(no_gc) + from_,
|
||||
length());
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
Handle<SeqOneByteString> string_;
|
||||
Handle<typename CharTraits<Char>::String> string_;
|
||||
int from_;
|
||||
bool convert_;
|
||||
};
|
||||
|
||||
using SeqOneByteSubStringKey = SeqSubStringKey<uint8_t>;
|
||||
using SeqTwoByteSubStringKey = SeqSubStringKey<uint16_t>;
|
||||
|
||||
bool String::Equals(String other) {
|
||||
if (other == *this) return true;
|
||||
if (this->IsInternalizedString() && other->IsInternalizedString()) {
|
||||
|
Loading…
Reference in New Issue
Block a user