Replace the use CharacterStreams in Heap::AllocateSymbolInternal and String::ComputeHash
R=yangguo@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/11593007 Patch from Dan Carney <dcarney@google.com>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13242 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
362218a037
commit
9569b20db2
@ -98,12 +98,34 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool inline Heap::IsOneByte(Vector<const char> str, int chars) {
|
||||||
|
// TODO(dcarney): incorporate Latin-1 check when Latin-1 is supported?
|
||||||
|
// ASCII only check.
|
||||||
|
return chars == str.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool inline Heap::IsOneByte(String* str, int chars) {
|
||||||
|
return str->IsOneByteRepresentation();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
|
MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
|
||||||
int chars,
|
int chars,
|
||||||
uint32_t hash_field) {
|
uint32_t hash_field) {
|
||||||
unibrow::Utf8InputBuffer<> buffer(str.start(),
|
if (IsOneByte(str, chars)) return AllocateAsciiSymbol(str, hash_field);
|
||||||
static_cast<unsigned>(str.length()));
|
return AllocateInternalSymbol<false>(str, chars, hash_field);
|
||||||
return AllocateInternalSymbol(&buffer, chars, hash_field);
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
MaybeObject* Heap::AllocateInternalSymbol(T t, int chars, uint32_t hash_field) {
|
||||||
|
if (IsOneByte(t, chars)) {
|
||||||
|
return AllocateInternalSymbol<true>(t, chars, hash_field);
|
||||||
|
}
|
||||||
|
return AllocateInternalSymbol<false>(t, chars, hash_field);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
126
src/heap.cc
126
src/heap.cc
@ -3302,8 +3302,8 @@ static inline bool Between(uint32_t character, uint32_t from, uint32_t to) {
|
|||||||
|
|
||||||
MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
|
MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
|
||||||
Heap* heap,
|
Heap* heap,
|
||||||
uint32_t c1,
|
uint16_t c1,
|
||||||
uint32_t c2) {
|
uint16_t c2) {
|
||||||
String* symbol;
|
String* symbol;
|
||||||
// Numeric strings have a different hash algorithm not known by
|
// Numeric strings have a different hash algorithm not known by
|
||||||
// LookupTwoCharsSymbolIfExists, so we skip this step for such strings.
|
// LookupTwoCharsSymbolIfExists, so we skip this step for such strings.
|
||||||
@ -3352,8 +3352,8 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
|
|||||||
// dictionary. Check whether we already have the string in the symbol
|
// dictionary. Check whether we already have the string in the symbol
|
||||||
// table to prevent creation of many unneccesary strings.
|
// table to prevent creation of many unneccesary strings.
|
||||||
if (length == 2) {
|
if (length == 2) {
|
||||||
unsigned c1 = first->Get(0);
|
uint16_t c1 = first->Get(0);
|
||||||
unsigned c2 = second->Get(0);
|
uint16_t c2 = second->Get(0);
|
||||||
return MakeOrFindTwoCharacterString(this, c1, c2);
|
return MakeOrFindTwoCharacterString(this, c1, c2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3467,8 +3467,8 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
|
|||||||
// Optimization for 2-byte strings often used as keys in a decompression
|
// Optimization for 2-byte strings often used as keys in a decompression
|
||||||
// dictionary. Check whether we already have the string in the symbol
|
// dictionary. Check whether we already have the string in the symbol
|
||||||
// table to prevent creation of many unneccesary strings.
|
// table to prevent creation of many unneccesary strings.
|
||||||
unsigned c1 = buffer->Get(start);
|
uint16_t c1 = buffer->Get(start);
|
||||||
unsigned c2 = buffer->Get(start + 1);
|
uint16_t c2 = buffer->Get(start + 1);
|
||||||
return MakeOrFindTwoCharacterString(this, c1, c2);
|
return MakeOrFindTwoCharacterString(this, c1, c2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4624,27 +4624,88 @@ Map* Heap::SymbolMapForString(String* string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
|
template<typename T>
|
||||||
|
class AllocateInternalSymbolHelper {
|
||||||
|
public:
|
||||||
|
static void WriteOneByteData(T t, char* chars, int len);
|
||||||
|
static void WriteTwoByteData(T t, uint16_t* chars, int len);
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class AllocateInternalSymbolHelper< Vector<const char> > {
|
||||||
|
public:
|
||||||
|
static inline void WriteOneByteData(Vector<const char> vector,
|
||||||
|
char* chars,
|
||||||
|
int len) {
|
||||||
|
// Only works for ascii.
|
||||||
|
ASSERT(vector.length() == len);
|
||||||
|
memcpy(chars, vector.start(), len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void WriteTwoByteData(Vector<const char> vector,
|
||||||
|
uint16_t* chars,
|
||||||
|
int len) {
|
||||||
|
const uint8_t* stream = reinterpret_cast<const uint8_t*>(vector.start());
|
||||||
|
unsigned stream_length = vector.length();
|
||||||
|
while (stream_length != 0) {
|
||||||
|
unsigned consumed = 0;
|
||||||
|
uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length, &consumed);
|
||||||
|
ASSERT(c != unibrow::Utf8::kBadChar);
|
||||||
|
ASSERT(consumed <= stream_length);
|
||||||
|
stream_length -= consumed;
|
||||||
|
stream += consumed;
|
||||||
|
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||||
|
len -= 2;
|
||||||
|
if (len < 0) break;
|
||||||
|
*chars++ = unibrow::Utf16::LeadSurrogate(c);
|
||||||
|
*chars++ = unibrow::Utf16::TrailSurrogate(c);
|
||||||
|
} else {
|
||||||
|
len -= 1;
|
||||||
|
if (len < 0) break;
|
||||||
|
*chars++ = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT(stream_length == 0);
|
||||||
|
ASSERT(len == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class AllocateInternalSymbolHelper<String*> {
|
||||||
|
public:
|
||||||
|
static inline void WriteOneByteData(String* s, char* chars, int len) {
|
||||||
|
ASSERT(s->length() == len);
|
||||||
|
String::WriteToFlat(s, chars, 0, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void WriteTwoByteData(String* s, uint16_t* chars, int len) {
|
||||||
|
ASSERT(s->length() == len);
|
||||||
|
String::WriteToFlat(s, chars, 0, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper<String*>);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<bool is_one_byte, typename T>
|
||||||
|
MaybeObject* Heap::AllocateInternalSymbol(T t,
|
||||||
int chars,
|
int chars,
|
||||||
uint32_t hash_field) {
|
uint32_t hash_field) {
|
||||||
|
typedef AllocateInternalSymbolHelper<T> H;
|
||||||
ASSERT(chars >= 0);
|
ASSERT(chars >= 0);
|
||||||
// Ensure the chars matches the number of characters in the buffer.
|
|
||||||
ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length());
|
|
||||||
// Determine whether the string is ASCII.
|
|
||||||
bool is_ascii = true;
|
|
||||||
while (buffer->has_more()) {
|
|
||||||
if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) {
|
|
||||||
is_ascii = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buffer->Rewind();
|
|
||||||
|
|
||||||
// Compute map and object size.
|
// Compute map and object size.
|
||||||
int size;
|
int size;
|
||||||
Map* map;
|
Map* map;
|
||||||
|
|
||||||
if (is_ascii) {
|
if (is_one_byte) {
|
||||||
if (chars > SeqOneByteString::kMaxLength) {
|
if (chars > SeqOneByteString::kMaxLength) {
|
||||||
return Failure::OutOfMemoryException();
|
return Failure::OutOfMemoryException();
|
||||||
}
|
}
|
||||||
@ -4674,21 +4735,26 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
|
|||||||
|
|
||||||
ASSERT_EQ(size, answer->Size());
|
ASSERT_EQ(size, answer->Size());
|
||||||
|
|
||||||
// Fill in the characters.
|
if (is_one_byte) {
|
||||||
int i = 0;
|
H::WriteOneByteData(t, SeqOneByteString::cast(answer)->GetChars(), chars);
|
||||||
while (i < chars) {
|
} else {
|
||||||
uint32_t character = buffer->GetNext();
|
H::WriteTwoByteData(t, SeqTwoByteString::cast(answer)->GetChars(), chars);
|
||||||
if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
|
||||||
answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
|
|
||||||
answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
|
|
||||||
} else {
|
|
||||||
answer->Set(i++, character);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Need explicit instantiations.
|
||||||
|
template
|
||||||
|
MaybeObject* Heap::AllocateInternalSymbol<true>(String*, int, uint32_t);
|
||||||
|
template
|
||||||
|
MaybeObject* Heap::AllocateInternalSymbol<false>(String*, int, uint32_t);
|
||||||
|
template
|
||||||
|
MaybeObject* Heap::AllocateInternalSymbol<false>(Vector<const char>,
|
||||||
|
int,
|
||||||
|
uint32_t);
|
||||||
|
|
||||||
|
|
||||||
MaybeObject* Heap::AllocateRawOneByteString(int length,
|
MaybeObject* Heap::AllocateRawOneByteString(int length,
|
||||||
PretenureFlag pretenure) {
|
PretenureFlag pretenure) {
|
||||||
if (length < 0 || length > SeqOneByteString::kMaxLength) {
|
if (length < 0 || length > SeqOneByteString::kMaxLength) {
|
||||||
|
14
src/heap.h
14
src/heap.h
@ -764,12 +764,16 @@ class Heap {
|
|||||||
Vector<const uc16> str,
|
Vector<const uc16> str,
|
||||||
uint32_t hash_field);
|
uint32_t hash_field);
|
||||||
|
|
||||||
MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
|
template<typename T>
|
||||||
unibrow::CharacterStream* buffer, int chars, uint32_t hash_field);
|
static inline bool IsOneByte(T t, int chars);
|
||||||
|
|
||||||
MUST_USE_RESULT MaybeObject* AllocateExternalSymbol(
|
template<typename T>
|
||||||
Vector<const char> str,
|
MUST_USE_RESULT inline MaybeObject* AllocateInternalSymbol(
|
||||||
int chars);
|
T t, int chars, uint32_t hash_field);
|
||||||
|
|
||||||
|
template<bool is_one_byte, typename T>
|
||||||
|
MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
|
||||||
|
T t, int chars, uint32_t hash_field);
|
||||||
|
|
||||||
// Allocates and partially initializes a String. There are two String
|
// Allocates and partially initializes a String. There are two String
|
||||||
// encodings: ASCII and two byte. These functions allocate a string of the
|
// encodings: ASCII and two byte. These functions allocate a string of the
|
||||||
|
@ -631,7 +631,17 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
|
|||||||
position_);
|
position_);
|
||||||
}
|
}
|
||||||
if (c0 < 0x20) return Handle<String>::null();
|
if (c0 < 0x20) return Handle<String>::null();
|
||||||
running_hash = StringHasher::AddCharacterCore(running_hash, c0);
|
if (static_cast<uint32_t>(c0) >
|
||||||
|
unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||||
|
running_hash =
|
||||||
|
StringHasher::AddCharacterCore(running_hash,
|
||||||
|
unibrow::Utf16::LeadSurrogate(c0));
|
||||||
|
running_hash =
|
||||||
|
StringHasher::AddCharacterCore(running_hash,
|
||||||
|
unibrow::Utf16::TrailSurrogate(c0));
|
||||||
|
} else {
|
||||||
|
running_hash = StringHasher::AddCharacterCore(running_hash, c0);
|
||||||
|
}
|
||||||
position++;
|
position++;
|
||||||
if (position >= source_length_) return Handle<String>::null();
|
if (position >= source_length_) return Handle<String>::null();
|
||||||
c0 = seq_source_->SeqOneByteStringGet(position);
|
c0 = seq_source_->SeqOneByteStringGet(position);
|
||||||
|
@ -2584,8 +2584,7 @@ void String::Visit(
|
|||||||
|
|
||||||
case kConsStringTag | kOneByteStringTag:
|
case kConsStringTag | kOneByteStringTag:
|
||||||
case kConsStringTag | kTwoByteStringTag:
|
case kConsStringTag | kTwoByteStringTag:
|
||||||
string = cons_op.Operate(ConsString::cast(string), &offset, &type,
|
string = cons_op.Operate(string, &offset, &type, &length);
|
||||||
&length);
|
|
||||||
if (string == NULL) return;
|
if (string == NULL) return;
|
||||||
slice_offset = offset;
|
slice_offset = offset;
|
||||||
ASSERT(length == static_cast<unsigned>(string->length()));
|
ASSERT(length == static_cast<unsigned>(string->length()));
|
||||||
@ -2777,6 +2776,11 @@ const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
String* ConsStringNullOp::Operate(String*, unsigned*, int32_t*, unsigned*) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
|
unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
|
||||||
return depth & kDepthMask;
|
return depth & kDepthMask;
|
||||||
}
|
}
|
||||||
@ -2805,42 +2809,38 @@ void ConsStringIteratorOp::Pop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ConsStringIteratorOp::Reset() {
|
|
||||||
depth_ = 0;
|
|
||||||
maximum_depth_ = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool ConsStringIteratorOp::HasMore() {
|
bool ConsStringIteratorOp::HasMore() {
|
||||||
return depth_ != 0;
|
return depth_ != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) {
|
void ConsStringIteratorOp::Reset() {
|
||||||
|
depth_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
String* ConsStringIteratorOp::ContinueOperation(int32_t* type_out,
|
||||||
|
unsigned* length_out) {
|
||||||
bool blew_stack;
|
bool blew_stack;
|
||||||
int32_t type;
|
String* string = NextLeaf(&blew_stack, type_out, length_out);
|
||||||
unsigned length;
|
|
||||||
String* string = NextLeaf(&blew_stack, &type, &length);
|
|
||||||
// String found.
|
// String found.
|
||||||
if (string != NULL) {
|
if (string != NULL) {
|
||||||
consumed_ += length;
|
// Verify output.
|
||||||
response->string_ = string;
|
ASSERT(*length_out == static_cast<unsigned>(string->length()));
|
||||||
response->offset_ = 0;
|
ASSERT(*type_out == string->map()->instance_type());
|
||||||
response->length_ = length;
|
return string;
|
||||||
response->type_ = type;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
// Traversal complete.
|
// Traversal complete.
|
||||||
if (!blew_stack) return false;
|
if (!blew_stack) return NULL;
|
||||||
// Restart search.
|
// Restart search from root.
|
||||||
Reset();
|
unsigned offset_out;
|
||||||
// TODO(dcarney) This is unnecessary.
|
string = Search(&offset_out, type_out, length_out);
|
||||||
// After a reset, we don't need a String::Visit
|
// Verify output.
|
||||||
response->string_ = root_;
|
ASSERT(string == NULL || offset_out == 0);
|
||||||
response->offset_ = consumed_;
|
ASSERT(string == NULL ||
|
||||||
response->length_ = root_length_;
|
*length_out == static_cast<unsigned>(string->length()));
|
||||||
response->type_ = root_type_;
|
ASSERT(string == NULL || *type_out == string->map()->instance_type());
|
||||||
return true;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2857,18 +2857,24 @@ StringCharacterStream::StringCharacterStream(
|
|||||||
end_(NULL),
|
end_(NULL),
|
||||||
op_(op) {
|
op_(op) {
|
||||||
op->Reset();
|
op->Reset();
|
||||||
String::Visit(string,
|
int32_t type = string->map()->instance_type();
|
||||||
offset, *this, *op, string->map()->instance_type(), string->length());
|
unsigned length = string->length();
|
||||||
|
String::Visit(string, offset, *this, *op, type, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool StringCharacterStream::HasMore() {
|
bool StringCharacterStream::HasMore() {
|
||||||
if (buffer8_ != end_) return true;
|
if (buffer8_ != end_) return true;
|
||||||
if (!op_->HasMore()) return false;
|
if (!op_->HasMore()) return false;
|
||||||
ConsStringIteratorOp::ContinueResponse response;
|
unsigned length;
|
||||||
if (!op_->ContinueOperation(&response)) return false;
|
int32_t type;
|
||||||
String::Visit(response.string_,
|
String* string = op_->ContinueOperation(&type, &length);
|
||||||
response.offset_, *this, *op_, response.type_, response.length_);
|
if (string == NULL) return false;
|
||||||
|
ASSERT(!string->IsConsString());
|
||||||
|
ASSERT(string->length() != 0);
|
||||||
|
ConsStringNullOp null_op;
|
||||||
|
String::Visit(string, 0, *this, null_op, type, length);
|
||||||
|
ASSERT(buffer8_ != end_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5138,7 +5144,7 @@ bool StringHasher::has_trivial_hash() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint32_t c) {
|
uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint16_t c) {
|
||||||
running_hash += c;
|
running_hash += c;
|
||||||
running_hash += (running_hash << 10);
|
running_hash += (running_hash << 10);
|
||||||
running_hash ^= (running_hash >> 6);
|
running_hash ^= (running_hash >> 6);
|
||||||
@ -5157,66 +5163,62 @@ uint32_t StringHasher::GetHashCore(uint32_t running_hash) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void StringHasher::AddCharacter(uint32_t c) {
|
void StringHasher::AddCharacter(uint16_t c) {
|
||||||
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
|
||||||
AddSurrogatePair(c); // Not inlined.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Use the Jenkins one-at-a-time hash function to update the hash
|
// Use the Jenkins one-at-a-time hash function to update the hash
|
||||||
// for the given character.
|
// for the given character.
|
||||||
raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
|
raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
|
||||||
// Incremental array index computation.
|
}
|
||||||
if (is_array_index_) {
|
|
||||||
if (c < '0' || c > '9') {
|
|
||||||
|
bool StringHasher::UpdateIndex(uint16_t c) {
|
||||||
|
ASSERT(is_array_index_);
|
||||||
|
if (c < '0' || c > '9') {
|
||||||
|
is_array_index_ = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int d = c - '0';
|
||||||
|
if (is_first_char_) {
|
||||||
|
is_first_char_ = false;
|
||||||
|
if (c == '0' && length_ > 1) {
|
||||||
is_array_index_ = false;
|
is_array_index_ = false;
|
||||||
} else {
|
return false;
|
||||||
int d = c - '0';
|
}
|
||||||
if (is_first_char_) {
|
}
|
||||||
is_first_char_ = false;
|
if (array_index_ > 429496729U - ((d + 2) >> 3)) {
|
||||||
if (c == '0' && length_ > 1) {
|
is_array_index_ = false;
|
||||||
is_array_index_ = false;
|
return false;
|
||||||
return;
|
}
|
||||||
}
|
array_index_ = array_index_ * 10 + d;
|
||||||
}
|
return true;
|
||||||
if (array_index_ > 429496729U - ((d + 2) >> 3)) {
|
}
|
||||||
is_array_index_ = false;
|
|
||||||
} else {
|
|
||||||
array_index_ = array_index_ * 10 + d;
|
template<typename Char>
|
||||||
|
inline void StringHasher::AddCharacters(const Char* chars, int length) {
|
||||||
|
ASSERT(sizeof(Char) == 1 || sizeof(Char) == 2);
|
||||||
|
int i = 0;
|
||||||
|
if (is_array_index_) {
|
||||||
|
for (; i < length; i++) {
|
||||||
|
AddCharacter(chars[i]);
|
||||||
|
if (!UpdateIndex(chars[i])) {
|
||||||
|
i++;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
for (; i < length; i++) {
|
||||||
|
ASSERT(!is_array_index_);
|
||||||
|
AddCharacter(chars[i]);
|
||||||
void StringHasher::AddCharacterNoIndex(uint32_t c) {
|
|
||||||
ASSERT(!is_array_index());
|
|
||||||
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
|
||||||
AddSurrogatePairNoIndex(c); // Not inlined.
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
uint32_t StringHasher::GetHash() {
|
|
||||||
// Get the calculated raw hash value and do some more bit ops to distribute
|
|
||||||
// the hash further. Ensure that we never return zero as the hash value.
|
|
||||||
return GetHashCore(raw_running_hash_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename schar>
|
template <typename schar>
|
||||||
uint32_t HashSequentialString(const schar* chars, int length, uint32_t seed) {
|
uint32_t StringHasher::HashSequentialString(const schar* chars,
|
||||||
|
int length,
|
||||||
|
uint32_t seed) {
|
||||||
StringHasher hasher(length, seed);
|
StringHasher hasher(length, seed);
|
||||||
if (!hasher.has_trivial_hash()) {
|
if (!hasher.has_trivial_hash()) hasher.AddCharacters(chars, length);
|
||||||
int i;
|
|
||||||
for (i = 0; hasher.is_array_index() && (i < length); i++) {
|
|
||||||
hasher.AddCharacter(chars[i]);
|
|
||||||
}
|
|
||||||
for (; i < length; i++) {
|
|
||||||
hasher.AddCharacterNoIndex(chars[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hasher.GetHashField();
|
return hasher.GetHashField();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
257
src/objects.cc
257
src/objects.cc
@ -7029,24 +7029,36 @@ void StringInputBuffer::Seek(unsigned pos) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
String* ConsStringIteratorOp::Operate(ConsString* cons_string,
|
String* ConsStringIteratorOp::Operate(String* string,
|
||||||
unsigned* offset_out, int32_t* type_out, unsigned* length_out) {
|
unsigned* offset_out,
|
||||||
ASSERT(*length_out == (unsigned)cons_string->length());
|
int32_t* type_out,
|
||||||
ASSERT(depth_ == 0);
|
unsigned* length_out) {
|
||||||
// Push the root string.
|
ASSERT(string->IsConsString());
|
||||||
PushLeft(cons_string);
|
ConsString* cons_string = ConsString::cast(string);
|
||||||
|
// Set up search data.
|
||||||
root_ = cons_string;
|
root_ = cons_string;
|
||||||
root_type_ = *type_out;
|
|
||||||
root_length_ = *length_out;
|
|
||||||
consumed_ = *offset_out;
|
consumed_ = *offset_out;
|
||||||
unsigned targetOffset = *offset_out;
|
// Now search.
|
||||||
|
return Search(offset_out, type_out, length_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
String* ConsStringIteratorOp::Search(unsigned* offset_out,
|
||||||
|
int32_t* type_out,
|
||||||
|
unsigned* length_out) {
|
||||||
|
ConsString* cons_string = root_;
|
||||||
|
// Reset the stack, pushing the root string.
|
||||||
|
depth_ = 1;
|
||||||
|
maximum_depth_ = 1;
|
||||||
|
frames_[0] = cons_string;
|
||||||
|
const unsigned consumed = consumed_;
|
||||||
unsigned offset = 0;
|
unsigned offset = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
// Loop until the string is found which contains the target offset.
|
// Loop until the string is found which contains the target offset.
|
||||||
String* string = cons_string->first();
|
String* string = cons_string->first();
|
||||||
unsigned length = string->length();
|
unsigned length = string->length();
|
||||||
int32_t type;
|
int32_t type;
|
||||||
if (targetOffset < offset + length) {
|
if (consumed < offset + length) {
|
||||||
// Target offset is in the left branch.
|
// Target offset is in the left branch.
|
||||||
// Keep going if we're still in a ConString.
|
// Keep going if we're still in a ConString.
|
||||||
type = string->map()->instance_type();
|
type = string->map()->instance_type();
|
||||||
@ -7075,6 +7087,7 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,
|
|||||||
// Account for the possibility of an empty right leaf.
|
// Account for the possibility of an empty right leaf.
|
||||||
// This happens only if we have asked for an offset outside the string.
|
// This happens only if we have asked for an offset outside the string.
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
|
// Reset depth so future operations will return null immediately.
|
||||||
Reset();
|
Reset();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -7085,9 +7098,8 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,
|
|||||||
}
|
}
|
||||||
ASSERT(length != 0);
|
ASSERT(length != 0);
|
||||||
// Adjust return values and exit.
|
// Adjust return values and exit.
|
||||||
unsigned innerOffset = targetOffset - offset;
|
consumed_ = offset + length;
|
||||||
consumed_ += length - innerOffset;
|
*offset_out = consumed - offset;
|
||||||
*offset_out = innerOffset;
|
|
||||||
*type_out = type;
|
*type_out = type;
|
||||||
*length_out = length;
|
*length_out = length;
|
||||||
return string;
|
return string;
|
||||||
@ -7097,8 +7109,9 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
String* ConsStringIteratorOp::NextLeaf(
|
String* ConsStringIteratorOp::NextLeaf(bool* blew_stack,
|
||||||
bool* blew_stack, int32_t* type_out, unsigned* length_out) {
|
int32_t* type_out,
|
||||||
|
unsigned* length_out) {
|
||||||
while (true) {
|
while (true) {
|
||||||
// Tree traversal complete.
|
// Tree traversal complete.
|
||||||
if (depth_ == 0) {
|
if (depth_ == 0) {
|
||||||
@ -7122,6 +7135,7 @@ String* ConsStringIteratorOp::NextLeaf(
|
|||||||
if (length == 0) continue;
|
if (length == 0) continue;
|
||||||
*length_out = length;
|
*length_out = length;
|
||||||
*type_out = type;
|
*type_out = type;
|
||||||
|
consumed_ += length;
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
cons_string = ConsString::cast(string);
|
cons_string = ConsString::cast(string);
|
||||||
@ -7134,8 +7148,11 @@ String* ConsStringIteratorOp::NextLeaf(
|
|||||||
type = string->map()->instance_type();
|
type = string->map()->instance_type();
|
||||||
if ((type & kStringRepresentationMask) != kConsStringTag) {
|
if ((type & kStringRepresentationMask) != kConsStringTag) {
|
||||||
AdjustMaximumDepth();
|
AdjustMaximumDepth();
|
||||||
|
unsigned length = (unsigned) string->length();
|
||||||
|
ASSERT(length != 0);
|
||||||
|
*length_out = length;
|
||||||
*type_out = type;
|
*type_out = type;
|
||||||
*length_out = string->length();
|
consumed_ += length;
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
cons_string = ConsString::cast(string);
|
cons_string = ConsString::cast(string);
|
||||||
@ -7674,28 +7691,62 @@ bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class IteratingStringHasher: public StringHasher {
|
||||||
|
public:
|
||||||
|
static inline uint32_t Hash(String* string, uint32_t seed) {
|
||||||
|
const unsigned len = static_cast<unsigned>(string->length());
|
||||||
|
IteratingStringHasher hasher(len, seed);
|
||||||
|
if (hasher.has_trivial_hash()) {
|
||||||
|
return hasher.GetHashField();
|
||||||
|
}
|
||||||
|
int32_t type = string->map()->instance_type();
|
||||||
|
ConsStringNullOp null_op;
|
||||||
|
String::Visit(string, 0, hasher, null_op, type, len);
|
||||||
|
// Flat strings terminate immediately.
|
||||||
|
if (hasher.consumed_ == len) {
|
||||||
|
ASSERT(!string->IsConsString());
|
||||||
|
return hasher.GetHashField();
|
||||||
|
}
|
||||||
|
ASSERT(string->IsConsString());
|
||||||
|
// This is a ConsString, iterate across it.
|
||||||
|
ConsStringIteratorOp op;
|
||||||
|
unsigned offset = 0;
|
||||||
|
unsigned leaf_length = len;
|
||||||
|
string = op.Operate(string, &offset, &type, &leaf_length);
|
||||||
|
while (true) {
|
||||||
|
ASSERT(hasher.consumed_ < len);
|
||||||
|
String::Visit(string, 0, hasher, null_op, type, leaf_length);
|
||||||
|
if (hasher.consumed_ == len) break;
|
||||||
|
string = op.ContinueOperation(&type, &leaf_length);
|
||||||
|
// This should be taken care of by the length check.
|
||||||
|
ASSERT(string != NULL);
|
||||||
|
}
|
||||||
|
return hasher.GetHashField();
|
||||||
|
}
|
||||||
|
inline void VisitOneByteString(const uint8_t* chars, unsigned length) {
|
||||||
|
AddCharacters(chars, static_cast<int>(length));
|
||||||
|
consumed_ += length;
|
||||||
|
}
|
||||||
|
inline void VisitTwoByteString(const uint16_t* chars, unsigned length) {
|
||||||
|
AddCharacters(chars, static_cast<int>(length));
|
||||||
|
consumed_ += length;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
inline IteratingStringHasher(int len, uint32_t seed)
|
||||||
|
: StringHasher(len, seed),
|
||||||
|
consumed_(0) {}
|
||||||
|
unsigned consumed_;
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(IteratingStringHasher);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
uint32_t String::ComputeAndSetHash() {
|
uint32_t String::ComputeAndSetHash() {
|
||||||
// Should only be called if hash code has not yet been computed.
|
// Should only be called if hash code has not yet been computed.
|
||||||
ASSERT(!HasHashCode());
|
ASSERT(!HasHashCode());
|
||||||
|
|
||||||
const int len = length();
|
|
||||||
|
|
||||||
// Compute the hash code.
|
|
||||||
uint32_t field = 0;
|
|
||||||
if (StringShape(this).IsSequentialAscii()) {
|
|
||||||
field = HashSequentialString(SeqOneByteString::cast(this)->GetChars(),
|
|
||||||
len,
|
|
||||||
GetHeap()->HashSeed());
|
|
||||||
} else if (StringShape(this).IsSequentialTwoByte()) {
|
|
||||||
field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(),
|
|
||||||
len,
|
|
||||||
GetHeap()->HashSeed());
|
|
||||||
} else {
|
|
||||||
StringInputBuffer buffer(this);
|
|
||||||
field = ComputeHashField(&buffer, len, GetHeap()->HashSeed());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the hash code in the object.
|
// Store the hash code in the object.
|
||||||
|
uint32_t field = IteratingStringHasher::Hash(this, GetHeap()->HashSeed());
|
||||||
set_hash_field(field);
|
set_hash_field(field);
|
||||||
|
|
||||||
// Check the hash code is there.
|
// Check the hash code is there.
|
||||||
@ -7799,57 +7850,59 @@ uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void StringHasher::AddSurrogatePair(uc32 c) {
|
|
||||||
uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
|
|
||||||
AddCharacter(lead);
|
|
||||||
uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
|
|
||||||
AddCharacter(trail);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void StringHasher::AddSurrogatePairNoIndex(uc32 c) {
|
|
||||||
uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
|
|
||||||
AddCharacterNoIndex(lead);
|
|
||||||
uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
|
|
||||||
AddCharacterNoIndex(trail);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
uint32_t StringHasher::GetHashField() {
|
uint32_t StringHasher::GetHashField() {
|
||||||
if (length_ <= String::kMaxHashCalcLength) {
|
if (length_ <= String::kMaxHashCalcLength) {
|
||||||
if (is_array_index()) {
|
if (is_array_index_) {
|
||||||
return MakeArrayIndexHash(array_index(), length_);
|
return MakeArrayIndexHash(array_index_, length_);
|
||||||
}
|
}
|
||||||
return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask;
|
return (GetHashCore(raw_running_hash_) << String::kHashShift) |
|
||||||
|
String::kIsNotArrayIndexMask;
|
||||||
} else {
|
} else {
|
||||||
return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;
|
return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t String::ComputeHashField(unibrow::CharacterStream* buffer,
|
uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer,
|
||||||
int length,
|
int length,
|
||||||
uint32_t seed) {
|
uint32_t seed) {
|
||||||
|
typedef unibrow::Utf16 u;
|
||||||
StringHasher hasher(length, seed);
|
StringHasher hasher(length, seed);
|
||||||
|
|
||||||
// Very long strings have a trivial hash that doesn't inspect the
|
// Very long strings have a trivial hash that doesn't inspect the
|
||||||
// string contents.
|
// string contents.
|
||||||
if (hasher.has_trivial_hash()) {
|
if (hasher.has_trivial_hash()) {
|
||||||
return hasher.GetHashField();
|
return hasher.GetHashField();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do the iterative array index computation as long as there is a
|
// Do the iterative array index computation as long as there is a
|
||||||
// chance this is an array index.
|
// chance this is an array index.
|
||||||
while (buffer->has_more() && hasher.is_array_index()) {
|
if (hasher.is_array_index_) {
|
||||||
hasher.AddCharacter(buffer->GetNext());
|
while (buffer->has_more()) {
|
||||||
|
uint32_t c = buffer->GetNext();
|
||||||
|
if (c > u::kMaxNonSurrogateCharCode) {
|
||||||
|
uint16_t c1 = u::LeadSurrogate(c);
|
||||||
|
uint16_t c2 = u::TrailSurrogate(c);
|
||||||
|
hasher.AddCharacter(c1);
|
||||||
|
hasher.AddCharacter(c2);
|
||||||
|
if (!hasher.UpdateIndex(c1)) break;
|
||||||
|
if (!hasher.UpdateIndex(c2)) break;
|
||||||
|
} else {
|
||||||
|
hasher.AddCharacter(c);
|
||||||
|
if (!hasher.UpdateIndex(c)) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the remaining characters without updating the array
|
// Process the remaining characters without updating the array
|
||||||
// index.
|
// index.
|
||||||
while (buffer->has_more()) {
|
while (buffer->has_more()) {
|
||||||
hasher.AddCharacterNoIndex(buffer->GetNext());
|
ASSERT(!hasher.is_array_index_);
|
||||||
|
uint32_t c = buffer->GetNext();
|
||||||
|
if (c > u::kMaxNonSurrogateCharCode) {
|
||||||
|
hasher.AddCharacter(u::LeadSurrogate(c));
|
||||||
|
hasher.AddCharacter(u::TrailSurrogate(c));
|
||||||
|
} else {
|
||||||
|
hasher.AddCharacter(c);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hasher.GetHashField();
|
return hasher.GetHashField();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -11667,7 +11720,7 @@ class Utf8SymbolKey : public HashTableKey {
|
|||||||
unibrow::Utf8InputBuffer<> buffer(string_.start(),
|
unibrow::Utf8InputBuffer<> buffer(string_.start(),
|
||||||
static_cast<unsigned>(string_.length()));
|
static_cast<unsigned>(string_.length()));
|
||||||
chars_ = buffer.Utf16Length();
|
chars_ = buffer.Utf16Length();
|
||||||
hash_field_ = String::ComputeHashField(&buffer, chars_, seed_);
|
hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_);
|
||||||
uint32_t result = hash_field_ >> String::kHashShift;
|
uint32_t result = hash_field_ >> String::kHashShift;
|
||||||
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
||||||
return result;
|
return result;
|
||||||
@ -11697,29 +11750,9 @@ class SequentialSymbolKey : public HashTableKey {
|
|||||||
: string_(string), hash_field_(0), seed_(seed) { }
|
: string_(string), hash_field_(0), seed_(seed) { }
|
||||||
|
|
||||||
uint32_t Hash() {
|
uint32_t Hash() {
|
||||||
StringHasher hasher(string_.length(), seed_);
|
hash_field_ = StringHasher::HashSequentialString<Char>(string_.start(),
|
||||||
|
string_.length(),
|
||||||
// Very long strings have a trivial hash that doesn't inspect the
|
seed_);
|
||||||
// string contents.
|
|
||||||
if (hasher.has_trivial_hash()) {
|
|
||||||
hash_field_ = hasher.GetHashField();
|
|
||||||
} else {
|
|
||||||
int i = 0;
|
|
||||||
// Do the iterative array index computation as long as there is a
|
|
||||||
// chance this is an array index.
|
|
||||||
while (i < string_.length() && hasher.is_array_index()) {
|
|
||||||
hasher.AddCharacter(static_cast<uc32>(string_[i]));
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process the remaining characters without updating the array
|
|
||||||
// index.
|
|
||||||
while (i < string_.length()) {
|
|
||||||
hasher.AddCharacterNoIndex(static_cast<uc32>(string_[i]));
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
hash_field_ = hasher.GetHashField();
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t result = hash_field_ >> String::kHashShift;
|
uint32_t result = hash_field_ >> String::kHashShift;
|
||||||
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
||||||
@ -11764,32 +11797,9 @@ class SubStringAsciiSymbolKey : public HashTableKey {
|
|||||||
uint32_t Hash() {
|
uint32_t Hash() {
|
||||||
ASSERT(length_ >= 0);
|
ASSERT(length_ >= 0);
|
||||||
ASSERT(from_ + length_ <= string_->length());
|
ASSERT(from_ + length_ <= string_->length());
|
||||||
StringHasher hasher(length_, string_->GetHeap()->HashSeed());
|
char* chars = string_->GetChars() + from_;
|
||||||
|
hash_field_ = StringHasher::HashSequentialString(
|
||||||
// Very long strings have a trivial hash that doesn't inspect the
|
chars, length_, string_->GetHeap()->HashSeed());
|
||||||
// string contents.
|
|
||||||
if (hasher.has_trivial_hash()) {
|
|
||||||
hash_field_ = hasher.GetHashField();
|
|
||||||
} else {
|
|
||||||
int i = 0;
|
|
||||||
// Do the iterative array index computation as long as there is a
|
|
||||||
// chance this is an array index.
|
|
||||||
while (i < length_ && hasher.is_array_index()) {
|
|
||||||
hasher.AddCharacter(static_cast<uc32>(
|
|
||||||
string_->SeqOneByteStringGet(i + from_)));
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process the remaining characters without updating the array
|
|
||||||
// index.
|
|
||||||
while (i < length_) {
|
|
||||||
hasher.AddCharacterNoIndex(static_cast<uc32>(
|
|
||||||
string_->SeqOneByteStringGet(i + from_)));
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
hash_field_ = hasher.GetHashField();
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t result = hash_field_ >> String::kHashShift;
|
uint32_t result = hash_field_ >> String::kHashShift;
|
||||||
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
||||||
return result;
|
return result;
|
||||||
@ -11864,8 +11874,7 @@ class SymbolKey : public HashTableKey {
|
|||||||
return string_;
|
return string_;
|
||||||
}
|
}
|
||||||
// Otherwise allocate a new symbol.
|
// Otherwise allocate a new symbol.
|
||||||
StringInputBuffer buffer(string_);
|
return heap->AllocateInternalSymbol(string_,
|
||||||
return heap->AllocateInternalSymbol(&buffer,
|
|
||||||
string_->length(),
|
string_->length(),
|
||||||
string_->hash_field());
|
string_->hash_field());
|
||||||
}
|
}
|
||||||
@ -12635,7 +12644,7 @@ MaybeObject* SymbolTable::LookupString(String* string, Object** s) {
|
|||||||
// algorithm.
|
// algorithm.
|
||||||
class TwoCharHashTableKey : public HashTableKey {
|
class TwoCharHashTableKey : public HashTableKey {
|
||||||
public:
|
public:
|
||||||
TwoCharHashTableKey(uint32_t c1, uint32_t c2, uint32_t seed)
|
TwoCharHashTableKey(uint16_t c1, uint16_t c2, uint32_t seed)
|
||||||
: c1_(c1), c2_(c2) {
|
: c1_(c1), c2_(c2) {
|
||||||
// Char 1.
|
// Char 1.
|
||||||
uint32_t hash = seed;
|
uint32_t hash = seed;
|
||||||
@ -12651,17 +12660,17 @@ class TwoCharHashTableKey : public HashTableKey {
|
|||||||
hash ^= hash >> 11;
|
hash ^= hash >> 11;
|
||||||
hash += hash << 15;
|
hash += hash << 15;
|
||||||
if ((hash & String::kHashBitMask) == 0) hash = StringHasher::kZeroHash;
|
if ((hash & String::kHashBitMask) == 0) hash = StringHasher::kZeroHash;
|
||||||
|
hash_ = hash;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
StringHasher hasher(2, seed);
|
|
||||||
hasher.AddCharacter(c1);
|
|
||||||
hasher.AddCharacter(c2);
|
|
||||||
// If this assert fails then we failed to reproduce the two-character
|
// If this assert fails then we failed to reproduce the two-character
|
||||||
// version of the string hashing algorithm above. One reason could be
|
// version of the string hashing algorithm above. One reason could be
|
||||||
// that we were passed two digits as characters, since the hash
|
// that we were passed two digits as characters, since the hash
|
||||||
// algorithm is different in that case.
|
// algorithm is different in that case.
|
||||||
ASSERT_EQ(static_cast<int>(hasher.GetHash()), static_cast<int>(hash));
|
uint16_t chars[2] = {c1, c2};
|
||||||
|
uint32_t check_hash = StringHasher::HashSequentialString(chars, 2, seed);
|
||||||
|
hash = (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
|
||||||
|
ASSERT_EQ(static_cast<int32_t>(hash), static_cast<int32_t>(check_hash));
|
||||||
#endif
|
#endif
|
||||||
hash_ = hash;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsMatch(Object* o) {
|
bool IsMatch(Object* o) {
|
||||||
@ -12686,8 +12695,8 @@ class TwoCharHashTableKey : public HashTableKey {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32_t c1_;
|
uint16_t c1_;
|
||||||
uint32_t c2_;
|
uint16_t c2_;
|
||||||
uint32_t hash_;
|
uint32_t hash_;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -12706,8 +12715,8 @@ bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool SymbolTable::LookupTwoCharsSymbolIfExists(uint32_t c1,
|
bool SymbolTable::LookupTwoCharsSymbolIfExists(uint16_t c1,
|
||||||
uint32_t c2,
|
uint16_t c2,
|
||||||
String** symbol) {
|
String** symbol) {
|
||||||
TwoCharHashTableKey key(c1, c2, GetHeap()->HashSeed());
|
TwoCharHashTableKey key(c1, c2, GetHeap()->HashSeed());
|
||||||
int entry = FindEntry(&key);
|
int entry = FindEntry(&key);
|
||||||
|
125
src/objects.h
125
src/objects.h
@ -3043,7 +3043,7 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> {
|
|||||||
// true if it is found, assigning the symbol to the given output
|
// true if it is found, assigning the symbol to the given output
|
||||||
// parameter.
|
// parameter.
|
||||||
bool LookupSymbolIfExists(String* str, String** symbol);
|
bool LookupSymbolIfExists(String* str, String** symbol);
|
||||||
bool LookupTwoCharsSymbolIfExists(uint32_t c1, uint32_t c2, String** symbol);
|
bool LookupTwoCharsSymbolIfExists(uint16_t c1, uint16_t c2, String** symbol);
|
||||||
|
|
||||||
// Casting.
|
// Casting.
|
||||||
static inline SymbolTable* cast(Object* obj);
|
static inline SymbolTable* cast(Object* obj);
|
||||||
@ -6929,30 +6929,14 @@ class StringHasher {
|
|||||||
public:
|
public:
|
||||||
explicit inline StringHasher(int length, uint32_t seed);
|
explicit inline StringHasher(int length, uint32_t seed);
|
||||||
|
|
||||||
// Returns true if the hash of this string can be computed without
|
template <typename schar>
|
||||||
// looking at the contents.
|
static inline uint32_t HashSequentialString(const schar* chars,
|
||||||
inline bool has_trivial_hash();
|
int length,
|
||||||
|
uint32_t seed);
|
||||||
|
|
||||||
// Add a character to the hash and update the array index calculation.
|
static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
|
||||||
inline void AddCharacter(uint32_t c);
|
int length,
|
||||||
|
uint32_t seed);
|
||||||
// Adds a character to the hash but does not update the array index
|
|
||||||
// calculation. This can only be called when it has been verified
|
|
||||||
// that the input is not an array index.
|
|
||||||
inline void AddCharacterNoIndex(uint32_t c);
|
|
||||||
|
|
||||||
// Add a character above 0xffff as a surrogate pair. These can get into
|
|
||||||
// the hasher through the routines that take a UTF-8 string and make a symbol.
|
|
||||||
void AddSurrogatePair(uc32 c);
|
|
||||||
void AddSurrogatePairNoIndex(uc32 c);
|
|
||||||
|
|
||||||
// Returns the value to store in the hash field of a string with
|
|
||||||
// the given length and contents.
|
|
||||||
uint32_t GetHashField();
|
|
||||||
|
|
||||||
// Returns true if the characters seen so far make up a legal array
|
|
||||||
// index.
|
|
||||||
bool is_array_index() { return is_array_index_; }
|
|
||||||
|
|
||||||
// Calculated hash value for a string consisting of 1 to
|
// Calculated hash value for a string consisting of 1 to
|
||||||
// String::kMaxArrayIndexSize digits with no leading zeros (except "0").
|
// String::kMaxArrayIndexSize digits with no leading zeros (except "0").
|
||||||
@ -6964,51 +6948,36 @@ class StringHasher {
|
|||||||
// use 27 instead.
|
// use 27 instead.
|
||||||
static const int kZeroHash = 27;
|
static const int kZeroHash = 27;
|
||||||
|
|
||||||
private:
|
|
||||||
uint32_t array_index() {
|
|
||||||
ASSERT(is_array_index());
|
|
||||||
return array_index_;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uint32_t GetHash();
|
|
||||||
|
|
||||||
// Reusable parts of the hashing algorithm.
|
// Reusable parts of the hashing algorithm.
|
||||||
INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint32_t c));
|
INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint16_t c));
|
||||||
INLINE(static uint32_t GetHashCore(uint32_t running_hash));
|
INLINE(static uint32_t GetHashCore(uint32_t running_hash));
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// Returns the value to store in the hash field of a string with
|
||||||
|
// the given length and contents.
|
||||||
|
uint32_t GetHashField();
|
||||||
|
// Returns true if the hash of this string can be computed without
|
||||||
|
// looking at the contents.
|
||||||
|
inline bool has_trivial_hash();
|
||||||
|
// Adds a block of characters to the hash.
|
||||||
|
template<typename Char>
|
||||||
|
inline void AddCharacters(const Char* chars, int len);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Add a character to the hash.
|
||||||
|
inline void AddCharacter(uint16_t c);
|
||||||
|
// Update index. Returns true if string is still an index.
|
||||||
|
inline bool UpdateIndex(uint16_t c);
|
||||||
|
|
||||||
int length_;
|
int length_;
|
||||||
uint32_t raw_running_hash_;
|
uint32_t raw_running_hash_;
|
||||||
uint32_t array_index_;
|
uint32_t array_index_;
|
||||||
bool is_array_index_;
|
bool is_array_index_;
|
||||||
bool is_first_char_;
|
bool is_first_char_;
|
||||||
friend class TwoCharHashTableKey;
|
DISALLOW_COPY_AND_ASSIGN(StringHasher);
|
||||||
|
|
||||||
template <bool seq_ascii> friend class JsonParser;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class IncrementalAsciiStringHasher {
|
|
||||||
public:
|
|
||||||
explicit inline IncrementalAsciiStringHasher(uint32_t seed, char first_char);
|
|
||||||
inline void AddCharacter(uc32 c);
|
|
||||||
inline uint32_t GetHash();
|
|
||||||
|
|
||||||
private:
|
|
||||||
int length_;
|
|
||||||
uint32_t raw_running_hash_;
|
|
||||||
uint32_t array_index_;
|
|
||||||
bool is_array_index_;
|
|
||||||
char first_char_;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// Calculates string hash.
|
|
||||||
template <typename schar>
|
|
||||||
inline uint32_t HashSequentialString(const schar* chars,
|
|
||||||
int length,
|
|
||||||
uint32_t seed);
|
|
||||||
|
|
||||||
|
|
||||||
// The characteristics of a string are stored in its map. Retrieving these
|
// The characteristics of a string are stored in its map. Retrieving these
|
||||||
// few bits of information is moderately expensive, involving two memory
|
// few bits of information is moderately expensive, involving two memory
|
||||||
// loads where the second is dependent on the first. To improve efficiency
|
// loads where the second is dependent on the first. To improve efficiency
|
||||||
@ -7227,10 +7196,6 @@ class String: public HeapObject {
|
|||||||
// Returns a hash value used for the property table
|
// Returns a hash value used for the property table
|
||||||
inline uint32_t Hash();
|
inline uint32_t Hash();
|
||||||
|
|
||||||
static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
|
|
||||||
int length,
|
|
||||||
uint32_t seed);
|
|
||||||
|
|
||||||
static bool ComputeArrayIndex(unibrow::CharacterStream* buffer,
|
static bool ComputeArrayIndex(unibrow::CharacterStream* buffer,
|
||||||
uint32_t* index,
|
uint32_t* index,
|
||||||
int length);
|
int length);
|
||||||
@ -7870,22 +7835,30 @@ class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// A ConsStringOp that returns null.
|
||||||
|
// Useful when the operation to apply on a ConsString
|
||||||
|
// requires an expensive data structure.
|
||||||
|
class ConsStringNullOp {
|
||||||
|
public:
|
||||||
|
inline ConsStringNullOp() {}
|
||||||
|
static inline String* Operate(String*, unsigned*, int32_t*, unsigned*);
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(ConsStringNullOp);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// This maintains an off-stack representation of the stack frames required
|
// This maintains an off-stack representation of the stack frames required
|
||||||
// to traverse a ConsString, allowing an entirely iterative and restartable
|
// to traverse a ConsString, allowing an entirely iterative and restartable
|
||||||
// traversal of the entire string
|
// traversal of the entire string
|
||||||
// Note: this class is not GC-safe.
|
// Note: this class is not GC-safe.
|
||||||
class ConsStringIteratorOp {
|
class ConsStringIteratorOp {
|
||||||
public:
|
public:
|
||||||
struct ContinueResponse {
|
|
||||||
String* string_;
|
|
||||||
unsigned offset_;
|
|
||||||
unsigned length_;
|
|
||||||
int32_t type_;
|
|
||||||
};
|
|
||||||
inline ConsStringIteratorOp() {}
|
inline ConsStringIteratorOp() {}
|
||||||
String* Operate(ConsString* cons_string, unsigned* offset_out,
|
String* Operate(String* string,
|
||||||
int32_t* type_out, unsigned* length_out);
|
unsigned* offset_out,
|
||||||
inline bool ContinueOperation(ContinueResponse* response);
|
int32_t* type_out,
|
||||||
|
unsigned* length_out);
|
||||||
|
inline String* ContinueOperation(int32_t* type_out, unsigned* length_out);
|
||||||
inline void Reset();
|
inline void Reset();
|
||||||
inline bool HasMore();
|
inline bool HasMore();
|
||||||
|
|
||||||
@ -7902,6 +7875,9 @@ class ConsStringIteratorOp {
|
|||||||
inline void AdjustMaximumDepth();
|
inline void AdjustMaximumDepth();
|
||||||
inline void Pop();
|
inline void Pop();
|
||||||
String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned* length_out);
|
String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned* length_out);
|
||||||
|
String* Search(unsigned* offset_out,
|
||||||
|
int32_t* type_out,
|
||||||
|
unsigned* length_out);
|
||||||
|
|
||||||
unsigned depth_;
|
unsigned depth_;
|
||||||
unsigned maximum_depth_;
|
unsigned maximum_depth_;
|
||||||
@ -7910,8 +7886,6 @@ class ConsStringIteratorOp {
|
|||||||
ConsString* frames_[kStackSize];
|
ConsString* frames_[kStackSize];
|
||||||
unsigned consumed_;
|
unsigned consumed_;
|
||||||
ConsString* root_;
|
ConsString* root_;
|
||||||
int32_t root_type_;
|
|
||||||
unsigned root_length_;
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
|
DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -7919,8 +7893,9 @@ class ConsStringIteratorOp {
|
|||||||
// Note: this class is not GC-safe.
|
// Note: this class is not GC-safe.
|
||||||
class StringCharacterStream {
|
class StringCharacterStream {
|
||||||
public:
|
public:
|
||||||
inline StringCharacterStream(
|
inline StringCharacterStream(String* string,
|
||||||
String* string, unsigned offset, ConsStringIteratorOp* op);
|
unsigned offset,
|
||||||
|
ConsStringIteratorOp* op);
|
||||||
inline uint16_t GetNext();
|
inline uint16_t GetNext();
|
||||||
inline bool HasMore();
|
inline bool HasMore();
|
||||||
inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op);
|
inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op);
|
||||||
|
@ -112,7 +112,7 @@ const char* StringsStorage::GetCopy(const char* src) {
|
|||||||
OS::StrNCpy(dst, src, len);
|
OS::StrNCpy(dst, src, len);
|
||||||
dst[len] = '\0';
|
dst[len] = '\0';
|
||||||
uint32_t hash =
|
uint32_t hash =
|
||||||
HashSequentialString(dst.start(), len, HEAP->HashSeed());
|
StringHasher::HashSequentialString(dst.start(), len, HEAP->HashSeed());
|
||||||
return AddOrDisposeString(dst.start(), hash);
|
return AddOrDisposeString(dst.start(), hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -145,7 +145,7 @@ const char* StringsStorage::GetVFormatted(const char* format, va_list args) {
|
|||||||
DeleteArray(str.start());
|
DeleteArray(str.start());
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
uint32_t hash = HashSequentialString(
|
uint32_t hash = StringHasher::HashSequentialString(
|
||||||
str.start(), len, HEAP->HashSeed());
|
str.start(), len, HEAP->HashSeed());
|
||||||
return AddOrDisposeString(str.start(), hash);
|
return AddOrDisposeString(str.start(), hash);
|
||||||
}
|
}
|
||||||
@ -156,8 +156,8 @@ const char* StringsStorage::GetName(String* name) {
|
|||||||
int length = Min(kMaxNameSize, name->length());
|
int length = Min(kMaxNameSize, name->length());
|
||||||
SmartArrayPointer<char> data =
|
SmartArrayPointer<char> data =
|
||||||
name->ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL, 0, length);
|
name->ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL, 0, length);
|
||||||
uint32_t hash =
|
uint32_t hash = StringHasher::HashSequentialString(
|
||||||
HashSequentialString(*data, length, name->GetHeap()->HashSeed());
|
*data, length, name->GetHeap()->HashSeed());
|
||||||
return AddOrDisposeString(data.Detach(), hash);
|
return AddOrDisposeString(data.Detach(), hash);
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
@ -1451,9 +1451,9 @@ void HeapObjectsMap::RemoveDeadEntries() {
|
|||||||
SnapshotObjectId HeapObjectsMap::GenerateId(v8::RetainedObjectInfo* info) {
|
SnapshotObjectId HeapObjectsMap::GenerateId(v8::RetainedObjectInfo* info) {
|
||||||
SnapshotObjectId id = static_cast<SnapshotObjectId>(info->GetHash());
|
SnapshotObjectId id = static_cast<SnapshotObjectId>(info->GetHash());
|
||||||
const char* label = info->GetLabel();
|
const char* label = info->GetLabel();
|
||||||
id ^= HashSequentialString(label,
|
id ^= StringHasher::HashSequentialString(label,
|
||||||
static_cast<int>(strlen(label)),
|
static_cast<int>(strlen(label)),
|
||||||
HEAP->HashSeed());
|
HEAP->HashSeed());
|
||||||
intptr_t element_count = info->GetElementCount();
|
intptr_t element_count = info->GetElementCount();
|
||||||
if (element_count != -1)
|
if (element_count != -1)
|
||||||
id ^= ComputeIntegerHash(static_cast<uint32_t>(element_count),
|
id ^= ComputeIntegerHash(static_cast<uint32_t>(element_count),
|
||||||
@ -2940,9 +2940,10 @@ class NativeGroupRetainedObjectInfo : public v8::RetainedObjectInfo {
|
|||||||
NativeGroupRetainedObjectInfo* NativeObjectsExplorer::FindOrAddGroupInfo(
|
NativeGroupRetainedObjectInfo* NativeObjectsExplorer::FindOrAddGroupInfo(
|
||||||
const char* label) {
|
const char* label) {
|
||||||
const char* label_copy = collection_->names()->GetCopy(label);
|
const char* label_copy = collection_->names()->GetCopy(label);
|
||||||
uint32_t hash = HashSequentialString(label_copy,
|
uint32_t hash = StringHasher::HashSequentialString(
|
||||||
static_cast<int>(strlen(label_copy)),
|
label_copy,
|
||||||
HEAP->HashSeed());
|
static_cast<int>(strlen(label_copy)),
|
||||||
|
HEAP->HashSeed());
|
||||||
HashMap::Entry* entry = native_groups_.Lookup(const_cast<char*>(label_copy),
|
HashMap::Entry* entry = native_groups_.Lookup(const_cast<char*>(label_copy),
|
||||||
hash, true);
|
hash, true);
|
||||||
if (entry->value == NULL) {
|
if (entry->value == NULL) {
|
||||||
|
@ -170,10 +170,6 @@ class Utf8 {
|
|||||||
// that match are coded as a 4 byte UTF-8 sequence.
|
// that match are coded as a 4 byte UTF-8 sequence.
|
||||||
static const unsigned kBytesSavedByCombiningSurrogates = 2;
|
static const unsigned kBytesSavedByCombiningSurrogates = 2;
|
||||||
static const unsigned kSizeOfUnmatchedSurrogate = 3;
|
static const unsigned kSizeOfUnmatchedSurrogate = 3;
|
||||||
|
|
||||||
private:
|
|
||||||
template <unsigned s> friend class Utf8InputBuffer;
|
|
||||||
friend class Test;
|
|
||||||
static inline uchar ValueOf(const byte* str,
|
static inline uchar ValueOf(const byte* str,
|
||||||
unsigned length,
|
unsigned length,
|
||||||
unsigned* cursor);
|
unsigned* cursor);
|
||||||
|
@ -345,37 +345,24 @@ void AccumulateStats(Handle<String> cons_string, ConsStringStats* stats) {
|
|||||||
|
|
||||||
void AccumulateStatsWithOperator(
|
void AccumulateStatsWithOperator(
|
||||||
ConsString* cons_string, ConsStringStats* stats) {
|
ConsString* cons_string, ConsStringStats* stats) {
|
||||||
// Init op.
|
unsigned offset = 0;
|
||||||
|
int32_t type = cons_string->map()->instance_type();
|
||||||
|
unsigned length = static_cast<unsigned>(cons_string->length());
|
||||||
ConsStringIteratorOp op;
|
ConsStringIteratorOp op;
|
||||||
op.Reset();
|
String* string = op.Operate(cons_string, &offset, &type, &length);
|
||||||
// Use response for initial search and on blown stack.
|
CHECK(string != NULL);
|
||||||
ConsStringIteratorOp::ContinueResponse response;
|
|
||||||
response.string_ = cons_string;
|
|
||||||
response.offset_ = 0;
|
|
||||||
response.type_ = cons_string->map()->instance_type();
|
|
||||||
response.length_ = (uint32_t) cons_string->length();
|
|
||||||
while (true) {
|
while (true) {
|
||||||
String* string = op.Operate(ConsString::cast(response.string_),
|
ASSERT(!string->IsConsString());
|
||||||
&response.offset_,
|
// Accumulate stats.
|
||||||
&response.type_,
|
stats->leaves_++;
|
||||||
&response.length_);
|
stats->chars_ += string->length();
|
||||||
CHECK(string != NULL);
|
// Check for completion.
|
||||||
while (true) {
|
bool keep_going_fast_check = op.HasMore();
|
||||||
// Accumulate stats.
|
string = op.ContinueOperation(&type, &length);
|
||||||
stats->leaves_++;
|
if (string == NULL) return;
|
||||||
stats->chars_ += string->length();
|
// Verify no false positives for fast check.
|
||||||
// Check for completion.
|
CHECK(keep_going_fast_check);
|
||||||
bool keep_going_fast_check = op.HasMore();
|
}
|
||||||
bool keep_going = op.ContinueOperation(&response);
|
|
||||||
if (!keep_going) return;
|
|
||||||
// Verify no false positives for fast check.
|
|
||||||
CHECK(keep_going_fast_check);
|
|
||||||
CHECK(response.string_ != NULL);
|
|
||||||
// Blew stack. Restart outer loop.
|
|
||||||
if (response.string_->IsConsString()) break;
|
|
||||||
string = response.string_;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user