Some string optimizations:

* Faster hashing for sequential strings.

* When adding short external two-byte strings, try to convert them
  back to ASCII. Chances are high that the embedder uses a two-byte
  representation even for ASCII strings. This optimization saves
  memory and makes hashing faster.

Review URL: http://codereview.chromium.org/1444001

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4300 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
vitalyr@chromium.org 2010-03-26 23:33:37 +00:00
parent a25312a53e
commit 9117dfcff9
5 changed files with 67 additions and 4 deletions

View File

@ -1961,8 +1961,9 @@ Object* Heap::AllocateConsString(String* first, String* second) {
return MakeOrFindTwoCharacterString(c1, c2);
}
bool is_ascii = first->IsAsciiRepresentation()
&& second->IsAsciiRepresentation();
bool first_is_ascii = first->IsAsciiRepresentation();
bool second_is_ascii = second->IsAsciiRepresentation();
bool is_ascii = first_is_ascii && second_is_ascii;
// Make sure that an out of memory exception is thrown if the length
// of the new cons string is too large.
@ -1997,6 +1998,25 @@ Object* Heap::AllocateConsString(String* first, String* second) {
for (int i = 0; i < second_length; i++) *dest++ = src[i];
return result;
} else {
// For short external two-byte strings we check whether they can
// be represented using ascii.
if (!first_is_ascii) {
first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars();
}
if (first_is_ascii && !second_is_ascii) {
second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars();
}
if (first_is_ascii && second_is_ascii) {
Object* result = AllocateRawAsciiString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
char* dest = SeqAsciiString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
Counters::string_add_runtime_ext_to_ascii.Increment();
return result;
}
Object* result = AllocateRawTwoByteString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.

View File

@ -255,6 +255,16 @@ bool String::IsTwoByteRepresentation() {
}
bool String::IsExternalTwoByteStringWithAsciiChars() {
if (!IsExternalTwoByteString()) return false;
const uc16* data = ExternalTwoByteString::cast(this)->resource()->data();
for (int i = 0, len = length(); i < len; i++) {
if (data[i] > kMaxAsciiCharCode) return false;
}
return true;
}
// Returns true when the bits of type_ selected by kStringRepresentationMask
// are equal to kConsStringTag.
bool StringShape::IsCons() {
return (type_ & kStringRepresentationMask) == kConsStringTag;
}

View File

@ -4660,13 +4660,38 @@ bool String::IsEqualTo(Vector<const char> str) {
}
// Computes the hash field for a contiguous buffer of `length` characters.
//
// `schar` is the element type of the buffer. A StringHasher is created for
// the given length; if it reports a trivial hash, no characters are fed to
// it. Otherwise every character is fed in order: through AddCharacter() for
// as long as the hasher still reports is_array_index(), and through
// AddCharacterNoIndex() for all remaining characters.
template <typename schar>
static inline uint32_t HashSequentialString(const schar* chars, int length) {
  StringHasher hasher(length);
  if (hasher.has_trivial_hash()) return hasher.GetHashField();

  int pos = 0;
  // Phase one: the prefix may still form an array index.
  while (hasher.is_array_index() && (pos < length)) {
    hasher.AddCharacter(chars[pos]);
    ++pos;
  }
  // Phase two: the array-index check is no longer needed for the tail.
  while (pos < length) {
    hasher.AddCharacterNoIndex(chars[pos]);
    ++pos;
  }
  return hasher.GetHashField();
}
uint32_t String::ComputeAndSetHash() {
// Should only be called if hash code has not yet been computed.
ASSERT(!(hash_field() & kHashComputedMask));
const int len = length();
// Compute the hash code.
StringInputBuffer buffer(this);
uint32_t field = ComputeHashField(&buffer, length());
uint32_t field = 0;
if (StringShape(this).IsSequentialAscii()) {
field = HashSequentialString(SeqAsciiString::cast(this)->GetChars(), len);
} else if (StringShape(this).IsSequentialTwoByte()) {
field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(), len);
} else {
StringInputBuffer buffer(this);
field = ComputeHashField(&buffer, len);
}
// Store the hash code in the object.
set_hash_field(field);

View File

@ -3926,6 +3926,13 @@ class String: public HeapObject {
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
// Check whether this string is an external two-byte string that in
// fact contains only ascii characters.
//
// Such strings may appear when the embedder prefers two-byte
// representations even for ascii data.
inline bool IsExternalTwoByteStringWithAsciiChars();
// Get and set individual two byte chars in the string.
inline void Set(int index, uint16_t value);
// Get individual two byte char in the string. Repeated calls

View File

@ -166,6 +166,7 @@ namespace internal {
SC(generic_binary_stub_calls_regs, V8.GenericBinaryStubCallsRegs) \
SC(string_add_runtime, V8.StringAddRuntime) \
SC(string_add_native, V8.StringAddNative) \
SC(string_add_runtime_ext_to_ascii, V8.StringAddRuntimeExtToAscii) \
SC(sub_string_runtime, V8.SubStringRuntime) \
SC(sub_string_native, V8.SubStringNative) \
SC(string_compare_native, V8.StringCompareNative) \