Track ascii-ness of data in externalized strings.
If a two-byte string contains only ascii characters, we can save memory when flattening a cons string that contains it. Similarly, we can use this in the Array.prototype.join implementation. To track this, a new bit is added to the instance type. This bit is used as a hint in generated code and in runtime functions. To enable testing, a new V8 extension is added, controlled by the --expose-externalize-string flag. Review URL: http://codereview.chromium.org/2762008 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4894 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
26e692af2f
commit
3cafa65462
@ -10559,13 +10559,14 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ ldrb(r4, FieldMemOperand(r4, Map::kInstanceTypeOffset));
|
||||
__ ldrb(r5, FieldMemOperand(r5, Map::kInstanceTypeOffset));
|
||||
}
|
||||
Label non_ascii, allocated;
|
||||
Label non_ascii, allocated, ascii_data;
|
||||
ASSERT_EQ(0, kTwoByteStringTag);
|
||||
__ tst(r4, Operand(kStringEncodingMask));
|
||||
__ tst(r5, Operand(kStringEncodingMask), ne);
|
||||
__ b(eq, &non_ascii);
|
||||
|
||||
// Allocate an ASCII cons string.
|
||||
__ bind(&ascii_data);
|
||||
__ AllocateAsciiConsString(r7, r6, r4, r5, &string_add_runtime);
|
||||
__ bind(&allocated);
|
||||
// Fill the fields of the cons string.
|
||||
@ -10577,6 +10578,19 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ Ret();
|
||||
|
||||
__ bind(&non_ascii);
|
||||
// At least one of the strings is two-byte. Check whether it happens
|
||||
// to contain only ascii characters.
|
||||
// r4: first instance type.
|
||||
// r5: second instance type.
|
||||
__ tst(r4, Operand(kAsciiDataHintMask));
|
||||
__ tst(r5, Operand(kAsciiDataHintMask), ne);
|
||||
__ b(ne, &ascii_data);
|
||||
__ eor(r4, r4, Operand(r5));
|
||||
ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
|
||||
__ and_(r4, r4, Operand(kAsciiStringTag | kAsciiDataHintTag));
|
||||
__ cmp(r4, Operand(kAsciiStringTag | kAsciiDataHintTag));
|
||||
__ b(eq, &ascii_data);
|
||||
|
||||
// Allocate a two byte cons string.
|
||||
__ AllocateTwoByteConsString(r7, r6, r4, r5, &string_add_runtime);
|
||||
__ jmp(&allocated);
|
||||
|
@ -1462,6 +1462,7 @@ bool Genesis::InstallExtensions(Handle<Context> global_context,
|
||||
}
|
||||
|
||||
if (FLAG_expose_gc) InstallExtension("v8/gc");
|
||||
if (FLAG_expose_externalize_string) InstallExtension("v8/externalize");
|
||||
|
||||
if (extensions == NULL) return true;
|
||||
// Install required extensions
|
||||
|
114
src/execution.cc
114
src/execution.cc
@ -679,7 +679,7 @@ Object* Execution::HandleStackGuardInterrupt() {
|
||||
|
||||
// --- G C E x t e n s i o n ---
|
||||
|
||||
const char* GCExtension::kSource = "native function gc();";
|
||||
const char* const GCExtension::kSource = "native function gc();";
|
||||
|
||||
|
||||
v8::Handle<v8::FunctionTemplate> GCExtension::GetNativeFunction(
|
||||
@ -695,7 +695,115 @@ v8::Handle<v8::Value> GCExtension::GC(const v8::Arguments& args) {
|
||||
}
|
||||
|
||||
|
||||
static GCExtension kGCExtension;
|
||||
v8::DeclareExtension kGCExtensionDeclaration(&kGCExtension);
|
||||
static GCExtension gc_extension;
|
||||
static v8::DeclareExtension gc_extension_declaration(&gc_extension);
|
||||
|
||||
|
||||
// --- E x t e r n a l i z e S t r i n g E x t e n s i o n ---
|
||||
|
||||
|
||||
// Minimal external string resource backed by a heap-allocated character
// array.  Base is expected to declare the virtual data()/length() interface
// implemented here; the typedefs in this file instantiate it with the
// v8::String external resource classes.
template <typename Char, typename Base>
class SimpleStringResource : public Base {
 public:
  // Takes ownership of |data|, which must have been allocated with new[].
  SimpleStringResource(Char* data, size_t length)
      : data_(data),
        length_(length) {}

  // The buffer is an array, so it has to be released with delete[]; using
  // plain delete on memory obtained from new[] is undefined behavior.
  virtual ~SimpleStringResource() { delete[] data_; }

  // Returns the owned character buffer (not NUL-terminated by this class).
  virtual const Char* data() const { return data_; }

  // Returns the number of characters in the buffer.
  virtual size_t length() const { return length_; }

 private:
  Char* const data_;
  const size_t length_;
};
|
||||
|
||||
|
||||
typedef SimpleStringResource<char, v8::String::ExternalAsciiStringResource>
|
||||
SimpleAsciiStringResource;
|
||||
typedef SimpleStringResource<uc16, v8::String::ExternalStringResource>
|
||||
SimpleTwoByteStringResource;
|
||||
|
||||
|
||||
const char* const ExternalizeStringExtension::kSource =
|
||||
"native function externalizeString();"
|
||||
"native function isAsciiString();";
|
||||
|
||||
|
||||
// Maps the name of a native function declared in kSource to the function
// template wrapping its C++ implementation.  Anything that is not
// "externalizeString" is treated as "isAsciiString" (asserted in debug
// builds).
v8::Handle<v8::FunctionTemplate> ExternalizeStringExtension::GetNativeFunction(
    v8::Handle<v8::String> str) {
  const bool wants_externalize =
      strcmp(*v8::String::AsciiValue(str), "externalizeString") == 0;
  if (!wants_externalize) {
    ASSERT(strcmp(*v8::String::AsciiValue(str), "isAsciiString") == 0);
    return v8::FunctionTemplate::New(ExternalizeStringExtension::IsAscii);
  }
  return v8::FunctionTemplate::New(ExternalizeStringExtension::Externalize);
}
|
||||
|
||||
|
||||
// Native implementation of externalizeString(string[, forceTwoByte]).
//
// Copies the characters of the string into a freshly allocated buffer, wraps
// the buffer in a SimpleStringResource and morphs the string into an external
// string backed by that resource.  An ascii string is externalized as ascii
// unless the optional boolean second argument is true, in which case a
// two-byte resource is used.
//
// Throws a JavaScript exception if the arguments have the wrong type, if the
// string is already external, or if MakeExternal() refuses the string.
// Returns undefined on success.
v8::Handle<v8::Value> ExternalizeStringExtension::Externalize(
    const v8::Arguments& args) {
  if (args.Length() < 1 || !args[0]->IsString()) {
    return v8::ThrowException(v8::String::New(
        "First parameter to externalizeString() must be a string."));
  }
  bool force_two_byte = false;
  if (args.Length() >= 2) {
    if (args[1]->IsBoolean()) {
      force_two_byte = args[1]->BooleanValue();
    } else {
      return v8::ThrowException(v8::String::New(
          "Second parameter to externalizeString() must be a boolean."));
    }
  }
  bool result = false;
  Handle<String> string = Utils::OpenHandle(*args[0].As<v8::String>());
  if (string->IsExternalString()) {
    return v8::ThrowException(v8::String::New(
        "externalizeString() can't externalize twice."));
  }
  if (string->IsAsciiRepresentation() && !force_two_byte) {
    char* data = new char[string->length()];
    String::WriteToFlat(*string, data, 0, string->length());
    SimpleAsciiStringResource* resource = new SimpleAsciiStringResource(
        data, string->length());
    result = string->MakeExternal(resource);
    if (result) {
      // Symbols are not registered in the external string table.
      if (!string->IsSymbol()) {
        i::ExternalStringTable::AddString(*string);
      }
    } else {
      // The string did not adopt the resource, so nobody else will ever
      // release it (or the character buffer it owns).
      delete resource;
    }
  } else {
    uc16* data = new uc16[string->length()];
    String::WriteToFlat(*string, data, 0, string->length());
    SimpleTwoByteStringResource* resource = new SimpleTwoByteStringResource(
        data, string->length());
    result = string->MakeExternal(resource);
    if (result) {
      // Symbols are not registered in the external string table.
      if (!string->IsSymbol()) {
        i::ExternalStringTable::AddString(*string);
      }
    } else {
      // See above: a rejected resource must be freed here to avoid a leak.
      delete resource;
    }
  }
  if (!result) {
    return v8::ThrowException(v8::String::New("externalizeString() failed."));
  }
  return v8::Undefined();
}
|
||||
|
||||
|
||||
// Native implementation of isAsciiString(string): returns true when the
// string uses the ascii representation and false otherwise.  Throws a
// JavaScript exception unless called with exactly one string argument.
v8::Handle<v8::Value> ExternalizeStringExtension::IsAscii(
    const v8::Arguments& args) {
  const bool single_string_argument =
      args.Length() == 1 && args[0]->IsString();
  if (!single_string_argument) {
    return v8::ThrowException(v8::String::New(
        "isAsciiString() requires a single string argument."));
  }
  Handle<String> subject = Utils::OpenHandle(*args[0].As<v8::String>());
  if (subject->IsAsciiRepresentation()) {
    return v8::True();
  }
  return v8::False();
}
|
||||
|
||||
|
||||
static ExternalizeStringExtension externalize_extension;
|
||||
static v8::DeclareExtension externalize_extension_declaration(
|
||||
&externalize_extension);
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
@ -316,10 +316,21 @@ class GCExtension : public v8::Extension {
|
||||
v8::Handle<v8::String> name);
|
||||
static v8::Handle<v8::Value> GC(const v8::Arguments& args);
|
||||
private:
|
||||
static const char* kSource;
|
||||
static const char* const kSource;
|
||||
};
|
||||
|
||||
|
||||
class ExternalizeStringExtension : public v8::Extension {
|
||||
public:
|
||||
ExternalizeStringExtension() : v8::Extension("v8/externalize", kSource) {}
|
||||
virtual v8::Handle<v8::FunctionTemplate> GetNativeFunction(
|
||||
v8::Handle<v8::String> name);
|
||||
static v8::Handle<v8::Value> Externalize(const v8::Arguments& args);
|
||||
static v8::Handle<v8::Value> IsAscii(const v8::Arguments& args);
|
||||
private:
|
||||
static const char* const kSource;
|
||||
};
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_EXECUTION_H_
|
||||
|
@ -123,6 +123,8 @@ DEFINE_bool(enable_armv7, true,
|
||||
DEFINE_string(expose_natives_as, NULL, "expose natives in global object")
|
||||
DEFINE_string(expose_debug_as, NULL, "expose debug in global object")
|
||||
DEFINE_bool(expose_gc, false, "expose gc extension")
|
||||
DEFINE_bool(expose_externalize_string, false,
|
||||
"expose externalize string extension")
|
||||
DEFINE_int(stack_trace_limit, 10, "number of stack frames to capture")
|
||||
DEFINE_bool(disable_native_files, false, "disable builtin natives files")
|
||||
|
||||
|
47
src/heap.cc
47
src/heap.cc
@ -1929,6 +1929,18 @@ Object* Heap::AllocateConsString(String* first, String* second) {
|
||||
return Failure::OutOfMemoryException();
|
||||
}
|
||||
|
||||
bool is_ascii_data_in_two_byte_string = false;
|
||||
if (!is_ascii) {
|
||||
// At least one of the strings uses two-byte representation so we
|
||||
// can't use the fast case code for short ascii strings below, but
|
||||
// we can try to save memory if all chars actually fit in ascii.
|
||||
is_ascii_data_in_two_byte_string =
|
||||
first->HasOnlyAsciiChars() && second->HasOnlyAsciiChars();
|
||||
if (is_ascii_data_in_two_byte_string) {
|
||||
Counters::string_add_runtime_ext_to_ascii.Increment();
|
||||
}
|
||||
}
|
||||
|
||||
// If the resulting string is small make a flat string.
|
||||
if (length < String::kMinNonFlatLength) {
|
||||
ASSERT(first->IsFlat());
|
||||
@ -1955,22 +1967,13 @@ Object* Heap::AllocateConsString(String* first, String* second) {
|
||||
for (int i = 0; i < second_length; i++) *dest++ = src[i];
|
||||
return result;
|
||||
} else {
|
||||
// For short external two-byte strings we check whether they can
|
||||
// be represented using ascii.
|
||||
if (!first_is_ascii) {
|
||||
first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars();
|
||||
}
|
||||
if (first_is_ascii && !second_is_ascii) {
|
||||
second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars();
|
||||
}
|
||||
if (first_is_ascii && second_is_ascii) {
|
||||
if (is_ascii_data_in_two_byte_string) {
|
||||
Object* result = AllocateRawAsciiString(length);
|
||||
if (result->IsFailure()) return result;
|
||||
// Copy the characters into the new object.
|
||||
char* dest = SeqAsciiString::cast(result)->GetChars();
|
||||
String::WriteToFlat(first, dest, 0, first_length);
|
||||
String::WriteToFlat(second, dest + first_length, 0, second_length);
|
||||
Counters::string_add_runtime_ext_to_ascii.Increment();
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1984,7 +1987,8 @@ Object* Heap::AllocateConsString(String* first, String* second) {
|
||||
}
|
||||
}
|
||||
|
||||
Map* map = is_ascii ? cons_ascii_string_map() : cons_string_map();
|
||||
Map* map = (is_ascii || is_ascii_data_in_two_byte_string) ?
|
||||
cons_ascii_string_map() : cons_string_map();
|
||||
|
||||
Object* result = Allocate(map, NEW_SPACE);
|
||||
if (result->IsFailure()) return result;
|
||||
@ -2070,7 +2074,23 @@ Object* Heap::AllocateExternalStringFromTwoByte(
|
||||
return Failure::OutOfMemoryException();
|
||||
}
|
||||
|
||||
Map* map = Heap::external_string_map();
|
||||
// For small strings we check whether the resource contains only
|
||||
// ascii characters. If yes, we use a different string map.
|
||||
bool is_ascii = true;
|
||||
if (length >= static_cast<size_t>(String::kMinNonFlatLength)) {
|
||||
is_ascii = false;
|
||||
} else {
|
||||
const uc16* data = resource->data();
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
if (data[i] > String::kMaxAsciiCharCode) {
|
||||
is_ascii = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map* map = is_ascii ?
|
||||
Heap::external_string_with_ascii_data_map() : Heap::external_string_map();
|
||||
Object* result = Allocate(map, NEW_SPACE);
|
||||
if (result->IsFailure()) return result;
|
||||
|
||||
@ -2853,6 +2873,9 @@ Map* Heap::SymbolMapForString(String* string) {
|
||||
if (map == cons_ascii_string_map()) return cons_ascii_symbol_map();
|
||||
if (map == external_string_map()) return external_symbol_map();
|
||||
if (map == external_ascii_string_map()) return external_ascii_symbol_map();
|
||||
if (map == external_string_with_ascii_data_map()) {
|
||||
return external_symbol_with_ascii_data_map();
|
||||
}
|
||||
|
||||
// No match found.
|
||||
return NULL;
|
||||
|
@ -69,10 +69,12 @@ class ZoneScopeInfo;
|
||||
V(Map, cons_symbol_map, ConsSymbolMap) \
|
||||
V(Map, cons_ascii_symbol_map, ConsAsciiSymbolMap) \
|
||||
V(Map, external_symbol_map, ExternalSymbolMap) \
|
||||
V(Map, external_symbol_with_ascii_data_map, ExternalSymbolWithAsciiDataMap) \
|
||||
V(Map, external_ascii_symbol_map, ExternalAsciiSymbolMap) \
|
||||
V(Map, cons_string_map, ConsStringMap) \
|
||||
V(Map, cons_ascii_string_map, ConsAsciiStringMap) \
|
||||
V(Map, external_string_map, ExternalStringMap) \
|
||||
V(Map, external_string_with_ascii_data_map, ExternalStringWithAsciiDataMap) \
|
||||
V(Map, external_ascii_string_map, ExternalAsciiStringMap) \
|
||||
V(Map, undetectable_string_map, UndetectableStringMap) \
|
||||
V(Map, undetectable_ascii_string_map, UndetectableAsciiStringMap) \
|
||||
|
@ -12852,7 +12852,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
// If result is not supposed to be flat allocate a cons string object. If both
|
||||
// strings are ascii the result is an ascii cons string.
|
||||
Label non_ascii, allocated;
|
||||
Label non_ascii, allocated, ascii_data;
|
||||
__ mov(edi, FieldOperand(eax, HeapObject::kMapOffset));
|
||||
__ movzx_b(ecx, FieldOperand(edi, Map::kInstanceTypeOffset));
|
||||
__ mov(edi, FieldOperand(edx, HeapObject::kMapOffset));
|
||||
@ -12861,6 +12861,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
ASSERT(kStringEncodingMask == kAsciiStringTag);
|
||||
__ test(ecx, Immediate(kAsciiStringTag));
|
||||
__ j(zero, &non_ascii);
|
||||
__ bind(&ascii_data);
|
||||
// Allocate an ascii cons string.
|
||||
__ AllocateAsciiConsString(ecx, edi, no_reg, &string_add_runtime);
|
||||
__ bind(&allocated);
|
||||
@ -12875,6 +12876,19 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ IncrementCounter(&Counters::string_add_native, 1);
|
||||
__ ret(2 * kPointerSize);
|
||||
__ bind(&non_ascii);
|
||||
// At least one of the strings is two-byte. Check whether it happens
|
||||
// to contain only ascii characters.
|
||||
// ecx: first instance type AND second instance type.
|
||||
// edi: second instance type.
|
||||
__ test(ecx, Immediate(kAsciiDataHintMask));
|
||||
__ j(not_zero, &ascii_data);
|
||||
__ mov(ecx, FieldOperand(eax, HeapObject::kMapOffset));
|
||||
__ movzx_b(ecx, FieldOperand(ecx, Map::kInstanceTypeOffset));
|
||||
__ xor_(edi, Operand(ecx));
|
||||
ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
|
||||
__ and_(edi, kAsciiStringTag | kAsciiDataHintTag);
|
||||
__ cmp(edi, kAsciiStringTag | kAsciiDataHintTag);
|
||||
__ j(equal, &ascii_data);
|
||||
// Allocate a two byte cons string.
|
||||
__ AllocateConsString(ecx, edi, no_reg, &string_add_runtime);
|
||||
__ jmp(&allocated);
|
||||
|
@ -552,12 +552,14 @@ static const char* TypeToString(InstanceType type) {
|
||||
case CONS_SYMBOL_TYPE: return "CONS_SYMBOL";
|
||||
case CONS_ASCII_SYMBOL_TYPE: return "CONS_ASCII_SYMBOL";
|
||||
case EXTERNAL_ASCII_SYMBOL_TYPE:
|
||||
case EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE:
|
||||
case EXTERNAL_SYMBOL_TYPE: return "EXTERNAL_SYMBOL";
|
||||
case ASCII_STRING_TYPE: return "ASCII_STRING";
|
||||
case STRING_TYPE: return "TWO_BYTE_STRING";
|
||||
case CONS_STRING_TYPE:
|
||||
case CONS_ASCII_STRING_TYPE: return "CONS_STRING";
|
||||
case EXTERNAL_ASCII_STRING_TYPE:
|
||||
case EXTERNAL_STRING_WITH_ASCII_DATA_TYPE:
|
||||
case EXTERNAL_STRING_TYPE: return "EXTERNAL_STRING";
|
||||
case FIXED_ARRAY_TYPE: return "FIXED_ARRAY";
|
||||
case BYTE_ARRAY_TYPE: return "BYTE_ARRAY";
|
||||
|
@ -237,31 +237,20 @@ bool StringShape::IsSymbol() {
|
||||
|
||||
bool String::IsAsciiRepresentation() {
|
||||
uint32_t type = map()->instance_type();
|
||||
if ((type & kStringRepresentationMask) == kConsStringTag &&
|
||||
ConsString::cast(this)->second()->length() == 0) {
|
||||
return ConsString::cast(this)->first()->IsAsciiRepresentation();
|
||||
}
|
||||
return (type & kStringEncodingMask) == kAsciiStringTag;
|
||||
}
|
||||
|
||||
|
||||
bool String::IsTwoByteRepresentation() {
|
||||
uint32_t type = map()->instance_type();
|
||||
if ((type & kStringRepresentationMask) == kConsStringTag &&
|
||||
ConsString::cast(this)->second()->length() == 0) {
|
||||
return ConsString::cast(this)->first()->IsTwoByteRepresentation();
|
||||
}
|
||||
return (type & kStringEncodingMask) == kTwoByteStringTag;
|
||||
}
|
||||
|
||||
|
||||
bool String::IsExternalTwoByteStringWithAsciiChars() {
|
||||
if (!IsExternalTwoByteString()) return false;
|
||||
const uc16* data = ExternalTwoByteString::cast(this)->resource()->data();
|
||||
for (int i = 0, len = length(); i < len; i++) {
|
||||
if (data[i] > kMaxAsciiCharCode) return false;
|
||||
}
|
||||
return true;
|
||||
bool String::HasOnlyAsciiChars() {
|
||||
uint32_t type = map()->instance_type();
|
||||
return (type & kStringEncodingMask) == kAsciiStringTag ||
|
||||
(type & kAsciiDataHintMask) == kAsciiDataHintTag;
|
||||
}
|
||||
|
||||
|
||||
|
@ -678,6 +678,9 @@ Object* String::SlowTryFlatten(PretenureFlag pretenure) {
|
||||
|
||||
|
||||
bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
|
||||
// Externalizing twice leaks the external resource, so it's
|
||||
// prohibited by the API.
|
||||
ASSERT(!this->IsExternalString());
|
||||
#ifdef DEBUG
|
||||
if (FLAG_enable_slow_asserts) {
|
||||
// Assert that the resource and the string are equivalent.
|
||||
@ -697,13 +700,16 @@ bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
|
||||
return false;
|
||||
}
|
||||
ASSERT(size >= ExternalString::kSize);
|
||||
bool is_ascii = this->IsAsciiRepresentation();
|
||||
bool is_symbol = this->IsSymbol();
|
||||
int length = this->length();
|
||||
int hash_field = this->hash_field();
|
||||
|
||||
// Morph the object to an external string by adjusting the map and
|
||||
// reinitializing the fields.
|
||||
this->set_map(Heap::external_string_map());
|
||||
this->set_map(is_ascii ?
|
||||
Heap::external_string_with_ascii_data_map() :
|
||||
Heap::external_string_map());
|
||||
ExternalTwoByteString* self = ExternalTwoByteString::cast(this);
|
||||
self->set_length(length);
|
||||
self->set_hash_field(hash_field);
|
||||
@ -713,7 +719,9 @@ bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
|
||||
if (is_symbol) {
|
||||
self->Hash(); // Force regeneration of the hash value.
|
||||
// Now morph this external string into an external symbol.
|
||||
this->set_map(Heap::external_symbol_map());
|
||||
this->set_map(is_ascii ?
|
||||
Heap::external_symbol_with_ascii_data_map() :
|
||||
Heap::external_symbol_map());
|
||||
}
|
||||
|
||||
// Fill the remainder of the string with dead wood.
|
||||
|
@ -320,6 +320,10 @@ enum PropertyNormalizationMode {
|
||||
ExternalTwoByteString::kSize, \
|
||||
external_symbol, \
|
||||
ExternalSymbol) \
|
||||
V(EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE, \
|
||||
ExternalTwoByteString::kSize, \
|
||||
external_symbol_with_ascii_data, \
|
||||
ExternalSymbolWithAsciiData) \
|
||||
V(EXTERNAL_ASCII_SYMBOL_TYPE, \
|
||||
ExternalAsciiString::kSize, \
|
||||
external_ascii_symbol, \
|
||||
@ -344,6 +348,10 @@ enum PropertyNormalizationMode {
|
||||
ExternalTwoByteString::kSize, \
|
||||
external_string, \
|
||||
ExternalString) \
|
||||
V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE, \
|
||||
ExternalTwoByteString::kSize, \
|
||||
external_string_with_ascii_data, \
|
||||
ExternalStringWithAsciiData) \
|
||||
V(EXTERNAL_ASCII_STRING_TYPE, \
|
||||
ExternalAsciiString::kSize, \
|
||||
external_ascii_string, \
|
||||
@ -412,6 +420,11 @@ enum StringRepresentationTag {
|
||||
};
|
||||
const uint32_t kIsConsStringMask = 0x1;
|
||||
|
||||
// If bit 7 is clear, then bit 3 indicates whether this two-byte
|
||||
// string actually contains ascii data.
|
||||
const uint32_t kAsciiDataHintMask = 0x08;
|
||||
const uint32_t kAsciiDataHintTag = 0x08;
|
||||
|
||||
|
||||
// A ConsString with an empty string as the right side is a candidate
|
||||
// for being shortcut by the garbage collector unless it is a
|
||||
@ -427,18 +440,22 @@ const uint32_t kShortcutTypeTag = kConsStringTag;
|
||||
|
||||
enum InstanceType {
|
||||
// String types.
|
||||
SYMBOL_TYPE = kSymbolTag | kSeqStringTag,
|
||||
SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kSeqStringTag,
|
||||
ASCII_SYMBOL_TYPE = kAsciiStringTag | kSymbolTag | kSeqStringTag,
|
||||
CONS_SYMBOL_TYPE = kSymbolTag | kConsStringTag,
|
||||
CONS_SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kConsStringTag,
|
||||
CONS_ASCII_SYMBOL_TYPE = kAsciiStringTag | kSymbolTag | kConsStringTag,
|
||||
EXTERNAL_SYMBOL_TYPE = kSymbolTag | kExternalStringTag,
|
||||
EXTERNAL_SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kExternalStringTag,
|
||||
EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE =
|
||||
kTwoByteStringTag | kSymbolTag | kExternalStringTag | kAsciiDataHintTag,
|
||||
EXTERNAL_ASCII_SYMBOL_TYPE =
|
||||
kAsciiStringTag | kSymbolTag | kExternalStringTag,
|
||||
STRING_TYPE = kSeqStringTag,
|
||||
STRING_TYPE = kTwoByteStringTag | kSeqStringTag,
|
||||
ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
|
||||
CONS_STRING_TYPE = kConsStringTag,
|
||||
CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
|
||||
CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
|
||||
EXTERNAL_STRING_TYPE = kExternalStringTag,
|
||||
EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
|
||||
EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
|
||||
kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
|
||||
EXTERNAL_ASCII_STRING_TYPE = kAsciiStringTag | kExternalStringTag,
|
||||
PRIVATE_EXTERNAL_ASCII_STRING_TYPE = EXTERNAL_ASCII_STRING_TYPE,
|
||||
|
||||
@ -4069,12 +4086,14 @@ class String: public HeapObject {
|
||||
inline bool IsAsciiRepresentation();
|
||||
inline bool IsTwoByteRepresentation();
|
||||
|
||||
// Check whether this string is an external two-byte string that in
|
||||
// fact contains only ascii characters.
|
||||
// Returns whether this string has ascii chars, i.e. all of them can
|
||||
// be ascii encoded. This might be the case even if the string is
|
||||
// two-byte. Such strings may appear when the embedder prefers
|
||||
// two-byte external representations even for ascii data.
|
||||
//
|
||||
// Such strings may appear when the embedder prefers two-byte
|
||||
// representations even for ascii data.
|
||||
inline bool IsExternalTwoByteStringWithAsciiChars();
|
||||
// NOTE: this should be considered only a hint. False negatives are
|
||||
// possible.
|
||||
inline bool HasOnlyAsciiChars();
|
||||
|
||||
// Get and set individual two byte chars in the string.
|
||||
inline void Set(int index, uint16_t value);
|
||||
|
@ -4946,16 +4946,6 @@ static Object* ConvertCaseHelper(String* s,
|
||||
}
|
||||
|
||||
|
||||
static inline SeqAsciiString* TryGetSeqAsciiString(String* s) {
|
||||
if (!s->IsFlat() || !s->IsAsciiRepresentation()) return NULL;
|
||||
if (s->IsConsString()) {
|
||||
ASSERT(ConsString::cast(s)->second()->length() == 0);
|
||||
return SeqAsciiString::cast(ConsString::cast(s)->first());
|
||||
}
|
||||
return SeqAsciiString::cast(s);
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
struct ToLowerTraits {
|
||||
@ -5002,7 +4992,7 @@ static Object* ConvertCase(
|
||||
unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) {
|
||||
NoHandleAllocation ha;
|
||||
CONVERT_CHECKED(String, s, args[0]);
|
||||
s->TryFlatten();
|
||||
s = s->TryFlattenGetString();
|
||||
|
||||
const int length = s->length();
|
||||
// Assume that the string is not empty; we need this assumption later
|
||||
@ -5014,13 +5004,12 @@ static Object* ConvertCase(
|
||||
// character is also ascii. This is currently the case, but it
|
||||
// might break in the future if we implement more context and locale
|
||||
// dependent upper/lower conversions.
|
||||
SeqAsciiString* seq_ascii = TryGetSeqAsciiString(s);
|
||||
if (seq_ascii != NULL) {
|
||||
if (s->IsSeqAsciiString()) {
|
||||
Object* o = Heap::AllocateRawAsciiString(length);
|
||||
if (o->IsFailure()) return o;
|
||||
SeqAsciiString* result = SeqAsciiString::cast(o);
|
||||
bool has_changed_character = ConvertTraits::ConvertAscii(
|
||||
result->GetChars(), seq_ascii->GetChars(), length);
|
||||
result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length);
|
||||
return has_changed_character ? result : s;
|
||||
}
|
||||
|
||||
@ -5564,7 +5553,7 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
|
||||
if (first->IsString()) return first;
|
||||
}
|
||||
|
||||
bool ascii = special->IsAsciiRepresentation();
|
||||
bool ascii = special->HasOnlyAsciiChars();
|
||||
int position = 0;
|
||||
for (int i = 0; i < array_length; i++) {
|
||||
int increment = 0;
|
||||
@ -5605,7 +5594,7 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
|
||||
String* element = String::cast(elt);
|
||||
int element_length = element->length();
|
||||
increment = element_length;
|
||||
if (ascii && !element->IsAsciiRepresentation()) {
|
||||
if (ascii && !element->HasOnlyAsciiChars()) {
|
||||
ascii = false;
|
||||
}
|
||||
} else {
|
||||
|
@ -11205,16 +11205,17 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
// If result is not supposed to be flat, allocate a cons string object. If
|
||||
// both strings are ascii the result is an ascii cons string.
|
||||
// rax: first string
|
||||
// ebx: length of resulting flat string
|
||||
// rbx: length of resulting flat string
|
||||
// rdx: second string
|
||||
// r8: instance type of first string
|
||||
// r9: instance type of second string
|
||||
Label non_ascii, allocated;
|
||||
Label non_ascii, allocated, ascii_data;
|
||||
__ movl(rcx, r8);
|
||||
__ and_(rcx, r9);
|
||||
ASSERT(kStringEncodingMask == kAsciiStringTag);
|
||||
__ testl(rcx, Immediate(kAsciiStringTag));
|
||||
__ j(zero, &non_ascii);
|
||||
__ bind(&ascii_data);
|
||||
// Allocate an ascii cons string.
|
||||
__ AllocateAsciiConsString(rcx, rdi, no_reg, &string_add_runtime);
|
||||
__ bind(&allocated);
|
||||
@ -11228,6 +11229,18 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ IncrementCounter(&Counters::string_add_native, 1);
|
||||
__ ret(2 * kPointerSize);
|
||||
__ bind(&non_ascii);
|
||||
// At least one of the strings is two-byte. Check whether it happens
|
||||
// to contain only ascii characters.
|
||||
// rcx: first instance type AND second instance type.
|
||||
// r8: first instance type.
|
||||
// r9: second instance type.
|
||||
__ testb(rcx, Immediate(kAsciiDataHintMask));
|
||||
__ j(not_zero, &ascii_data);
|
||||
__ xor_(r8, r9);
|
||||
ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
|
||||
__ andb(r8, Immediate(kAsciiStringTag | kAsciiDataHintTag));
|
||||
__ cmpb(r8, Immediate(kAsciiStringTag | kAsciiDataHintTag));
|
||||
__ j(equal, &ascii_data);
|
||||
// Allocate a two byte cons string.
|
||||
__ AllocateConsString(rcx, rdi, no_reg, &string_add_runtime);
|
||||
__ jmp(&allocated);
|
||||
@ -11235,7 +11248,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
// Handle creating a flat result. First check that both strings are not
|
||||
// external strings.
|
||||
// rax: first string
|
||||
// ebx: length of resulting flat string as smi
|
||||
// rbx: length of resulting flat string as smi
|
||||
// rdx: second string
|
||||
// r8: instance type of first string
|
||||
// r9: instance type of second string
|
||||
@ -11251,7 +11264,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ j(equal, &string_add_runtime);
|
||||
// Now check if both strings are ascii strings.
|
||||
// rax: first string
|
||||
// ebx: length of resulting flat string
|
||||
// rbx: length of resulting flat string
|
||||
// rdx: second string
|
||||
// r8: instance type of first string
|
||||
// r9: instance type of second string
|
||||
|
95
test/mjsunit/string-externalize.js
Normal file
95
test/mjsunit/string-externalize.js
Normal file
@ -0,0 +1,95 @@
|
||||
// Copyright 2010 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Flags: --expose-externalize-string
|
||||
|
||||
var size = 1024;
|
||||
|
||||
// Checks that the ascii-ness of string data is tracked across
// externalization, Array.prototype.join, concatenation, flattening and
// lower-casing.  Uses externalizeString() and isAsciiString(), which are only
// available when running with --expose-externalize-string.
function test() {
  var str = "";

  // Build an ascii cons string.
  for (var i = 0; i < size; i++) {
    str += String.fromCharCode(i & 0x7f);
  }
  assertTrue(isAsciiString(str));

  // The strings are assembled at runtime before being externalized.
  var twoByteExternalWithAsciiData =
      "AA" + (function() { return "A"; })();
  externalizeString(twoByteExternalWithAsciiData, true /* force two-byte */);
  assertFalse(isAsciiString(twoByteExternalWithAsciiData));

  var realTwoByteExternalString =
      "\u1234\u1234" + (function() { return "\u1234"; })();
  externalizeString(realTwoByteExternalString);
  assertFalse(isAsciiString(realTwoByteExternalString));

  assertTrue(isAsciiString(["a", twoByteExternalWithAsciiData].join("")));

  // Appending a two-byte string that contains only ascii chars should
  // still produce an ascii cons.
  var str1 = str + twoByteExternalWithAsciiData;
  assertTrue(isAsciiString(str1));

  // Force flattening of the string.
  var old_length = str1.length - twoByteExternalWithAsciiData.length;
  for (var i = 0; i < old_length; i++) {
    assertEquals(String.fromCharCode(i & 0x7f), str1[i]);
  }
  for (var i = old_length; i < str1.length; i++) {
    assertEquals("A", str1[i]);
  }

  // Flattened string should still be ascii.
  assertTrue(isAsciiString(str1));

  // Lower-casing an ascii string should produce ascii.
  assertTrue(isAsciiString(str1.toLowerCase()));

  assertFalse(isAsciiString(["a", realTwoByteExternalString].join("")));

  // Appending a real two-byte string should produce a two-byte cons.
  var str2 = str + realTwoByteExternalString;
  assertFalse(isAsciiString(str2));

  // Force flattening of the string.
  old_length = str2.length - realTwoByteExternalString.length;
  for (var i = 0; i < old_length; i++) {
    assertEquals(String.fromCharCode(i & 0x7f), str2[i]);
  }
  // Check the appended two-byte tail.  The bound must be str2.length:
  // str.length equals old_length, so bounding by it would skip these checks.
  for (var i = old_length; i < str2.length; i++) {
    assertEquals("\u1234", str2[i]);
  }

  // Flattened string should still be two-byte.
  assertFalse(isAsciiString(str2));
}
|
||||
|
||||
// Run the test many times to ensure IC-s don't break things.
|
||||
for (var i = 0; i < 10; i++) {
|
||||
test();
|
||||
}
|
Loading…
Reference in New Issue
Block a user