Track ascii-ness of data in externalized strings.

If a two-byte string contains only ascii characters, then we can save
memory when flattening a cons string containing it. Similarly, we can
use this in the Array.prototype.join implementation. To track this, a new
bit is added to the instance type. This bit is used as a hint in generated
code and in runtime functions.

To enable testing, a new V8 extension is added, controlled by the
--expose-externalize-string flag.

Review URL: http://codereview.chromium.org/2762008

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4894 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
vitalyr@chromium.org 2010-06-17 16:19:28 +00:00
parent 26e692af2f
commit 3cafa65462
15 changed files with 356 additions and 66 deletions

View File

@ -10559,13 +10559,14 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ ldrb(r4, FieldMemOperand(r4, Map::kInstanceTypeOffset));
__ ldrb(r5, FieldMemOperand(r5, Map::kInstanceTypeOffset));
}
Label non_ascii, allocated;
Label non_ascii, allocated, ascii_data;
ASSERT_EQ(0, kTwoByteStringTag);
__ tst(r4, Operand(kStringEncodingMask));
__ tst(r5, Operand(kStringEncodingMask), ne);
__ b(eq, &non_ascii);
// Allocate an ASCII cons string.
__ bind(&ascii_data);
__ AllocateAsciiConsString(r7, r6, r4, r5, &string_add_runtime);
__ bind(&allocated);
// Fill the fields of the cons string.
@ -10577,6 +10578,19 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ Ret();
__ bind(&non_ascii);
// At least one of the strings is two-byte. Check whether it happens
// to contain only ascii characters.
// r4: first instance type.
// r5: second instance type.
__ tst(r4, Operand(kAsciiDataHintMask));
__ tst(r5, Operand(kAsciiDataHintMask), ne);
__ b(ne, &ascii_data);
__ eor(r4, r4, Operand(r5));
ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
__ and_(r4, r4, Operand(kAsciiStringTag | kAsciiDataHintTag));
__ cmp(r4, Operand(kAsciiStringTag | kAsciiDataHintTag));
__ b(eq, &ascii_data);
// Allocate a two byte cons string.
__ AllocateTwoByteConsString(r7, r6, r4, r5, &string_add_runtime);
__ jmp(&allocated);

View File

@ -1462,6 +1462,7 @@ bool Genesis::InstallExtensions(Handle<Context> global_context,
}
if (FLAG_expose_gc) InstallExtension("v8/gc");
if (FLAG_expose_externalize_string) InstallExtension("v8/externalize");
if (extensions == NULL) return true;
// Install required extensions

View File

@ -679,7 +679,7 @@ Object* Execution::HandleStackGuardInterrupt() {
// --- G C E x t e n s i o n ---
const char* GCExtension::kSource = "native function gc();";
const char* const GCExtension::kSource = "native function gc();";
v8::Handle<v8::FunctionTemplate> GCExtension::GetNativeFunction(
@ -695,7 +695,115 @@ v8::Handle<v8::Value> GCExtension::GC(const v8::Arguments& args) {
}
static GCExtension kGCExtension;
v8::DeclareExtension kGCExtensionDeclaration(&kGCExtension);
static GCExtension gc_extension;
static v8::DeclareExtension gc_extension_declaration(&gc_extension);
// --- E x t e r n a l i z e S t r i n g E x t e n s i o n ---
// Simple external string resource that owns a heap-allocated character
// buffer. |Char| is the character type of the buffer and |Base| is the
// resource interface to derive from (see the typedefs that instantiate it
// with the v8::String external resource classes).
template <typename Char, typename Base>
class SimpleStringResource : public Base {
 public:
  // Takes ownership of |data|, which must have been allocated with new[].
  SimpleStringResource(Char* data, size_t length)
      : data_(data),
        length_(length) {}

  // The buffer is an array allocation, so it has to be released with
  // delete[]; releasing it with scalar delete is undefined behavior.
  virtual ~SimpleStringResource() { delete[] data_; }

  // Returns the owned character buffer.
  virtual const Char* data() const { return data_; }

  // Returns the number of characters in the buffer.
  virtual size_t length() const { return length_; }

 private:
  Char* const data_;
  const size_t length_;
};
typedef SimpleStringResource<char, v8::String::ExternalAsciiStringResource>
SimpleAsciiStringResource;
typedef SimpleStringResource<uc16, v8::String::ExternalStringResource>
SimpleTwoByteStringResource;
const char* const ExternalizeStringExtension::kSource =
"native function externalizeString();"
"native function isAsciiString();";
// Maps the name of a native function declared in kSource to the function
// template wrapping its C++ callback.
v8::Handle<v8::FunctionTemplate> ExternalizeStringExtension::GetNativeFunction(
    v8::Handle<v8::String> str) {
  const bool wants_externalize =
      strcmp(*v8::String::AsciiValue(str), "externalizeString") == 0;
  if (wants_externalize) {
    return v8::FunctionTemplate::New(ExternalizeStringExtension::Externalize);
  }
  // kSource declares only two natives, so the remaining one is isAsciiString.
  ASSERT(strcmp(*v8::String::AsciiValue(str), "isAsciiString") == 0);
  return v8::FunctionTemplate::New(ExternalizeStringExtension::IsAscii);
}
// Implements the externalizeString(string[, forceTwoByte]) native.
//
// Copies the characters of the string into a freshly allocated buffer, wraps
// the buffer in a SimpleStringResource and asks String::MakeExternal() to
// back the string with that resource. An ascii resource is used when the
// string has ascii representation and the optional second argument is not
// true; otherwise a two-byte resource is used.
//
// Throws a JS exception if the arguments are malformed, if the string is
// already external, or if MakeExternal() fails. Returns undefined on success.
v8::Handle<v8::Value> ExternalizeStringExtension::Externalize(
    const v8::Arguments& args) {
  if (args.Length() < 1 || !args[0]->IsString()) {
    return v8::ThrowException(v8::String::New(
        "First parameter to externalizeString() must be a string."));
  }
  bool force_two_byte = false;
  if (args.Length() >= 2) {
    if (args[1]->IsBoolean()) {
      force_two_byte = args[1]->BooleanValue();
    } else {
      return v8::ThrowException(v8::String::New(
          "Second parameter to externalizeString() must be a boolean."));
    }
  }
  bool result = false;
  Handle<String> string = Utils::OpenHandle(*args[0].As<v8::String>());
  if (string->IsExternalString()) {
    return v8::ThrowException(v8::String::New(
        "externalizeString() can't externalize twice."));
  }
  if (string->IsAsciiRepresentation() && !force_two_byte) {
    char* data = new char[string->length()];
    String::WriteToFlat(*string, data, 0, string->length());
    SimpleAsciiStringResource* resource = new SimpleAsciiStringResource(
        data, string->length());
    result = string->MakeExternal(resource);
    if (result && !string->IsSymbol()) {
      i::ExternalStringTable::AddString(*string);
    }
    // On failure the string did not adopt the resource, so it (and the
    // buffer it owns) must be released here to avoid a leak.
    if (!result) delete resource;
  } else {
    uc16* data = new uc16[string->length()];
    String::WriteToFlat(*string, data, 0, string->length());
    SimpleTwoByteStringResource* resource = new SimpleTwoByteStringResource(
        data, string->length());
    result = string->MakeExternal(resource);
    if (result && !string->IsSymbol()) {
      i::ExternalStringTable::AddString(*string);
    }
    // Same as above: nobody else owns the resource when MakeExternal fails.
    if (!result) delete resource;
  }
  if (!result) {
    return v8::ThrowException(v8::String::New("externalizeString() failed."));
  }
  return v8::Undefined();
}
// Implements the isAsciiString(string) native: yields true when the string
// argument reports ascii representation and false otherwise. Throws a JS
// exception unless called with exactly one string argument.
v8::Handle<v8::Value> ExternalizeStringExtension::IsAscii(
    const v8::Arguments& args) {
  const bool has_single_string_arg =
      args.Length() == 1 && args[0]->IsString();
  if (!has_single_string_arg) {
    return v8::ThrowException(v8::String::New(
        "isAsciiString() requires a single string argument."));
  }
  Handle<String> string = Utils::OpenHandle(*args[0].As<v8::String>());
  if (string->IsAsciiRepresentation()) {
    return v8::True();
  }
  return v8::False();
}
static ExternalizeStringExtension externalize_extension;
static v8::DeclareExtension externalize_extension_declaration(
&externalize_extension);
} } // namespace v8::internal

View File

@ -316,10 +316,21 @@ class GCExtension : public v8::Extension {
v8::Handle<v8::String> name);
static v8::Handle<v8::Value> GC(const v8::Arguments& args);
private:
static const char* kSource;
static const char* const kSource;
};
// V8 extension registered under the name "v8/externalize". It provides the
// native functions declared in kSource (externalizeString and isAsciiString)
// and is installed when the expose_externalize_string flag is set.
class ExternalizeStringExtension : public v8::Extension {
public:
ExternalizeStringExtension() : v8::Extension("v8/externalize", kSource) {}
// Returns the function template for the native function called |name|.
virtual v8::Handle<v8::FunctionTemplate> GetNativeFunction(
v8::Handle<v8::String> name);
// Callback behind externalizeString(): turns its string argument into an
// external string.
static v8::Handle<v8::Value> Externalize(const v8::Arguments& args);
// Callback behind isAsciiString(): reports whether its string argument has
// ascii representation.
static v8::Handle<v8::Value> IsAscii(const v8::Arguments& args);
private:
// JavaScript source declaring the natives this extension provides.
static const char* const kSource;
};
} } // namespace v8::internal
#endif // V8_EXECUTION_H_

View File

@ -123,6 +123,8 @@ DEFINE_bool(enable_armv7, true,
DEFINE_string(expose_natives_as, NULL, "expose natives in global object")
DEFINE_string(expose_debug_as, NULL, "expose debug in global object")
DEFINE_bool(expose_gc, false, "expose gc extension")
DEFINE_bool(expose_externalize_string, false,
"expose externalize string extension")
DEFINE_int(stack_trace_limit, 10, "number of stack frames to capture")
DEFINE_bool(disable_native_files, false, "disable builtin natives files")

View File

@ -1929,6 +1929,18 @@ Object* Heap::AllocateConsString(String* first, String* second) {
return Failure::OutOfMemoryException();
}
bool is_ascii_data_in_two_byte_string = false;
if (!is_ascii) {
// At least one of the strings uses two-byte representation so we
// can't use the fast case code for short ascii strings below, but
// we can try to save memory if all chars actually fit in ascii.
is_ascii_data_in_two_byte_string =
first->HasOnlyAsciiChars() && second->HasOnlyAsciiChars();
if (is_ascii_data_in_two_byte_string) {
Counters::string_add_runtime_ext_to_ascii.Increment();
}
}
// If the resulting string is small make a flat string.
if (length < String::kMinNonFlatLength) {
ASSERT(first->IsFlat());
@ -1955,22 +1967,13 @@ Object* Heap::AllocateConsString(String* first, String* second) {
for (int i = 0; i < second_length; i++) *dest++ = src[i];
return result;
} else {
// For short external two-byte strings we check whether they can
// be represented using ascii.
if (!first_is_ascii) {
first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars();
}
if (first_is_ascii && !second_is_ascii) {
second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars();
}
if (first_is_ascii && second_is_ascii) {
if (is_ascii_data_in_two_byte_string) {
Object* result = AllocateRawAsciiString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
char* dest = SeqAsciiString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
Counters::string_add_runtime_ext_to_ascii.Increment();
return result;
}
@ -1984,7 +1987,8 @@ Object* Heap::AllocateConsString(String* first, String* second) {
}
}
Map* map = is_ascii ? cons_ascii_string_map() : cons_string_map();
Map* map = (is_ascii || is_ascii_data_in_two_byte_string) ?
cons_ascii_string_map() : cons_string_map();
Object* result = Allocate(map, NEW_SPACE);
if (result->IsFailure()) return result;
@ -2070,7 +2074,23 @@ Object* Heap::AllocateExternalStringFromTwoByte(
return Failure::OutOfMemoryException();
}
Map* map = Heap::external_string_map();
// For small strings we check whether the resource contains only
// ascii characters. If yes, we use a different string map.
bool is_ascii = true;
if (length >= static_cast<size_t>(String::kMinNonFlatLength)) {
is_ascii = false;
} else {
const uc16* data = resource->data();
for (size_t i = 0; i < length; i++) {
if (data[i] > String::kMaxAsciiCharCode) {
is_ascii = false;
break;
}
}
}
Map* map = is_ascii ?
Heap::external_string_with_ascii_data_map() : Heap::external_string_map();
Object* result = Allocate(map, NEW_SPACE);
if (result->IsFailure()) return result;
@ -2853,6 +2873,9 @@ Map* Heap::SymbolMapForString(String* string) {
if (map == cons_ascii_string_map()) return cons_ascii_symbol_map();
if (map == external_string_map()) return external_symbol_map();
if (map == external_ascii_string_map()) return external_ascii_symbol_map();
if (map == external_string_with_ascii_data_map()) {
return external_symbol_with_ascii_data_map();
}
// No match found.
return NULL;

View File

@ -69,10 +69,12 @@ class ZoneScopeInfo;
V(Map, cons_symbol_map, ConsSymbolMap) \
V(Map, cons_ascii_symbol_map, ConsAsciiSymbolMap) \
V(Map, external_symbol_map, ExternalSymbolMap) \
V(Map, external_symbol_with_ascii_data_map, ExternalSymbolWithAsciiDataMap) \
V(Map, external_ascii_symbol_map, ExternalAsciiSymbolMap) \
V(Map, cons_string_map, ConsStringMap) \
V(Map, cons_ascii_string_map, ConsAsciiStringMap) \
V(Map, external_string_map, ExternalStringMap) \
V(Map, external_string_with_ascii_data_map, ExternalStringWithAsciiDataMap) \
V(Map, external_ascii_string_map, ExternalAsciiStringMap) \
V(Map, undetectable_string_map, UndetectableStringMap) \
V(Map, undetectable_ascii_string_map, UndetectableAsciiStringMap) \

View File

@ -12852,7 +12852,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
// If result is not supposed to be flat allocate a cons string object. If both
// strings are ascii the result is an ascii cons string.
Label non_ascii, allocated;
Label non_ascii, allocated, ascii_data;
__ mov(edi, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ecx, FieldOperand(edi, Map::kInstanceTypeOffset));
__ mov(edi, FieldOperand(edx, HeapObject::kMapOffset));
@ -12861,6 +12861,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
ASSERT(kStringEncodingMask == kAsciiStringTag);
__ test(ecx, Immediate(kAsciiStringTag));
__ j(zero, &non_ascii);
__ bind(&ascii_data);
// Allocate an ascii cons string.
__ AllocateAsciiConsString(ecx, edi, no_reg, &string_add_runtime);
__ bind(&allocated);
@ -12875,6 +12876,19 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ IncrementCounter(&Counters::string_add_native, 1);
__ ret(2 * kPointerSize);
__ bind(&non_ascii);
// At least one of the strings is two-byte. Check whether it happens
// to contain only ascii characters.
// ecx: first instance type AND second instance type.
// edi: second instance type.
__ test(ecx, Immediate(kAsciiDataHintMask));
__ j(not_zero, &ascii_data);
__ mov(ecx, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ecx, FieldOperand(ecx, Map::kInstanceTypeOffset));
__ xor_(edi, Operand(ecx));
ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
__ and_(edi, kAsciiStringTag | kAsciiDataHintTag);
__ cmp(edi, kAsciiStringTag | kAsciiDataHintTag);
__ j(equal, &ascii_data);
// Allocate a two byte cons string.
__ AllocateConsString(ecx, edi, no_reg, &string_add_runtime);
__ jmp(&allocated);

View File

@ -552,12 +552,14 @@ static const char* TypeToString(InstanceType type) {
case CONS_SYMBOL_TYPE: return "CONS_SYMBOL";
case CONS_ASCII_SYMBOL_TYPE: return "CONS_ASCII_SYMBOL";
case EXTERNAL_ASCII_SYMBOL_TYPE:
case EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE:
case EXTERNAL_SYMBOL_TYPE: return "EXTERNAL_SYMBOL";
case ASCII_STRING_TYPE: return "ASCII_STRING";
case STRING_TYPE: return "TWO_BYTE_STRING";
case CONS_STRING_TYPE:
case CONS_ASCII_STRING_TYPE: return "CONS_STRING";
case EXTERNAL_ASCII_STRING_TYPE:
case EXTERNAL_STRING_WITH_ASCII_DATA_TYPE:
case EXTERNAL_STRING_TYPE: return "EXTERNAL_STRING";
case FIXED_ARRAY_TYPE: return "FIXED_ARRAY";
case BYTE_ARRAY_TYPE: return "BYTE_ARRAY";

View File

@ -237,31 +237,20 @@ bool StringShape::IsSymbol() {
bool String::IsAsciiRepresentation() {
uint32_t type = map()->instance_type();
if ((type & kStringRepresentationMask) == kConsStringTag &&
ConsString::cast(this)->second()->length() == 0) {
return ConsString::cast(this)->first()->IsAsciiRepresentation();
}
return (type & kStringEncodingMask) == kAsciiStringTag;
}
bool String::IsTwoByteRepresentation() {
uint32_t type = map()->instance_type();
if ((type & kStringRepresentationMask) == kConsStringTag &&
ConsString::cast(this)->second()->length() == 0) {
return ConsString::cast(this)->first()->IsTwoByteRepresentation();
}
return (type & kStringEncodingMask) == kTwoByteStringTag;
}
bool String::IsExternalTwoByteStringWithAsciiChars() {
if (!IsExternalTwoByteString()) return false;
const uc16* data = ExternalTwoByteString::cast(this)->resource()->data();
for (int i = 0, len = length(); i < len; i++) {
if (data[i] > kMaxAsciiCharCode) return false;
}
return true;
// Reports whether the instance type says all characters fit in ascii: either
// the string is ascii-encoded, or the ascii-data hint bit is set in its
// instance type.
bool String::HasOnlyAsciiChars() {
  const uint32_t instance_type = map()->instance_type();
  if ((instance_type & kStringEncodingMask) == kAsciiStringTag) return true;
  return (instance_type & kAsciiDataHintMask) == kAsciiDataHintTag;
}

View File

@ -678,6 +678,9 @@ Object* String::SlowTryFlatten(PretenureFlag pretenure) {
bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
// Externalizing twice leaks the external resource, so it's
// prohibited by the API.
ASSERT(!this->IsExternalString());
#ifdef DEBUG
if (FLAG_enable_slow_asserts) {
// Assert that the resource and the string are equivalent.
@ -697,13 +700,16 @@ bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
return false;
}
ASSERT(size >= ExternalString::kSize);
bool is_ascii = this->IsAsciiRepresentation();
bool is_symbol = this->IsSymbol();
int length = this->length();
int hash_field = this->hash_field();
// Morph the object to an external string by adjusting the map and
// reinitializing the fields.
this->set_map(Heap::external_string_map());
this->set_map(is_ascii ?
Heap::external_string_with_ascii_data_map() :
Heap::external_string_map());
ExternalTwoByteString* self = ExternalTwoByteString::cast(this);
self->set_length(length);
self->set_hash_field(hash_field);
@ -713,7 +719,9 @@ bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
if (is_symbol) {
self->Hash(); // Force regeneration of the hash value.
// Now morph this external string into a external symbol.
this->set_map(Heap::external_symbol_map());
this->set_map(is_ascii ?
Heap::external_symbol_with_ascii_data_map() :
Heap::external_symbol_map());
}
// Fill the remainder of the string with dead wood.

View File

@ -320,6 +320,10 @@ enum PropertyNormalizationMode {
ExternalTwoByteString::kSize, \
external_symbol, \
ExternalSymbol) \
V(EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE, \
ExternalTwoByteString::kSize, \
external_symbol_with_ascii_data, \
ExternalSymbolWithAsciiData) \
V(EXTERNAL_ASCII_SYMBOL_TYPE, \
ExternalAsciiString::kSize, \
external_ascii_symbol, \
@ -344,6 +348,10 @@ enum PropertyNormalizationMode {
ExternalTwoByteString::kSize, \
external_string, \
ExternalString) \
V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE, \
ExternalTwoByteString::kSize, \
external_string_with_ascii_data, \
ExternalStringWithAsciiData) \
V(EXTERNAL_ASCII_STRING_TYPE, \
ExternalAsciiString::kSize, \
external_ascii_string, \
@ -412,6 +420,11 @@ enum StringRepresentationTag {
};
const uint32_t kIsConsStringMask = 0x1;
// If bit 7 is clear, then bit 3 indicates whether this two-byte
// string actually contains ascii data.
const uint32_t kAsciiDataHintMask = 0x08;
const uint32_t kAsciiDataHintTag = 0x08;
// A ConsString with an empty string as the right side is a candidate
// for being shortcut by the garbage collector unless it is a
@ -427,18 +440,22 @@ const uint32_t kShortcutTypeTag = kConsStringTag;
enum InstanceType {
// String types.
SYMBOL_TYPE = kSymbolTag | kSeqStringTag,
SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kSeqStringTag,
ASCII_SYMBOL_TYPE = kAsciiStringTag | kSymbolTag | kSeqStringTag,
CONS_SYMBOL_TYPE = kSymbolTag | kConsStringTag,
CONS_SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kConsStringTag,
CONS_ASCII_SYMBOL_TYPE = kAsciiStringTag | kSymbolTag | kConsStringTag,
EXTERNAL_SYMBOL_TYPE = kSymbolTag | kExternalStringTag,
EXTERNAL_SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kExternalStringTag,
EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE =
kTwoByteStringTag | kSymbolTag | kExternalStringTag | kAsciiDataHintTag,
EXTERNAL_ASCII_SYMBOL_TYPE =
kAsciiStringTag | kSymbolTag | kExternalStringTag,
STRING_TYPE = kSeqStringTag,
STRING_TYPE = kTwoByteStringTag | kSeqStringTag,
ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
CONS_STRING_TYPE = kConsStringTag,
CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
EXTERNAL_STRING_TYPE = kExternalStringTag,
EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
EXTERNAL_ASCII_STRING_TYPE = kAsciiStringTag | kExternalStringTag,
PRIVATE_EXTERNAL_ASCII_STRING_TYPE = EXTERNAL_ASCII_STRING_TYPE,
@ -4069,12 +4086,14 @@ class String: public HeapObject {
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
// Check whether this string is an external two-byte string that in
// fact contains only ascii characters.
// Returns whether this string has ascii chars, i.e. all of them can
// be ascii encoded. This might be the case even if the string is
// two-byte. Such strings may appear when the embedder prefers
// two-byte external representations even for ascii data.
//
// Such strings may appear when the embedder prefers two-byte
// representations even for ascii data.
inline bool IsExternalTwoByteStringWithAsciiChars();
// NOTE: this should be considered only a hint. False negatives are
// possible.
inline bool HasOnlyAsciiChars();
// Get and set individual two byte chars in the string.
inline void Set(int index, uint16_t value);

View File

@ -4946,16 +4946,6 @@ static Object* ConvertCaseHelper(String* s,
}
static inline SeqAsciiString* TryGetSeqAsciiString(String* s) {
if (!s->IsFlat() || !s->IsAsciiRepresentation()) return NULL;
if (s->IsConsString()) {
ASSERT(ConsString::cast(s)->second()->length() == 0);
return SeqAsciiString::cast(ConsString::cast(s)->first());
}
return SeqAsciiString::cast(s);
}
namespace {
struct ToLowerTraits {
@ -5002,7 +4992,7 @@ static Object* ConvertCase(
unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) {
NoHandleAllocation ha;
CONVERT_CHECKED(String, s, args[0]);
s->TryFlatten();
s = s->TryFlattenGetString();
const int length = s->length();
// Assume that the string is not empty; we need this assumption later
@ -5014,13 +5004,12 @@ static Object* ConvertCase(
// character is also ascii. This is currently the case, but it
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
SeqAsciiString* seq_ascii = TryGetSeqAsciiString(s);
if (seq_ascii != NULL) {
if (s->IsSeqAsciiString()) {
Object* o = Heap::AllocateRawAsciiString(length);
if (o->IsFailure()) return o;
SeqAsciiString* result = SeqAsciiString::cast(o);
bool has_changed_character = ConvertTraits::ConvertAscii(
result->GetChars(), seq_ascii->GetChars(), length);
result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length);
return has_changed_character ? result : s;
}
@ -5564,7 +5553,7 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
if (first->IsString()) return first;
}
bool ascii = special->IsAsciiRepresentation();
bool ascii = special->HasOnlyAsciiChars();
int position = 0;
for (int i = 0; i < array_length; i++) {
int increment = 0;
@ -5605,7 +5594,7 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
String* element = String::cast(elt);
int element_length = element->length();
increment = element_length;
if (ascii && !element->IsAsciiRepresentation()) {
if (ascii && !element->HasOnlyAsciiChars()) {
ascii = false;
}
} else {

View File

@ -11205,16 +11205,17 @@ void StringAddStub::Generate(MacroAssembler* masm) {
// If result is not supposed to be flat, allocate a cons string object. If
// both strings are ascii the result is an ascii cons string.
// rax: first string
// ebx: length of resulting flat string
// rbx: length of resulting flat string
// rdx: second string
// r8: instance type of first string
// r9: instance type of second string
Label non_ascii, allocated;
Label non_ascii, allocated, ascii_data;
__ movl(rcx, r8);
__ and_(rcx, r9);
ASSERT(kStringEncodingMask == kAsciiStringTag);
__ testl(rcx, Immediate(kAsciiStringTag));
__ j(zero, &non_ascii);
__ bind(&ascii_data);
// Allocate an ascii cons string.
__ AllocateAsciiConsString(rcx, rdi, no_reg, &string_add_runtime);
__ bind(&allocated);
@ -11228,6 +11229,18 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ IncrementCounter(&Counters::string_add_native, 1);
__ ret(2 * kPointerSize);
__ bind(&non_ascii);
// At least one of the strings is two-byte. Check whether it happens
// to contain only ascii characters.
// rcx: first instance type AND second instance type.
// r8: first instance type.
// r9: second instance type.
__ testb(rcx, Immediate(kAsciiDataHintMask));
__ j(not_zero, &ascii_data);
__ xor_(r8, r9);
ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
__ andb(r8, Immediate(kAsciiStringTag | kAsciiDataHintTag));
__ cmpb(r8, Immediate(kAsciiStringTag | kAsciiDataHintTag));
__ j(equal, &ascii_data);
// Allocate a two byte cons string.
__ AllocateConsString(rcx, rdi, no_reg, &string_add_runtime);
__ jmp(&allocated);
@ -11235,7 +11248,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
// Handle creating a flat result. First check that both strings are not
// external strings.
// rax: first string
// ebx: length of resulting flat string as smi
// rbx: length of resulting flat string as smi
// rdx: second string
// r8: instance type of first string
// r9: instance type of second string
@ -11251,7 +11264,7 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ j(equal, &string_add_runtime);
// Now check if both strings are ascii strings.
// rax: first string
// ebx: length of resulting flat string
// rbx: length of resulting flat string
// rdx: second string
// r8: instance type of first string
// r9: instance type of second string

View File

@ -0,0 +1,95 @@
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --expose-externalize-string
var size = 1024;
// Checks that the ascii-ness of string data is tracked across
// externalization, array join, string concatenation, flattening and case
// conversion. Relies on the natives externalizeString() and isAsciiString()
// exposed by --expose-externalize-string.
function test() {
  var str = "";

  // Build an ascii cons string.
  for (var i = 0; i < size; i++) {
    str += String.fromCharCode(i & 0x7f);
  }
  assertTrue(isAsciiString(str));

  // A two-byte external string whose characters all fit in ascii.
  var twoByteExternalWithAsciiData =
      "AA" + (function() { return "A"; })();
  externalizeString(twoByteExternalWithAsciiData, true /* force two-byte */);
  assertFalse(isAsciiString(twoByteExternalWithAsciiData));

  // A two-byte external string that really needs two bytes per character.
  var realTwoByteExternalString =
      "\u1234\u1234" + (function() { return "\u1234"; })();
  externalizeString(realTwoByteExternalString);
  assertFalse(isAsciiString(realTwoByteExternalString));

  assertTrue(isAsciiString(["a", twoByteExternalWithAsciiData].join("")));

  // Appending a two-byte string that contains only ascii chars should
  // still produce an ascii cons.
  var str1 = str + twoByteExternalWithAsciiData;
  assertTrue(isAsciiString(str1));

  // Force flattening of the string.
  var old_length = str1.length - twoByteExternalWithAsciiData.length;
  for (var i = 0; i < old_length; i++) {
    assertEquals(String.fromCharCode(i & 0x7f), str1[i]);
  }
  for (var i = old_length; i < str1.length; i++) {
    assertEquals("A", str1[i]);
  }

  // Flattened string should still be ascii.
  assertTrue(isAsciiString(str1));

  // Lower-casing an ascii string should produce ascii.
  assertTrue(isAsciiString(str1.toLowerCase()));

  assertFalse(isAsciiString(["a", realTwoByteExternalString].join("")));

  // Appending a real two-byte string should produce a two-byte cons.
  var str2 = str + realTwoByteExternalString;
  assertFalse(isAsciiString(str2));

  // Force flattening of the string.
  old_length = str2.length - realTwoByteExternalString.length;
  for (var i = 0; i < old_length; i++) {
    assertEquals(String.fromCharCode(i & 0x7f), str2[i]);
  }
  // The bound must be str2.length: str.length equals old_length, so a loop
  // bounded by it would never check the appended two-byte characters.
  for (var i = old_length; i < str2.length; i++) {
    assertEquals("\u1234", str2[i]);
  }

  // Flattened string should still be two-byte.
  assertFalse(isAsciiString(str2));
}
// Run the test many times to ensure IC-s don't break things.
for (var i = 0; i < 10; i++) {
test();
}