Add basic support for Latin1 to the API.

BUG=

Review URL: https://chromiumcodereview.appspot.com/10857030

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12430 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2012-09-03 15:06:36 +00:00
parent 66a16bd63a
commit 74f06b1f99
10 changed files with 728 additions and 125 deletions

View File

@ -1069,19 +1069,48 @@ class String : public Primitive {
PRESERVE_ASCII_NULL = 4
};
// 16-bit character codes.
// Encodings understood by the string API, optionally OR-ed with a hint about
// whether the content is pure ASCII. The encoding selector lives in the low
// two bits (extract it with kStringEncodingMask); the two hints are separate
// single-bit flags at bits 16 and 17 (extract them with kAsciiHintMask).
enum StringEncoding {
// Reported by GetExternalStringEncoding() for strings that are not external.
INVALID_ENCODING = 0,
UTF_8_ENCODING = 1,
LATIN1_ENCODING = 2,
UTF_16_ENCODING = 3,
// The content is known to consist of ASCII characters only.
ASCII_HINT = 1 << 16,
// The content is known not to be pure ASCII.
NOT_ASCII_HINT = 1 << 17
};
static const int kStringEncodingMask = 3;
static const int kAsciiHintMask = String::ASCII_HINT | String::NOT_ASCII_HINT;
static const int kUndefinedLength = -1;
// 16-bit UTF16 code units. PRESERVE_ASCII_NULL is not supported as option,
// null-characters are never converted to spaces.
V8EXPORT int Write(uint16_t* buffer,
int start = 0,
int length = -1,
int length = kUndefinedLength,
int options = NO_OPTIONS) const;
// ASCII characters.
// ASCII characters. Null-characters are converted to spaces unless
// PRESERVE_ASCII_NULL is set as option.
V8EXPORT int WriteAscii(char* buffer,
int start = 0,
int length = -1,
int length = kUndefinedLength,
int options = NO_OPTIONS) const;
// UTF-8 encoded characters.
// Latin1 characters. PRESERVE_ASCII_NULL is not supported as option,
// null-characters are never converted to spaces.
V8EXPORT int WriteLatin1(char* buffer,
int start = 0,
int length = kUndefinedLength,
int options = NO_OPTIONS) const;
// UTF-8 encoded characters. PRESERVE_ASCII_NULL is not supported as option,
// null-characters are never converted to spaces.
V8EXPORT int WriteUtf8(char* buffer,
int length = -1,
int length = kUndefinedLength,
int* nchars_ref = NULL,
int options = NO_OPTIONS) const;
@ -1122,6 +1151,7 @@ class String : public Primitive {
void operator=(const ExternalStringResourceBase&);
friend class v8::internal::Heap;
friend class v8::String;
};
/**
@ -1180,6 +1210,16 @@ class String : public Primitive {
ExternalAsciiStringResource() {}
};
/**
* An ExternalLatin1StringResource is a wrapper around a Latin1-encoded
* string buffer that resides outside V8's heap. For usage in V8, a Latin1
* string is converted to ASCII or two-byte string depending on whether
* it contains non-ASCII characters.
*/
class V8EXPORT ExternalLatin1StringResource
: public ExternalAsciiStringResource {
};
/**
* Get the ExternalStringResource for an external string. Returns
* NULL if IsExternal() doesn't return true.
@ -1193,24 +1233,44 @@ class String : public Primitive {
V8EXPORT const ExternalAsciiStringResource* GetExternalAsciiStringResource()
const;
/**
* If the string is external, return its encoding (Latin1 or UTF16)
* and possibly a hint on whether the content is ASCII.
* Return String::INVALID_ENCODING otherwise.
*/
inline int GetExternalStringEncoding() const;
/**
* Return the resource of the external string regardless of encoding.
* Call this only after having made sure that the string is indeed external!
*/
inline ExternalStringResourceBase* GetExternalStringResourceBase() const;
static inline String* Cast(v8::Value* obj);
/**
* Allocates a new string from either UTF-8 encoded or ASCII data.
* The second parameter 'length' gives the buffer length.
* If the data is UTF-8 encoded, the caller must
* be careful to supply the length parameter.
* If it is not given, the function calls
* 'strlen' to determine the buffer length, it might be
* wrong if 'data' contains a null character.
* Allocates a new string from either UTF-8 or Latin1-encoded data.
* The second parameter 'length' gives the buffer length. If the data may
* contain zero bytes, the caller must be careful to supply the length
* parameter. If it is not given, the function calls 'strlen' to determine
* the buffer length, it might be wrong if 'data' contains a null character.
* The third parameter specifies the encoding, which may include a hint on
* whether the string contains only ASCII characters. In the case of Latin1,
* the appropriate internal representation (UTF16 or ASCII) is chosen.
*/
V8EXPORT static Local<String> New(const char* data, int length = -1);
V8EXPORT static Local<String> New(const char* data,
int length = kUndefinedLength,
int encoding = UTF_8_ENCODING);
/** Allocates a new string from 16-bit character codes.*/
V8EXPORT static Local<String> New(const uint16_t* data, int length = -1);
/** Allocates a new string from 16-bit UTF-16 code units.*/
V8EXPORT static Local<String> New(const uint16_t* data,
int length = kUndefinedLength);
/** Creates a symbol. Returns one if it exists already.*/
V8EXPORT static Local<String> NewSymbol(const char* data, int length = -1);
V8EXPORT static Local<String> NewSymbol(const char* data,
int length = kUndefinedLength,
int encoding = UTF_8_ENCODING);
/**
* Creates a new string by concatenating the left and the right strings
@ -1247,7 +1307,8 @@ class String : public Primitive {
* this function should not otherwise delete or modify the resource. Neither
* should the underlying buffer be deallocated or modified except through the
* destructor of the external string resource.
*/ V8EXPORT static Local<String> NewExternal(
*/
V8EXPORT static Local<String> NewExternal(
ExternalAsciiStringResource* resource);
/**
@ -1261,6 +1322,24 @@ class String : public Primitive {
*/
V8EXPORT bool MakeExternal(ExternalAsciiStringResource* resource);
/**
* Creates a new external string using the Latin1-encoded data defined in the
* given resource. When the external string is no longer live on V8's heap
* the resource will be disposed by calling its Dispose method. The caller of
* this function should not otherwise delete or modify the resource. Neither
* should the underlying buffer be deallocated or modified except through the
* destructor of the external string resource.
* If the data contains a non-ASCII character, the string is created as a new
* string object on the V8 heap and the Dispose method is called on the
* resource immediately. This is because V8 is unable to handle non-ASCII
* Latin1-encoded strings internally.
*/
V8EXPORT static Local<String> NewExternal(
ExternalLatin1StringResource* resource,
int encoding = String::LATIN1_ENCODING);
/**
* Returns true if this string can be made external.
*/
@ -1268,11 +1347,13 @@ class String : public Primitive {
/** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/
V8EXPORT static Local<String> NewUndetectable(const char* data,
int length = -1);
int length = kUndefinedLength,
int encoding = UTF_8_ENCODING);
/** Creates an undetectable string from the supplied 16-bit character codes.*/
/** Creates an undetectable string from the supplied 16-bit UTF16 code units.
*/
V8EXPORT static Local<String> NewUndetectable(const uint16_t* data,
int length = -1);
int length = kUndefinedLength);
/**
* Converts an object to a UTF-8-encoded character array. Useful if
@ -1343,7 +1424,9 @@ class String : public Primitive {
};
private:
V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const;
V8EXPORT void VerifyExternalStringEncoding(int encoding) const;
V8EXPORT void VerifyExternalStringResourceBase(
ExternalStringResourceBase* val) const;
V8EXPORT static void CheckCast(v8::Value* obj);
};
@ -4034,6 +4117,9 @@ class Internals {
static const int kJSObjectHeaderSize = 3 * kApiPointerSize;
static const int kFullStringRepresentationMask = 0x07;
static const int kExternalTwoByteRepresentationTag = 0x02;
static const int kExternalAsciiRepresentationTag = 0x06;
static const int kExternalAsciiDataHintMask = 0x08;
static const int kExternalAsciiDataHintTag = 0x08;
static const int kIsolateStateOffset = 0;
static const int kIsolateEmbedderDataOffset = 1 * kApiPointerSize;
@ -4091,11 +4177,6 @@ class Internals {
}
}
static inline bool IsExternalTwoByteString(int instance_type) {
int representation = (instance_type & kFullStringRepresentationMask);
return representation == kExternalTwoByteRepresentationTag;
}
static inline bool IsInitialized(v8::Isolate* isolate) {
uint8_t* addr = reinterpret_cast<uint8_t*>(isolate) + kIsolateStateOffset;
return *reinterpret_cast<int*>(addr) == 1;
@ -4373,16 +4454,56 @@ Local<String> String::Empty(Isolate* isolate) {
String::ExternalStringResource* String::GetExternalStringResource() const {
typedef internal::Object O;
typedef internal::Internals I;
String::ExternalStringResource* result = NULL;
O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
String::ExternalStringResource* result;
if (I::IsExternalTwoByteString(I::GetInstanceType(obj))) {
void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
result = reinterpret_cast<String::ExternalStringResource*>(value);
} else {
result = NULL;
if ((I::GetInstanceType(obj) & I::kFullStringRepresentationMask) ==
I::kExternalTwoByteRepresentationTag) {
result = reinterpret_cast<String::ExternalStringResource*>(
GetExternalStringResourceBase());
}
return result;
}
/**
 * Classifies this string by its instance type.
 *
 * Returns UTF_16_ENCODING or LATIN1_ENCODING combined with an ASCII hint when
 * the string is external, and INVALID_ENCODING for every other instance type.
 * External ASCII strings are always reported as LATIN1_ENCODING | ASCII_HINT.
 */
int String::GetExternalStringEncoding() const {
  typedef internal::Object O;
  typedef internal::Internals I;
  O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
  // The representation bits and the "two-byte string holding ASCII data" hint
  // bit are inspected together so that one switch covers every combination.
  static const int kRepresentationAndHintMask =
      I::kFullStringRepresentationMask | I::kExternalAsciiDataHintMask;
  int encoding;
  switch (I::GetInstanceType(obj) & kRepresentationAndHintMask) {
    case I::kExternalTwoByteRepresentationTag | I::kExternalAsciiDataHintTag:
      encoding = UTF_16_ENCODING | ASCII_HINT;
      break;
    case I::kExternalTwoByteRepresentationTag:
      encoding = UTF_16_ENCODING | NOT_ASCII_HINT;
      break;
    case I::kExternalAsciiRepresentationTag:
      encoding = LATIN1_ENCODING | ASCII_HINT;
      break;
    default:
      encoding = INVALID_ENCODING;
      break;
  }
#ifdef V8_ENABLE_CHECKS
  // Only the encoding is verified here; this function has no resource pointer
  // to check.
  VerifyExternalStringEncoding(encoding);
#endif
  return encoding;
}
/**
 * Reads the resource pointer stored in this string object without looking at
 * the string's encoding. The field is read unconditionally, so the caller is
 * responsible for having established that the string is external.
 */
String::ExternalStringResourceBase* String::GetExternalStringResourceBase()
    const {
  typedef internal::Object O;
  typedef internal::Internals I;
  O* heap_object = *reinterpret_cast<O**>(const_cast<String*>(this));
  ExternalStringResourceBase* resource =
      reinterpret_cast<String::ExternalStringResourceBase*>(
          I::ReadField<void*>(heap_object, I::kStringResourceOffset));
#ifdef V8_ENABLE_CHECKS
  VerifyExternalStringResourceBase(resource);
#endif
  return resource;
}

View File

@ -3857,7 +3857,7 @@ int String::WriteUtf8(char* buffer,
int string_length = str->length();
if (str->IsAsciiRepresentation()) {
int len;
if (capacity == -1) {
if (capacity == kUndefinedLength) {
capacity = str->length() + 1;
len = string_length;
} else {
@ -3872,7 +3872,7 @@ int String::WriteUtf8(char* buffer,
return len;
}
if (capacity == -1 || capacity / 3 >= string_length) {
if (capacity == kUndefinedLength || capacity / 3 >= string_length) {
int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
const int kMaxRecursion = 100;
int utf8_bytes =
@ -3903,7 +3903,7 @@ int String::WriteUtf8(char* buffer,
int utf8_bytes = i::Utf8Length(str);
if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
if (utf8_bytes <= capacity) {
return WriteUtf8(buffer, -1, nchars_ref, options);
return WriteUtf8(buffer, kUndefinedLength, nchars_ref, options);
}
}
@ -3921,7 +3921,9 @@ int String::WriteUtf8(char* buffer,
int pos = 0;
int nchars = 0;
int previous = unibrow::Utf16::kNoPreviousCharacter;
for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
for (i = 0;
i < len && (capacity == kUndefinedLength || pos < fast_end);
i++) {
i::uc32 c = write_input_buffer.GetNext();
int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
pos += written;
@ -3967,7 +3969,7 @@ int String::WriteUtf8(char* buffer,
}
if (nchars_ref != NULL) *nchars_ref = nchars;
if (!(options & NO_NULL_TERMINATION) &&
(i == len && (capacity == -1 || pos < capacity))) {
(i == len && (capacity == kUndefinedLength || pos < capacity))) {
buffer[pos++] = '\0';
}
return pos;
@ -3982,7 +3984,7 @@ int String::WriteAscii(char* buffer,
if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0;
LOG_API(isolate, "String::WriteAscii");
ENTER_V8(isolate);
ASSERT(start >= 0 && length >= -1);
ASSERT(start >= 0 && length >= kUndefinedLength);
i::Handle<i::String> str = Utils::OpenHandle(this);
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
@ -3991,7 +3993,7 @@ int String::WriteAscii(char* buffer,
if (str->IsAsciiRepresentation()) {
// WriteToFlat is faster than using the StringInputBuffer.
if (length == -1) length = str->length() + 1;
if (length == kUndefinedLength) length = str->length() + 1;
int len = i::Min(length, str->length() - start);
i::String::WriteToFlat(*str, buffer, start, start + len);
if (!(options & PRESERVE_ASCII_NULL)) {
@ -4007,7 +4009,7 @@ int String::WriteAscii(char* buffer,
i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
int end = length;
if ((length == -1) || (length > str->length() - start)) {
if ((length == kUndefinedLength) || (length > str->length() - start)) {
end = str->length() - start;
}
if (end < 0) return 0;
@ -4025,6 +4027,31 @@ int String::WriteAscii(char* buffer,
}
/**
 * Copies characters of this string, starting at index `start`, into `buffer`
 * as one-byte characters.
 *
 * `length` is the capacity of `buffer`; kUndefinedLength means "the rest of
 * the string plus a terminator". A terminating '\0' is appended when there is
 * room for it and NO_NULL_TERMINATION is not set.
 *
 * Returns the number of characters copied, not counting the terminator, or 0
 * when `start` lies beyond the end of the string.
 */
int String::WriteLatin1(char* buffer,
                        int start,
                        int length,
                        int options) const {
  i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();
  if (IsDeadCheck(isolate, "v8::String::WriteLatin1()")) return 0;
  LOG_API(isolate, "String::WriteLatin1");
  ENTER_V8(isolate);
  ASSERT(start >= 0 && length >= kUndefinedLength);
  i::Handle<i::String> str = Utils::OpenHandle(this);
  isolate->string_tracker()->RecordWrite(str);
  if (options & HINT_MANY_WRITES_EXPECTED) {
    FlattenString(str);  // Flatten the string for efficiency.
  }

  if (length == kUndefinedLength) length = str->length() + 1;
  int len = i::Min(length, str->length() - start);
  // A start offset past the end of the string yields a negative count. Bail
  // out like WriteAscii() and Write() do, instead of handing an inverted range
  // to WriteToFlat and storing the terminator in front of the buffer.
  if (len < 0) return 0;
  i::String::WriteToFlat(*str, buffer, start, start + len);
  if (!(options & NO_NULL_TERMINATION) && length > len) {
    buffer[len] = '\0';
  }
  return len;
}
int String::Write(uint16_t* buffer,
int start,
int length,
@ -4033,7 +4060,7 @@ int String::Write(uint16_t* buffer,
if (IsDeadCheck(isolate, "v8::String::Write()")) return 0;
LOG_API(isolate, "String::Write");
ENTER_V8(isolate);
ASSERT(start >= 0 && length >= -1);
ASSERT(start >= 0 && length >= kUndefinedLength);
i::Handle<i::String> str = Utils::OpenHandle(this);
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
@ -4042,7 +4069,7 @@ int String::Write(uint16_t* buffer,
FlattenString(str);
}
int end = start + length;
if ((length == -1) || (length > str->length() - start) )
if ((length == kUndefinedLength) || (length > str->length() - start) )
end = str->length();
if (end < 0) return 0;
i::String::WriteToFlat(*str, buffer, start, end);
@ -4073,18 +4100,43 @@ bool v8::String::IsExternalAscii() const {
}
void v8::String::VerifyExternalStringResource(
v8::String::ExternalStringResource* value) const {
void v8::String::VerifyExternalStringEncoding(int encoding) const {
typedef internal::Internals I;
i::Handle<i::String> str = Utils::OpenHandle(this);
const v8::String::ExternalStringResource* expected;
if (i::StringShape(*str).IsExternalTwoByte()) {
const void* resource =
i::Handle<i::ExternalTwoByteString>::cast(str)->resource();
expected = reinterpret_cast<const ExternalStringResource*>(resource);
} else {
expected = NULL;
switch (encoding) {
case UTF_16_ENCODING | ASCII_HINT:
CHECK(str->HasOnlyAsciiChars());
// Fall through
case UTF_16_ENCODING | NOT_ASCII_HINT :
CHECK(str->IsExternalTwoByteString());
break;
case LATIN1_ENCODING | ASCII_HINT:
CHECK(str->IsExternalAsciiString());
break;
default:
CHECK_EQ(INVALID_ENCODING, encoding);
CHECK(!str->IsExternalString());
break;
}
CHECK_EQ(expected, value);
}
// Checks that `value` is the resource pointer actually stored in this string.
// The string must be external: two-byte strings are handled explicitly and
// anything else is asserted to be an external ASCII string.
void v8::String::VerifyExternalStringResourceBase(
    v8::String::ExternalStringResourceBase* value) const {
  i::Handle<i::String> str = Utils::OpenHandle(this);
  // Compute the shape once and reuse it for both tests below.
  i::StringShape shape(*str);
  const void* expected;
  // We expect an external string at this point since GetExternalStringEncoding
  // should have already been called to rule out non-external strings.
  if (shape.IsExternalTwoByte()) {
    expected = i::ExternalTwoByteString::cast(*str)->resource();
  } else {
    ASSERT(shape.IsExternalAscii());
    expected = i::ExternalAsciiString::cast(*str)->resource();
  }
  CHECK_EQ(expected,
           reinterpret_cast<const ExternalStringResourceBase*>(value));
}
@ -4701,17 +4753,44 @@ Local<String> v8::String::Empty() {
}
Local<String> v8::String::New(const char* data, int length) {
// Builds a heap string from one-byte input that is either UTF-8 or Latin1.
//
// `length` may be String::kUndefinedLength, in which case the length of the
// null-terminated `data` is used. `encoding` carries the encoding selector in
// its low bits and an optional ASCII hint in bits 16 and up; the hint is
// translated into the internal i::String::AsciiHint by shifting.
//
// Returns an empty handle when the selector is neither UTF-8 nor Latin1.
static i::Handle<i::String> NewOneByteEncodedString(
    i::Factory* factory, const char* data, int length, int encoding) {
  typedef v8::String S;
  static const int kAsciiHintShift = 16;

  // The public hint bits must line up with the internal enum once shifted.
  STATIC_ASSERT(i::String::MAYBE_ASCII == 0);
  STATIC_ASSERT(i::String::NOT_ASCII ==
                (v8::String::NOT_ASCII_HINT >> kAsciiHintShift));
  STATIC_ASSERT(i::String::ASCII ==
                (v8::String::ASCII_HINT >> kAsciiHintShift));

  if (length == String::kUndefinedLength) length = i::StrLength(data);

  // At most one of the two hint bits may be set.
  ASSERT(IS_POWER_OF_TWO(encoding & S::kAsciiHintMask));
  const i::String::AsciiHint hint =
      static_cast<i::String::AsciiHint>(encoding >> kAsciiHintShift);

  switch (encoding & S::kStringEncodingMask) {
    case S::UTF_8_ENCODING:
      return factory->NewStringFromUtf8(
          i::Vector<const char>(data, length), i::NOT_TENURED, hint);
    case S::LATIN1_ENCODING:
      return factory->NewStringFromLatin1(
          i::Vector<const char>(data, length), i::NOT_TENURED, hint);
    default:  // Wrong encoding.
      return i::Handle<i::String>();
  }
}
Local<String> v8::String::New(
const char* data, int length, int encoding) {
i::Isolate* isolate = i::Isolate::Current();
EnsureInitializedForIsolate(isolate, "v8::String::New()");
LOG_API(isolate, "String::New(char)");
if (length == 0) return Empty();
ENTER_V8(isolate);
if (length == -1) length = i::StrLength(data);
i::Handle<i::String> result =
isolate->factory()->NewStringFromUtf8(
i::Vector<const char>(data, length));
return Utils::ToLocal(result);
return Utils::ToLocal(
NewOneByteEncodedString(isolate->factory(), data, length, encoding));
}
@ -4728,15 +4807,14 @@ Local<String> v8::String::Concat(Handle<String> left, Handle<String> right) {
}
Local<String> v8::String::NewUndetectable(const char* data, int length) {
Local<String> v8::String::NewUndetectable(
const char* data, int length, int encoding) {
i::Isolate* isolate = i::Isolate::Current();
EnsureInitializedForIsolate(isolate, "v8::String::NewUndetectable()");
LOG_API(isolate, "String::NewUndetectable(char)");
ENTER_V8(isolate);
if (length == -1) length = i::StrLength(data);
i::Handle<i::String> result =
isolate->factory()->NewStringFromUtf8(
i::Vector<const char>(data, length));
NewOneByteEncodedString(isolate->factory(), data, length, encoding);
result->MarkAsUndetectable();
return Utils::ToLocal(result);
}
@ -4755,7 +4833,7 @@ Local<String> v8::String::New(const uint16_t* data, int length) {
LOG_API(isolate, "String::New(uint16_)");
if (length == 0) return Empty();
ENTER_V8(isolate);
if (length == -1) length = TwoByteStringLength(data);
if (length == kUndefinedLength) length = TwoByteStringLength(data);
i::Handle<i::String> result =
isolate->factory()->NewStringFromTwoByte(
i::Vector<const uint16_t>(data, length));
@ -4768,7 +4846,7 @@ Local<String> v8::String::NewUndetectable(const uint16_t* data, int length) {
EnsureInitializedForIsolate(isolate, "v8::String::NewUndetectable()");
LOG_API(isolate, "String::NewUndetectable(uint16_)");
ENTER_V8(isolate);
if (length == -1) length = TwoByteStringLength(data);
if (length == kUndefinedLength) length = TwoByteStringLength(data);
i::Handle<i::String> result =
isolate->factory()->NewStringFromTwoByte(
i::Vector<const uint16_t>(data, length));
@ -4806,29 +4884,43 @@ Local<String> v8::String::NewExternal(
}
bool v8::String::MakeExternal(v8::String::ExternalStringResource* resource) {
i::Handle<i::String> obj = Utils::OpenHandle(this);
i::Isolate* isolate = obj->GetIsolate();
template<class StringResourceType>
static bool MakeStringExternal(
i::Handle<i::String> string, StringResourceType* resource) {
i::Isolate* isolate = string->GetIsolate();
if (IsDeadCheck(isolate, "v8::String::MakeExternal()")) return false;
if (i::StringShape(*obj).IsExternalTwoByte()) {
if (i::StringShape(*string).IsExternal()) {
return false; // Already an external string.
}
ENTER_V8(isolate);
if (isolate->string_tracker()->IsFreshUnusedString(obj)) {
if (isolate->string_tracker()->IsFreshUnusedString(string)) {
return false;
}
if (isolate->heap()->IsInGCPostProcessing()) {
return false;
}
CHECK(resource && resource->data());
bool result = obj->MakeExternal(resource);
if (result && !obj->IsSymbol()) {
isolate->heap()->external_string_table()->AddString(*obj);
bool result = string->MakeExternal(resource);
if (result && !string->IsSymbol()) {
isolate->heap()->external_string_table()->AddString(*string);
}
return result;
}
// Turns this string into an external two-byte string backed by `resource`.
// All checks and bookkeeping live in the shared MakeStringExternal helper.
bool v8::String::MakeExternal(ExternalStringResource* resource) {
  return MakeStringExternal(Utils::OpenHandle(this), resource);
}
// Turns this string into an external ASCII string backed by `resource`.
// The string is asserted to contain only ASCII characters; everything else is
// delegated to the shared MakeStringExternal helper.
bool v8::String::MakeExternal(ExternalAsciiStringResource* resource) {
  i::Handle<i::String> self = Utils::OpenHandle(this);
  ASSERT(self->HasOnlyAsciiChars());
  return MakeStringExternal(self, resource);
}
Local<String> v8::String::NewExternal(
v8::String::ExternalAsciiStringResource* resource) {
i::Isolate* isolate = i::Isolate::Current();
@ -4842,27 +4934,37 @@ Local<String> v8::String::NewExternal(
}
bool v8::String::MakeExternal(
v8::String::ExternalAsciiStringResource* resource) {
i::Handle<i::String> obj = Utils::OpenHandle(this);
i::Isolate* isolate = obj->GetIsolate();
if (IsDeadCheck(isolate, "v8::String::MakeExternal()")) return false;
if (i::StringShape(*obj).IsExternalTwoByte()) {
return false; // Already an external string.
}
// Creates a string from the Latin1 data held by `resource`.
//
// `encoding` must select LATIN1_ENCODING and may carry ASCII_HINT or
// NOT_ASCII_HINT. Without a hint the data is scanned to find out whether it
// is pure ASCII.
//
// * Pure ASCII data: the resource becomes the backing store of a new external
//   ASCII string, which is registered in the external string table.
// * Otherwise: the data is copied into a regular heap string and the resource
//   is disposed immediately.
Local<String> v8::String::NewExternal(ExternalLatin1StringResource* resource,
                                      int encoding) {
  i::Isolate* isolate = i::Isolate::Current();
  EnsureInitializedForIsolate(isolate, "v8::String::NewExternal()");
  LOG_API(isolate, "String::NewExternal");
  ENTER_V8(isolate);
  ASSERT((encoding & kStringEncodingMask) == LATIN1_ENCODING);
  CHECK(resource && resource->data());
  // The hint has to stay an int: the flags live in bits 16 and 17, so storing
  // the masked value in a bool would reduce it to 0/1 and none of the
  // comparisons against ASCII_HINT / NOT_ASCII_HINT below could ever match.
  int ascii_hint = (encoding & kAsciiHintMask);
  i::Handle<i::String> result;
  if (ascii_hint == ASCII_HINT ||
      (ascii_hint != NOT_ASCII_HINT &&
       i::String::IsAscii(resource->data(), resource->length()))) {
    // Assert that the ascii hint is correct.
    ASSERT(ascii_hint != ASCII_HINT ||
           i::String::IsAscii(resource->data(), resource->length()));
    result = NewExternalAsciiStringHandle(isolate, resource);
    isolate->heap()->external_string_table()->AddString(*result);
  } else {
    // We cannot simply take the backing store and use it as an ASCII string,
    // since it's not. Instead, we convert it to an internal string and dispose
    // the external resource.
    result = isolate->factory()->NewStringFromLatin1(
        i::Vector<const char>(resource->data(), resource->length()),
        i::NOT_TENURED,
        i::String::NOT_ASCII);
    resource->Dispose();
  }
  return Utils::ToLocal(result);
}
@ -5125,14 +5227,28 @@ Local<Object> Array::CloneElementAt(uint32_t index) {
}
Local<String> v8::String::NewSymbol(const char* data, int length) {
Local<String> v8::String::NewSymbol(
const char* data, int length, int encoding) {
i::Isolate* isolate = i::Isolate::Current();
EnsureInitializedForIsolate(isolate, "v8::String::NewSymbol()");
LOG_API(isolate, "String::NewSymbol(char)");
ENTER_V8(isolate);
if (length == -1) length = i::StrLength(data);
i::Handle<i::String> result =
isolate->factory()->LookupSymbol(i::Vector<const char>(data, length));
if (length == kUndefinedLength) length = i::StrLength(data);
i::Handle<i::String> result;
ASSERT(IS_POWER_OF_TWO(encoding & kAsciiHintMask));
if (((encoding & kStringEncodingMask) == LATIN1_ENCODING) &&
((encoding & kAsciiHintMask) == NOT_ASCII_HINT ||
!i::String::IsAscii(data, length))) {
result = isolate->factory()->NewStringFromLatin1(
i::Vector<const char>(data, length),
i::NOT_TENURED,
i::String::NOT_ASCII);
result = isolate->factory()->LookupSymbol(result);
} else { // We can handle UTF8 and ASCII strings here.
result =
isolate->factory()->LookupSymbol(i::Vector<const char>(data, length));
}
return Utils::ToLocal(result);
}

View File

@ -204,10 +204,23 @@ Handle<String> Factory::NewStringFromAscii(Vector<const char> string,
}
Handle<String> Factory::NewStringFromUtf8(Vector<const char> string,
PretenureFlag pretenure) {
PretenureFlag pretenure,
String::AsciiHint ascii_hint) {
CALL_HEAP_FUNCTION(
isolate(),
isolate()->heap()->AllocateStringFromUtf8(string, pretenure),
isolate()->heap()->AllocateStringFromUtf8(
string, pretenure, ascii_hint),
String);
}
// Creates a string from Latin1-encoded bytes by forwarding `string`,
// `pretenure` and `ascii_hint` to Heap::AllocateStringFromLatin1.
// The raw heap call is wrapped in CALL_HEAP_FUNCTION (defined elsewhere),
// which supplies the Handle<String> this function returns.
Handle<String> Factory::NewStringFromLatin1(Vector<const char> string,
PretenureFlag pretenure,
String::AsciiHint ascii_hint) {
CALL_HEAP_FUNCTION(
isolate(),
isolate()->heap()->AllocateStringFromLatin1(
string, pretenure, ascii_hint),
String);
}

View File

@ -120,7 +120,13 @@ class Factory {
// flags in the parser.
Handle<String> NewStringFromUtf8(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
PretenureFlag pretenure = NOT_TENURED,
String::AsciiHint ascii_hint = String::MAYBE_ASCII);
Handle<String> NewStringFromLatin1(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED,
String::AsciiHint ascii_hint = String::MAYBE_ASCII);
Handle<String> NewStringFromTwoByte(
Vector<const uc16> str,

View File

@ -83,9 +83,14 @@ void PromotionQueue::ActivateGuardIfOnTheSamePage() {
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
PretenureFlag pretenure) {
// Check for ASCII first since this is the common case.
if (String::IsAscii(str.start(), str.length())) {
PretenureFlag pretenure,
String::AsciiHint ascii_hint) {
if ((ascii_hint == String::MAYBE_ASCII &&
String::IsAscii(str.start(), str.length())) ||
ascii_hint == String::ASCII) {
// Assert that the ASCII-hint is correct.
ASSERT(ascii_hint != String::ASCII ||
String::IsAscii(str.start(), str.length()));
// If the string is ASCII, we do not need to convert the characters
// since UTF8 is backwards compatible with ASCII.
return AllocateStringFromAscii(str, pretenure);
@ -95,6 +100,24 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
}
// Allocates a string from Latin1-encoded bytes.
//
// The ASCII path is taken when the caller promises ASCII content
// (String::ASCII) or when no hint was given (String::MAYBE_ASCII) and a scan
// of the data finds only ASCII characters. Every other case is handed to
// AllocateStringFromLatin1Slow.
MaybeObject* Heap::AllocateStringFromLatin1(Vector<const char> str,
                                            PretenureFlag pretenure,
                                            String::AsciiHint ascii_hint) {
  const bool take_ascii_path =
      ascii_hint == String::ASCII ||
      (ascii_hint == String::MAYBE_ASCII &&
       String::IsAscii(str.start(), str.length()));

  if (!take_ascii_path) {
    // Non-ASCII and we need to decode.
    return AllocateStringFromLatin1Slow(str, pretenure);
  }

  // A strict ASCII hint must be truthful.
  ASSERT(ascii_hint != String::ASCII ||
         String::IsAscii(str.start(), str.length()));
  // Latin1 is backwards compatible with ASCII, so the characters can be used
  // without conversion.
  return AllocateStringFromAscii(str, pretenure);
}
MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
int chars,
uint32_t hash_field) {

View File

@ -48,6 +48,7 @@
#include "snapshot.h"
#include "store-buffer.h"
#include "v8threads.h"
#include "v8utils.h"
#include "vm-state-inl.h"
#if V8_TARGET_ARCH_ARM && !V8_INTERPRETED_REGEXP
#include "regexp-macro-assembler.h"
@ -4390,7 +4391,8 @@ MaybeObject* Heap::ReinitializeJSGlobalProxy(JSFunction* constructor,
MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
PretenureFlag pretenure) {
if (string.length() == 1) {
int length = string.length();
if (length == 1) {
return Heap::LookupSingleCharacterStringFromCode(string[0]);
}
Object* result;
@ -4399,11 +4401,10 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
if (!maybe_result->ToObject(&result)) return maybe_result;
}
isolate_->counters()->string_length_ascii()->Increment(length);
// Copy the characters into the new object.
SeqAsciiString* string_result = SeqAsciiString::cast(result);
for (int i = 0; i < string.length(); i++) {
string_result->SeqAsciiStringSet(i, string[i]);
}
CopyChars(SeqAsciiString::cast(result)->GetChars(), string.start(), length);
return result;
}
@ -4430,41 +4431,63 @@ MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
if (!maybe_result->ToObject(&result)) return maybe_result;
}
isolate_->counters()->string_length_utf8()->Increment(chars);
// Convert and copy the characters into the new object.
String* string_result = String::cast(result);
SeqTwoByteString* twobyte = SeqTwoByteString::cast(result);
decoder->Reset(string.start(), string.length());
int i = 0;
while (i < chars) {
uint32_t r = decoder->GetNext();
if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r));
string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r));
twobyte->SeqTwoByteStringSet(i++, unibrow::Utf16::LeadSurrogate(r));
twobyte->SeqTwoByteStringSet(i++, unibrow::Utf16::TrailSurrogate(r));
} else {
string_result->Set(i++, r);
twobyte->SeqTwoByteStringSet(i++, r);
}
}
return result;
}
// Allocates a two-byte string with one character per input byte and copies
// the Latin1 data into it. If the raw allocation does not produce an object,
// its result is returned to the caller unchanged.
MaybeObject* Heap::AllocateStringFromLatin1Slow(Vector<const char> string,
                                                PretenureFlag pretenure) {
  int length = string.length();

  Object* raw;
  MaybeObject* allocation = AllocateRawTwoByteString(length, pretenure);
  if (!allocation->ToObject(&raw)) return allocation;

  isolate_->counters()->string_length_latin1()->Increment(length);

  // The source is viewed as unsigned bytes so that values of 0x80 and above
  // are widened as 0x80..0xFF (a signed char would sign-extend).
  const unsigned char* source =
      reinterpret_cast<const unsigned char*>(string.start());
  CopyChars(SeqTwoByteString::cast(raw)->GetChars(), source, length);
  return raw;
}
MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
PretenureFlag pretenure) {
// Check if the string is an ASCII string.
MaybeObject* maybe_result;
if (String::IsAscii(string.start(), string.length())) {
maybe_result = AllocateRawAsciiString(string.length(), pretenure);
} else { // It's not an ASCII string.
maybe_result = AllocateRawTwoByteString(string.length(), pretenure);
}
Object* result;
if (!maybe_result->ToObject(&result)) return maybe_result;
int length = string.length();
const uc16* start = string.start();
// Copy the characters into the new object, which may be either ASCII or
// UTF-16.
String* string_result = String::cast(result);
for (int i = 0; i < string.length(); i++) {
string_result->Set(i, string[i]);
if (String::IsAscii(start, length)) {
MaybeObject* maybe_result = AllocateRawAsciiString(length, pretenure);
if (!maybe_result->ToObject(&result)) return maybe_result;
isolate_->counters()->string_length_ascii()->Increment(length);
CopyChars(SeqAsciiString::cast(result)->GetChars(), start, length);
} else { // It's not an ASCII string.
MaybeObject* maybe_result = AllocateRawTwoByteString(length, pretenure);
if (!maybe_result->ToObject(&result)) return maybe_result;
isolate_->counters()->string_length_utf16()->Increment(length);
CopyChars(SeqTwoByteString::cast(result)->GetChars(), start, length);
}
return result;
}

View File

@ -679,10 +679,18 @@ class Heap {
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT inline MaybeObject* AllocateStringFromUtf8(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
PretenureFlag pretenure = NOT_TENURED,
String::AsciiHint ascii_hint = String::MAYBE_ASCII);
MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT inline MaybeObject* AllocateStringFromLatin1(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED,
String::AsciiHint ascii_hint = String::MAYBE_ASCII);
MUST_USE_RESULT MaybeObject* AllocateStringFromLatin1Slow(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
Vector<const uc16> str,
PretenureFlag pretenure = NOT_TENURED);

View File

@ -7128,6 +7128,10 @@ class String: public HeapObject {
friend class String;
};
// Hint describing whether a string's content is ASCII-only.
// NOTE(review): the precise meaning of each value is inferred from its name;
// confirm against the allocation paths that consume the hint.
enum AsciiHint {
  MAYBE_ASCII = 0,
  ASCII = 1,
  NOT_ASCII = 2
};
// Get and set the length of the string.
inline int length();
inline void set_length(int value);

View File

@ -252,6 +252,10 @@ namespace internal {
SC(string_add_make_two_char, V8.StringAddMakeTwoChar) \
SC(string_compare_native, V8.StringCompareNative) \
SC(string_compare_runtime, V8.StringCompareRuntime) \
SC(string_length_utf8, V8.StringLengthUtf8) \
SC(string_length_ascii, V8.StringLengthAScii) \
SC(string_length_latin1, V8.StringLengthLatin1) \
SC(string_length_utf16, V8.StringLengthUtf16) \
SC(regexp_entry_runtime, V8.RegExpEntryRuntime) \
SC(regexp_entry_native, V8.RegExpEntryNative) \
SC(number_to_string_native, V8.NumberToStringNative) \

View File

@ -714,9 +714,294 @@ TEST(ExternalStringWithDisposeHandling) {
}
static void TestNewLatin1String(int encoding1, int encoding2) {
const char* chars1 = "ASCII 123";
const char* chars1js = "'ASCII 123'";
int str1_len = strlen(chars1);
const char* chars2 = "Non-ASCII \xAB\xCD\xEF";
const char* chars2js = "'Non-ASCII \\u00ab\\u00cd\\u00ef'";
int str2_len = strlen(chars2);
Local<String> str1 = String::New(chars1, str1_len, encoding1);
Local<String> str2 = String::New(chars2, str2_len, encoding2);
Local<String> str1_compare = CompileRun(chars1js)->ToString();
Local<String> str2_compare = CompileRun(chars2js)->ToString();
if (encoding1 & String::NOT_ASCII_HINT) {
CHECK(v8::Utils::OpenHandle(*str1)->IsSeqTwoByteString());
} else {
CHECK(v8::Utils::OpenHandle(*str1)->IsSeqAsciiString());
}
CHECK(v8::Utils::OpenHandle(*str1_compare)->IsSeqAsciiString());
CHECK(v8::Utils::OpenHandle(*str2)->IsSeqTwoByteString());
CHECK(v8::Utils::OpenHandle(*str2_compare)->IsSeqTwoByteString());
CHECK(str1_compare->Equals(str1));
CHECK(str2_compare->Equals(str2));
}
// Runs TestNewLatin1String for LATIN1_ENCODING without a hint, with
// ASCII_HINT on the ASCII-only input, and with NOT_ASCII_HINT on both inputs.
TEST(CreateLatin1String) {
  v8::HandleScope handle_scope;
  LocalContext context;

  const int plain = String::LATIN1_ENCODING;
  const int hinted_not_ascii = String::LATIN1_ENCODING | String::NOT_ASCII_HINT;
  const int hinted_ascii = String::LATIN1_ENCODING | String::ASCII_HINT;

  TestNewLatin1String(plain, plain);
  TestNewLatin1String(hinted_ascii, plain);
  TestNewLatin1String(hinted_not_ascii, hinted_not_ascii);
}
// Checks what GetExternalStringEncoding() reports for external two-byte
// strings (with and without non-ASCII content), an external ASCII string and
// a string that is not external, and that the three resources have been
// disposed after a full GC.
TEST(ExternalStringEncoding) {
  v8::HandleScope outer_scope;
  LocalContext context;
  int disposed = 0;
  { HandleScope inner_scope;
    // Character buffers handed to the resources below.
    uint16_t* ascii_wide_chars = AsciiToTwoByteString("two byte ascii");
    uint16_t* wide_chars = AsciiToTwoByteString("two byte non-ascii \x99");
    char* narrow_chars = i::StrDup("ascii");

    TestResource* wide_resource = new TestResource(wide_chars, &disposed);
    TestResource* ascii_wide_resource =
        new TestResource(ascii_wide_chars, &disposed);
    TestAsciiResource* narrow_resource =
        new TestAsciiResource(narrow_chars, &disposed);

    Local<String> wide_string = String::NewExternal(wide_resource);
    Local<String> ascii_wide_string =
        String::NewExternal(ascii_wide_resource);
    Local<String> narrow_string = String::NewExternal(narrow_resource);
    Local<String> heap_string = v8_str("not external");

    // Reported encoding: base encoding combined with the ASCII hint bits.
    CHECK_EQ(String::UTF_16_ENCODING | String::NOT_ASCII_HINT,
             wide_string->GetExternalStringEncoding());
    CHECK_EQ(String::UTF_16_ENCODING | String::ASCII_HINT,
             ascii_wide_string->GetExternalStringEncoding());
    CHECK_EQ(String::LATIN1_ENCODING | String::ASCII_HINT,
             narrow_string->GetExternalStringEncoding());
    CHECK_EQ(String::INVALID_ENCODING,
             heap_string->GetExternalStringEncoding());

    // The strings must hand back exactly the resources they were built from.
    CHECK_EQ(wide_resource, wide_string->GetExternalStringResource());
    CHECK_EQ(ascii_wide_resource,
             ascii_wide_string->GetExternalStringResourceBase());
    CHECK_EQ(narrow_resource, narrow_string->GetExternalStringResourceBase());

    // Nothing may be disposed while the strings are still referenced.
    CHECK_EQ(0, disposed);
  }
  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
  CHECK_EQ(3, disposed);
}
// Verifies String::WriteLatin1() on a sequential ASCII string, a sequential
// two-byte string, a cons string and a sliced string, including the
// NO_NULL_TERMINATION option and a non-zero start offset.
TEST(WriteLatin1String) {
  HandleScope scope;
  LocalContext env;
  const char* latin1_ascii = "latin1 ascii";
  const char* latin1 = "\x99 latin1 non-ascii \xF8";
  const char* concat = "latin1 ascii\x99 latin1 non-ascii \xF8";
  const char* sub = "latin1 non-ascii \xF8";

  // WriteLatin1() returns int while strlen() returns size_t. Convert the
  // expected lengths once so that every CHECK_EQ below compares int with int
  // instead of relying on an implicit narrowing conversion.
  const int latin1_ascii_length = static_cast<int>(strlen(latin1_ascii));
  const int latin1_length = static_cast<int>(strlen(latin1));
  const int concat_length = static_cast<int>(strlen(concat));
  const int sub_length = static_cast<int>(strlen(sub));

  Local<String> latin1_ascii_string = String::New(latin1_ascii,
                                                  String::kUndefinedLength,
                                                  String::LATIN1_ENCODING);
  Local<String> latin1_string = String::New(latin1,
                                            String::kUndefinedLength,
                                            String::LATIN1_ENCODING);
  Local<String> concat_string = String::Concat(latin1_ascii_string,
                                               latin1_string);
  // Substring starting at index 2, i.e. without the leading "\x99 ".
  Local<String> sub_string = v8::Utils::ToLocal(
      FACTORY->NewSubString(
          v8::Utils::OpenHandle(*latin1_string), 2, latin1_string->Length()));

  // Make sure each string has the representation this test wants to cover.
  CHECK(v8::Utils::OpenHandle(*latin1_ascii_string)->IsSeqAsciiString());
  CHECK(v8::Utils::OpenHandle(*latin1_string)->IsSeqTwoByteString());
  CHECK(v8::Utils::OpenHandle(*concat_string)->IsConsString());
  CHECK(v8::Utils::OpenHandle(*sub_string)->IsSlicedString());

  char buffer[64];
  CHECK_EQ(latin1_ascii_length, latin1_ascii_string->WriteLatin1(buffer));
  CHECK_EQ(0, strcmp(latin1_ascii, buffer));
  CHECK_EQ(latin1_length, latin1_string->WriteLatin1(buffer));
  CHECK_EQ(0, strcmp(latin1, buffer));
  CHECK_EQ(concat_length, concat_string->WriteLatin1(buffer));
  CHECK_EQ(0, strcmp(concat, buffer));
  CHECK_EQ(sub_length, sub_string->WriteLatin1(buffer));
  CHECK_EQ(0, strcmp(sub, buffer));

  // Pre-fill the buffer with a non-zero byte so that a terminator written
  // despite NO_NULL_TERMINATION would be detected.
  memset(buffer, 0x1, sizeof(buffer));
  CHECK_EQ(latin1_length,
           latin1_string->WriteLatin1(buffer,
                                      0,
                                      String::kUndefinedLength,
                                      String::NO_NULL_TERMINATION));
  CHECK_EQ(0, strncmp(latin1, buffer, strlen(latin1)));
  CHECK_NE(0, strcmp(latin1, buffer));
  buffer[latin1_length] = '\0';
  CHECK_EQ(0, strcmp(latin1, buffer));

  // Writing from offset 2 must yield the null-terminated suffix. A plain
  // strcmp is used because the previous strncmp bound, strlen(latin1), was
  // longer than the suffix being compared.
  CHECK_EQ(latin1_length - 2,
           latin1_string->WriteLatin1(buffer, 2));
  CHECK_EQ(0, strcmp(latin1 + 2, buffer));
}
class TestLatin1Resource: public String::ExternalLatin1StringResource {
public:
explicit TestLatin1Resource(const char* data, int* counter = NULL)
: data_(data), length_(strlen(data)), counter_(counter) { }
~TestLatin1Resource() {
i::DeleteArray(data_);
if (counter_ != NULL) ++*counter_;
}
const char* data() const {
return data_;
}
size_t length() const {
return length_;
}
private:
const char* data_;
size_t length_;
int* counter_;
};
// Creates external strings from Latin1 resources, with and without explicit
// encoding flags. ASCII-only data must stay external and keep its resource;
// non-ASCII data must come back as a sequential two-byte string with its
// resource already disposed. |counter| is incremented by each
// TestLatin1Resource destructor.
TEST(ExternalLatin1String) {
HandleScope scope;
LocalContext env;
int counter = 0;
{ HandleScope scope;
char* latin1_ascii_a = i::StrDup("latin1 ascii a");
char* latin1_ascii_b = i::StrDup("latin1 ascii b");
char* latin1_a = i::StrDup("latin non-ascii \xAA");
char* latin1_b = i::StrDup("latin non-ascii \xBB");
TestLatin1Resource* latin1_ascii_a_resource =
new TestLatin1Resource(latin1_ascii_a, &counter);
TestLatin1Resource* latin1_ascii_b_resource =
new TestLatin1Resource(latin1_ascii_b, &counter);
TestLatin1Resource* latin1_a_resource =
new TestLatin1Resource(latin1_a, &counter);
TestLatin1Resource* latin1_b_resource =
new TestLatin1Resource(latin1_b, &counter);
// ASCII-only data: once with the default encoding argument, once with an
// explicit Latin1 + ASCII hint.
Local<String> latin1_ascii_a_external =
String::NewExternal(latin1_ascii_a_resource);
Local<String> latin1_ascii_b_external = String::NewExternal(
latin1_ascii_b_resource,
String::LATIN1_ENCODING | String::ASCII_HINT);
// Neither ASCII resource may have been disposed at this point.
CHECK_EQ(0, counter);
// Non-ascii latin1 strings are internalized immediately as two-byte
// string and the external resource is disposed.
Local<String> latin1_a_external = String::NewExternal(latin1_a_resource);
Local<String> latin1_b_external = String::NewExternal(
latin1_b_resource, String::LATIN1_ENCODING | String::NOT_ASCII_HINT);
CHECK(v8::Utils::OpenHandle(*latin1_a_external)->IsSeqTwoByteString());
CHECK(v8::Utils::OpenHandle(*latin1_b_external)->IsSeqTwoByteString());
// Exactly the two non-ASCII resources have been disposed so far.
CHECK_EQ(2, counter);
// The ASCII strings report Latin1 with the ASCII hint; the converted strings
// are no longer external and therefore report INVALID_ENCODING.
CHECK_EQ(latin1_ascii_a_external->GetExternalStringEncoding(),
(v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
CHECK_EQ(latin1_ascii_b_external->GetExternalStringEncoding(),
(v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
CHECK_EQ(latin1_a_external->GetExternalStringEncoding(),
v8::String::INVALID_ENCODING);
CHECK_EQ(latin1_b_external->GetExternalStringEncoding(),
v8::String::INVALID_ENCODING);
CHECK_EQ(latin1_ascii_a_resource,
latin1_ascii_a_external->GetExternalStringResourceBase());
CHECK_EQ(latin1_ascii_b_resource,
latin1_ascii_b_external->GetExternalStringResourceBase());
}
// With the inner handle scope gone, a full GC must dispose the remaining two
// resources as well (2 + 2 = 4).
HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
CHECK_EQ(4, counter);
}
// Externalizes existing heap strings through MakeExternal(): an ASCII string
// with a Latin1 resource, an ASCII string with a two-byte resource and a
// non-ASCII string with a two-byte resource. For each one the reported
// encoding, the resource identity and the content are verified, and finally
// that all three resources have been disposed after a full GC.
TEST(ExternalizeLatin1String) {
HandleScope scope;
LocalContext env;
int counter = 0;
{ HandleScope scope;
Local<String> latin1_a_ascii = String::New("latin1 a ascii");
Local<String> latin1_b_ascii = String::New("latin1 b ascii");
Local<String> latin1 = String::New("latin1 non-ascii \xAA",
String::kUndefinedLength,
String::LATIN1_ENCODING);
CHECK(v8::Utils::OpenHandle(*latin1_a_ascii)->IsSeqAsciiString());
CHECK(v8::Utils::OpenHandle(*latin1_b_ascii)->IsSeqAsciiString());
CHECK(v8::Utils::OpenHandle(*latin1)->IsSeqTwoByteString());
// Run GC twice to put those strings into old space for externalizing.
HEAP->CollectGarbage(i::NEW_SPACE);
HEAP->CollectGarbage(i::NEW_SPACE);
// Copy each string's characters into a freshly allocated buffer that is
// handed to the matching resource: one char buffer, two uint16_t buffers.
char* latin1_a_ascii_chars = i::NewArray<char>(64);
uint16_t* latin1_b_ascii_chars = i::NewArray<uint16_t>(64);
uint16_t* latin1_chars = i::NewArray<uint16_t>(64);
latin1_a_ascii->WriteLatin1(latin1_a_ascii_chars);
latin1_b_ascii->Write(latin1_b_ascii_chars);
latin1->Write(latin1_chars);
TestLatin1Resource* latin1_a_ascii_resource =
new TestLatin1Resource(latin1_a_ascii_chars, &counter);
TestResource* latin1_b_ascii_resource =
new TestResource(latin1_b_ascii_chars, &counter);
TestResource* latin1_resource =
new TestResource(latin1_chars, &counter);
// ASCII string + Latin1 resource: external ASCII, Latin1 with ASCII hint.
CHECK(latin1_a_ascii->MakeExternal(latin1_a_ascii_resource));
CHECK(latin1_a_ascii->IsExternalAscii());
CHECK_EQ(latin1_a_ascii->GetExternalStringEncoding(),
(v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
CHECK_EQ(latin1_a_ascii_resource,
latin1_a_ascii->GetExternalStringResourceBase());
CHECK(latin1_a_ascii->Equals(String::New("latin1 a ascii")));
// ASCII string + two-byte resource: UTF-16 with ASCII hint.
CHECK(latin1_b_ascii->MakeExternal(latin1_b_ascii_resource));
CHECK(latin1_b_ascii->IsExternal());
CHECK_EQ(latin1_b_ascii->GetExternalStringEncoding(),
(v8::String::UTF_16_ENCODING | v8::String::ASCII_HINT));
CHECK_EQ(latin1_b_ascii_resource,
latin1_b_ascii->GetExternalStringResourceBase());
CHECK(latin1_b_ascii->Equals(String::New("latin1 b ascii")));
// Non-ASCII string + two-byte resource: UTF-16 with NOT_ASCII hint.
CHECK(latin1->MakeExternal(latin1_resource));
CHECK(latin1->IsExternal());
CHECK_EQ(latin1->GetExternalStringEncoding(),
(v8::String::UTF_16_ENCODING | v8::String::NOT_ASCII_HINT));
CHECK_EQ(latin1_resource,
latin1->GetExternalStringResourceBase());
CHECK(latin1->Equals(String::New("latin1 non-ascii \xAA",
String::kUndefinedLength,
String::LATIN1_ENCODING)));
}
// Once the handles are gone, a full GC must dispose all three resources.
HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
CHECK_EQ(3, counter);
}
THREADED_TEST(StringConcat) {
{
v8::HandleScope scope;
HandleScope scope;
LocalContext env;
const char* one_byte_string_1 = "function a_times_t";
const char* two_byte_string_1 = "wo_plus_b(a, b) {return ";