A new instance of the utf-8 conversion changelist, this time against
bleeding_edge. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@170 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
388c1094b7
commit
1d8c4a9d94
25
include/v8.h
25
include/v8.h
@ -26,8 +26,8 @@
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/** \mainpage V8 API Reference Guide
|
||||
*
|
||||
* V8 is Google's open source JavaScript engine.
|
||||
*
|
||||
* V8 is Google's open source JavaScript engine.
|
||||
*
|
||||
* This set of documents provides reference material generated from the
|
||||
* V8 header file, include/v8.h.
|
||||
@ -485,7 +485,7 @@ class EXPORT Data {
|
||||
/**
|
||||
* Pre-compilation data that can be associated with a script. This
|
||||
* data can be calculated for a script in advance of actually
|
||||
* compiling it, and can bestored between compilations. When script
|
||||
* compiling it, and can be stored between compilations. When script
|
||||
* data is given to the compile method compilation will be faster.
|
||||
*/
|
||||
class EXPORT ScriptData { // NOLINT
|
||||
@ -631,7 +631,7 @@ class EXPORT Value : public Data {
|
||||
* Returns true if this value is boolean.
|
||||
*/
|
||||
bool IsBoolean();
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if this value is a number.
|
||||
*/
|
||||
@ -696,8 +696,18 @@ class EXPORT Boolean : public Primitive {
|
||||
*/
|
||||
class EXPORT String : public Primitive {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Returns the number of characters in this string.
|
||||
*/
|
||||
int Length();
|
||||
|
||||
/**
|
||||
* Returns the number of bytes in the UTF-8 encoded
|
||||
* representation of this string.
|
||||
*/
|
||||
int Utf8Length();
|
||||
|
||||
/**
|
||||
* Write the contents of the string to an external buffer.
|
||||
* If no arguments are given, expects the buffer to be large
|
||||
@ -716,9 +726,8 @@ class EXPORT String : public Primitive {
|
||||
* excluding the NULL terminator.
|
||||
*/
|
||||
int Write(uint16_t* buffer, int start = 0, int length = -1); // UTF-16
|
||||
int WriteAscii(char* buffer,
|
||||
int start = 0,
|
||||
int length = -1); // literally ascii
|
||||
int WriteAscii(char* buffer, int start = 0, int length = -1); // ASCII
|
||||
int WriteUtf8(char* buffer, int length = -1); // UTF-8
|
||||
|
||||
/**
|
||||
* Returns true if the string is external
|
||||
@ -1755,7 +1764,7 @@ class EXPORT V8 {
|
||||
static void IgnoreOutOfMemoryException();
|
||||
|
||||
/**
|
||||
* Check if V8 is dead and therefore unusable. This is the case after
|
||||
* Check if V8 is dead and therefore unusable. This is the case after
|
||||
* fatal errors such as out-of-memory situations.
|
||||
*/
|
||||
static bool IsDead();
|
||||
|
47
src/api.cc
47
src/api.cc
@ -1925,6 +1925,53 @@ int String::Length() {
|
||||
}
|
||||
|
||||
|
||||
int String::Utf8Length() {
|
||||
if (IsDeadCheck("v8::String::Utf8Length()")) return 0;
|
||||
return Utils::OpenHandle(this)->Utf8Length();
|
||||
}
|
||||
|
||||
|
||||
int String::WriteUtf8(char* buffer, int capacity) {
|
||||
if (IsDeadCheck("v8::String::WriteUtf8()")) return 0;
|
||||
LOG_API("String::WriteUtf8");
|
||||
i::Handle<i::String> str = Utils::OpenHandle(this);
|
||||
write_input_buffer.Reset(0, *str);
|
||||
int len = str->length();
|
||||
// Encode the first K - 3 bytes directly into the buffer since we
|
||||
// know there's room for them. If no capacity is given we copy all
|
||||
// of them here.
|
||||
int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);
|
||||
int i;
|
||||
int pos = 0;
|
||||
for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
|
||||
i::uc32 c = write_input_buffer.GetNext();
|
||||
int written = unibrow::Utf8::Encode(buffer + pos, c);
|
||||
pos += written;
|
||||
}
|
||||
if (i < len) {
|
||||
// For the last characters we need to check the length for each one
|
||||
// because they may be longer than the remaining space in the
|
||||
// buffer.
|
||||
char intermediate[unibrow::Utf8::kMaxEncodedSize];
|
||||
for (; i < len && pos < capacity; i++) {
|
||||
i::uc32 c = write_input_buffer.GetNext();
|
||||
int written = unibrow::Utf8::Encode(intermediate, c);
|
||||
if (pos + written <= capacity) {
|
||||
for (int j = 0; j < written; j++)
|
||||
buffer[pos + j] = intermediate[j];
|
||||
pos += written;
|
||||
} else {
|
||||
// We've reached the end of the buffer
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i == len && (capacity == -1 || pos < capacity))
|
||||
buffer[pos++] = '\0';
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
int String::WriteAscii(char* buffer, int start, int length) {
|
||||
if (IsDeadCheck("v8::String::WriteAscii()")) return 0;
|
||||
LOG_API("String::WriteAscii");
|
||||
|
@ -2915,6 +2915,22 @@ bool String::LooksValid() {
|
||||
}
|
||||
|
||||
|
||||
int String::Utf8Length() {
|
||||
if (is_ascii()) return length();
|
||||
// Attempt to flatten before accessing the string. It probably
|
||||
// doesn't make Utf8Length faster, but it is very likely that
|
||||
// the string will be accessed later (for example by WriteUtf8)
|
||||
// so it's still a good idea.
|
||||
TryFlatten();
|
||||
Access<StringInputBuffer> buffer(&string_input_buffer);
|
||||
buffer->Reset(0, this);
|
||||
int result = 0;
|
||||
while (buffer->has_more())
|
||||
result += unibrow::Utf8::Length(buffer->GetNext());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
SmartPointer<char> String::ToCString(AllowNullsFlag allow_nulls,
|
||||
RobustnessFlag robust_flag,
|
||||
int offset,
|
||||
|
@ -2842,6 +2842,8 @@ class String: public HeapObject {
|
||||
RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
|
||||
int* length_output = 0);
|
||||
|
||||
int Utf8Length();
|
||||
|
||||
// Return a 16 bit Unicode representation of the string.
|
||||
// The string should be nearly flat, otherwise the performance of
|
||||
// this method may be very bad. Setting robustness_flag to
|
||||
|
@ -333,3 +333,43 @@ TEST(DeepAscii) {
|
||||
TraverseFirst(flat_string, string, DEEP_ASCII_DEPTH);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST(Utf8Conversion) {
  // Smoke test for converting strings to utf-8: checks Utf8Length() on
  // an ASCII and a mixed string, then checks WriteUtf8() against every
  // buffer capacity from 0 up to the full encoded size plus terminator.
  InitializeVM();
  v8::HandleScope handle_scope;
  // A simple ascii string
  const char* ascii_string = "abcdef12345";
  // Convert once so the length is passed and compared as an int rather
  // than mixing size_t with int.
  const int ascii_length = static_cast<int>(strlen(ascii_string));
  int len = v8::String::New(ascii_string, ascii_length)->Utf8Length();
  CHECK_EQ(ascii_length, len);
  // A mixed ascii and non-ascii string
  // U+02E4 -> CB A4
  // U+0064 -> 64
  // U+12E4 -> E1 8B A4
  // U+0030 -> 30
  // U+3045 -> E3 81 85
  const uint16_t mixed_string[] = {0x02E4, 0x0064, 0x12E4, 0x0030, 0x3045};
  // The characters we expect to be output.  Bytes above 0x7F are cast
  // explicitly: putting them in a braced char initializer as plain int
  // constants is a narrowing conversion where char is signed.
  const char as_utf8[11] = {
      static_cast<char>(0xCB), static_cast<char>(0xA4),
      0x64,
      static_cast<char>(0xE1), static_cast<char>(0x8B),
      static_cast<char>(0xA4),
      0x30,
      static_cast<char>(0xE3), static_cast<char>(0x81),
      static_cast<char>(0x85),
      0x00};
  // The number of bytes expected to be written for each capacity.  A
  // character is only written if all of its bytes fit, and the final
  // entry includes the terminating '\0'.
  const int lengths[12] = {0, 0, 2, 3, 3, 3, 6, 7, 7, 7, 10, 11};
  // Marker for bytes WriteUtf8 must leave alone.  It is kept as a char
  // so that filling and checking agree whether plain char is signed or
  // unsigned; comparing against the int -1 fails when char is unsigned.
  const char kNoChar = static_cast<char>(-1);
  v8::Handle<v8::String> mixed = v8::String::New(mixed_string, 5);
  CHECK_EQ(10, mixed->Utf8Length());
  // Try encoding the string with all capacities
  char buffer[11];
  for (int i = 0; i <= 11; i++) {
    // Clear the buffer before reusing it
    for (int j = 0; j < 11; j++)
      buffer[j] = kNoChar;
    int written = mixed->WriteUtf8(buffer, i);
    CHECK_EQ(lengths[i], written);
    // Check that the contents are correct
    for (int j = 0; j < lengths[i]; j++)
      CHECK_EQ(as_utf8[j], buffer[j]);
    // Check that the rest of the buffer hasn't been touched
    for (int j = lengths[i]; j < 11; j++)
      CHECK_EQ(kNoChar, buffer[j]);
  }
}
|
||||
|
Loading…
Reference in New Issue
Block a user