[api] simplify String::WriteToUtf8.
Since we always flatten the string upfront, we don't need to implement the the UTF8 conversion as a string visitor anymore. R=petermarshall@chromium.org Bug: v8:6780, v8:8605 Change-Id: I27946551d7c3742f47ac36d5c909c19a7f2b0371 Reviewed-on: https://chromium-review.googlesource.com/c/1371828 Commit-Queue: Yang Guo <yangguo@chromium.org> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Cr-Commit-Position: refs/heads/master@{#58312}
This commit is contained in:
parent
e66c6ef750
commit
c30e65e15b
348
src/api.cc
348
src/api.cc
@ -5304,204 +5304,133 @@ int String::Utf8Length(Isolate* isolate) const {
|
||||
return utf8_length;
|
||||
}
|
||||
|
||||
class Utf8WriterVisitor {
|
||||
public:
|
||||
Utf8WriterVisitor(
|
||||
char* buffer,
|
||||
int capacity,
|
||||
bool skip_capacity_check,
|
||||
bool replace_invalid_utf8)
|
||||
: early_termination_(false),
|
||||
last_character_(unibrow::Utf16::kNoPreviousCharacter),
|
||||
buffer_(buffer),
|
||||
start_(buffer),
|
||||
capacity_(capacity),
|
||||
skip_capacity_check_(capacity == -1 || skip_capacity_check),
|
||||
replace_invalid_utf8_(replace_invalid_utf8),
|
||||
utf16_chars_read_(0) {
|
||||
}
|
||||
|
||||
static int WriteEndCharacter(uint16_t character,
|
||||
int last_character,
|
||||
int remaining,
|
||||
char* const buffer,
|
||||
bool replace_invalid_utf8) {
|
||||
DCHECK_GT(remaining, 0);
|
||||
// We can't use a local buffer here because Encode needs to modify
|
||||
// previous characters in the stream. We know, however, that
|
||||
// exactly one character will be advanced.
|
||||
if (unibrow::Utf16::IsSurrogatePair(last_character, character)) {
|
||||
int written = unibrow::Utf8::Encode(buffer, character, last_character,
|
||||
replace_invalid_utf8);
|
||||
DCHECK_EQ(written, 1);
|
||||
return written;
|
||||
namespace {
|
||||
// Writes the flat content of a string to a buffer. This is done in two phases.
|
||||
// The first phase calculates a pessimistic estimate (writable_length) on how
|
||||
// many code units can be safely written without exceeding the buffer capacity
|
||||
// and without leaving at a lone surrogate. The estimated number of code units
|
||||
// is then written out in one go, and the reported byte usage is used to
|
||||
// correct the estimate. This is repeated until the estimate becomes <= 0 or
|
||||
// all code units have been written out. The second phase writes out code
|
||||
// units until the buffer capacity is reached, would be exceeded by the next
|
||||
// unit, or all code units have been written out.
|
||||
template <typename Char>
|
||||
static int WriteUtf8Impl(i::Vector<const Char> string, char* write_start,
|
||||
int write_capacity, int options,
|
||||
int* utf16_chars_read_out) {
|
||||
bool write_null = !(options & v8::String::NO_NULL_TERMINATION);
|
||||
bool replace_invalid_utf8 = (options & v8::String::REPLACE_INVALID_UTF8);
|
||||
char* current_write = write_start;
|
||||
const Char* read_start = string.start();
|
||||
int read_index = 0;
|
||||
int read_length = string.length();
|
||||
int prev_char = unibrow::Utf16::kNoPreviousCharacter;
|
||||
// Do a fast loop where there is no exit capacity check.
|
||||
// Need enough space to write everything but one character.
|
||||
STATIC_ASSERT(unibrow::Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit == 3);
|
||||
static const int kMaxSizePerChar = sizeof(Char) == 1 ? 2 : 3;
|
||||
while (read_index < read_length) {
|
||||
int up_to = read_length;
|
||||
if (write_capacity != -1) {
|
||||
int remaining_capacity =
|
||||
write_capacity - static_cast<int>(current_write - write_start);
|
||||
int writable_length =
|
||||
(remaining_capacity - kMaxSizePerChar) / kMaxSizePerChar;
|
||||
// Need to drop into slow loop.
|
||||
if (writable_length <= 0) break;
|
||||
up_to = std::min(up_to, read_index + writable_length);
|
||||
}
|
||||
// Use a scratch buffer to check the required characters.
|
||||
char temp_buffer[unibrow::Utf8::kMaxEncodedSize];
|
||||
// Can't encode using last_character as gcc has array bounds issues.
|
||||
int written = unibrow::Utf8::Encode(temp_buffer, character,
|
||||
// Write the characters to the stream.
|
||||
if (sizeof(Char) == 1) {
|
||||
// Simply memcpy if we only have ASCII characters.
|
||||
uint8_t char_mask = 0;
|
||||
for (int i = read_index; i < up_to; i++) char_mask |= read_start[i];
|
||||
if ((char_mask & 0x80) == 0) {
|
||||
int copy_length = up_to - read_index;
|
||||
memcpy(current_write, read_start + read_index, copy_length);
|
||||
current_write += copy_length;
|
||||
read_index = up_to;
|
||||
} else {
|
||||
for (; read_index < up_to; read_index++) {
|
||||
current_write += unibrow::Utf8::EncodeOneByte(
|
||||
current_write, static_cast<uint8_t>(read_start[read_index]));
|
||||
DCHECK(write_capacity == -1 ||
|
||||
(current_write - write_start) <= write_capacity);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (; read_index < up_to; read_index++) {
|
||||
uint16_t character = read_start[read_index];
|
||||
current_write += unibrow::Utf8::Encode(current_write, character,
|
||||
prev_char, replace_invalid_utf8);
|
||||
prev_char = character;
|
||||
DCHECK(write_capacity == -1 ||
|
||||
(current_write - write_start) <= write_capacity);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (read_index < read_length) {
|
||||
DCHECK_NE(-1, write_capacity);
|
||||
// Aborted due to limited capacity. Check capacity on each iteration.
|
||||
int remaining_capacity =
|
||||
write_capacity - static_cast<int>(current_write - write_start);
|
||||
DCHECK_GE(remaining_capacity, 0);
|
||||
for (; read_index < read_length && remaining_capacity > 0; read_index++) {
|
||||
uint32_t character = read_start[read_index];
|
||||
int written = 0;
|
||||
// We can't use a local buffer here because Encode needs to modify
|
||||
// previous characters in the stream. We know, however, that
|
||||
// exactly one character will be advanced.
|
||||
if (unibrow::Utf16::IsSurrogatePair(prev_char, character)) {
|
||||
written = unibrow::Utf8::Encode(current_write, character, prev_char,
|
||||
replace_invalid_utf8);
|
||||
DCHECK_EQ(written, 1);
|
||||
} else {
|
||||
// Use a scratch buffer to check the required characters.
|
||||
char temp_buffer[unibrow::Utf8::kMaxEncodedSize];
|
||||
// Encoding a surrogate pair to Utf8 always takes 4 bytes.
|
||||
static const int kSurrogatePairEncodedSize =
|
||||
static_cast<int>(unibrow::Utf8::kMaxEncodedSize);
|
||||
// For REPLACE_INVALID_UTF8, catch the case where we cut off in the
|
||||
// middle of a surrogate pair. Abort before encoding the pair instead.
|
||||
if (replace_invalid_utf8 &&
|
||||
remaining_capacity < kSurrogatePairEncodedSize &&
|
||||
unibrow::Utf16::IsLeadSurrogate(character) &&
|
||||
read_index + 1 < read_length &&
|
||||
unibrow::Utf16::IsTrailSurrogate(read_start[read_index + 1])) {
|
||||
write_null = false;
|
||||
break;
|
||||
}
|
||||
// Can't encode using prev_char as gcc has array bounds issues.
|
||||
written = unibrow::Utf8::Encode(temp_buffer, character,
|
||||
unibrow::Utf16::kNoPreviousCharacter,
|
||||
replace_invalid_utf8);
|
||||
// Won't fit.
|
||||
if (written > remaining) return 0;
|
||||
// Copy over the character from temp_buffer.
|
||||
for (int j = 0; j < written; j++) {
|
||||
buffer[j] = temp_buffer[j];
|
||||
}
|
||||
return written;
|
||||
}
|
||||
if (written > remaining_capacity) {
|
||||
// Won't fit. Abort and do not null-terminate the result.
|
||||
write_null = false;
|
||||
break;
|
||||
}
|
||||
// Copy over the character from temp_buffer.
|
||||
for (int i = 0; i < written; i++) current_write[i] = temp_buffer[i];
|
||||
}
|
||||
|
||||
// Visit writes out a group of code units (chars) of a v8::String to the
|
||||
// internal buffer_. This is done in two phases. The first phase calculates a
|
||||
// pesimistic estimate (writable_length) on how many code units can be safely
|
||||
// written without exceeding the buffer capacity and without writing the last
|
||||
// code unit (it could be a lead surrogate). The estimated number of code
|
||||
// units is then written out in one go, and the reported byte usage is used
|
||||
// to correct the estimate. This is repeated until the estimate becomes <= 0
|
||||
// or all code units have been written out. The second phase writes out code
|
||||
// units until the buffer capacity is reached, would be exceeded by the next
|
||||
// unit, or all units have been written out.
|
||||
template<typename Char>
|
||||
void Visit(const Char* chars, const int length) {
|
||||
DCHECK(!early_termination_);
|
||||
if (length == 0) return;
|
||||
// Copy state to stack.
|
||||
char* buffer = buffer_;
|
||||
int last_character = sizeof(Char) == 1
|
||||
? unibrow::Utf16::kNoPreviousCharacter
|
||||
: last_character_;
|
||||
int i = 0;
|
||||
// Do a fast loop where there is no exit capacity check.
|
||||
while (true) {
|
||||
int fast_length;
|
||||
if (skip_capacity_check_) {
|
||||
fast_length = length;
|
||||
} else {
|
||||
int remaining_capacity = capacity_ - static_cast<int>(buffer - start_);
|
||||
// Need enough space to write everything but one character.
|
||||
STATIC_ASSERT(unibrow::Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit ==
|
||||
3);
|
||||
int max_size_per_char = sizeof(Char) == 1 ? 2 : 3;
|
||||
int writable_length =
|
||||
(remaining_capacity - max_size_per_char)/max_size_per_char;
|
||||
// Need to drop into slow loop.
|
||||
if (writable_length <= 0) break;
|
||||
fast_length = i + writable_length;
|
||||
if (fast_length > length) fast_length = length;
|
||||
}
|
||||
// Write the characters to the stream.
|
||||
if (sizeof(Char) == 1) {
|
||||
for (; i < fast_length; i++) {
|
||||
buffer += unibrow::Utf8::EncodeOneByte(
|
||||
buffer, static_cast<uint8_t>(*chars++));
|
||||
DCHECK(capacity_ == -1 || (buffer - start_) <= capacity_);
|
||||
}
|
||||
} else {
|
||||
for (; i < fast_length; i++) {
|
||||
uint16_t character = *chars++;
|
||||
buffer += unibrow::Utf8::Encode(buffer, character, last_character,
|
||||
replace_invalid_utf8_);
|
||||
last_character = character;
|
||||
DCHECK(capacity_ == -1 || (buffer - start_) <= capacity_);
|
||||
}
|
||||
}
|
||||
// Array is fully written. Exit.
|
||||
if (fast_length == length) {
|
||||
// Write state back out to object.
|
||||
last_character_ = last_character;
|
||||
buffer_ = buffer;
|
||||
utf16_chars_read_ += length;
|
||||
return;
|
||||
}
|
||||
}
|
||||
DCHECK(!skip_capacity_check_);
|
||||
// Slow loop. Must check capacity on each iteration.
|
||||
int remaining_capacity = capacity_ - static_cast<int>(buffer - start_);
|
||||
DCHECK_GE(remaining_capacity, 0);
|
||||
for (; i < length && remaining_capacity > 0; i++) {
|
||||
uint16_t character = *chars++;
|
||||
// remaining_capacity is <= 3 bytes at this point, so we do not write out
|
||||
// an umatched lead surrogate.
|
||||
if (replace_invalid_utf8_ && unibrow::Utf16::IsLeadSurrogate(character)) {
|
||||
early_termination_ = true;
|
||||
break;
|
||||
}
|
||||
int written = WriteEndCharacter(character,
|
||||
last_character,
|
||||
remaining_capacity,
|
||||
buffer,
|
||||
replace_invalid_utf8_);
|
||||
if (written == 0) {
|
||||
early_termination_ = true;
|
||||
break;
|
||||
}
|
||||
buffer += written;
|
||||
current_write += written;
|
||||
remaining_capacity -= written;
|
||||
last_character = character;
|
||||
prev_char = character;
|
||||
}
|
||||
// Write state back out to object.
|
||||
last_character_ = last_character;
|
||||
buffer_ = buffer;
|
||||
utf16_chars_read_ += i;
|
||||
}
|
||||
|
||||
inline bool IsDone() {
|
||||
return early_termination_;
|
||||
}
|
||||
// Write out number of utf16 characters written to the stream.
|
||||
if (utf16_chars_read_out != nullptr) *utf16_chars_read_out = read_index;
|
||||
|
||||
inline void VisitOneByteString(const uint8_t* chars, int length) {
|
||||
Visit(chars, length);
|
||||
// Only null-terminate if there's space.
|
||||
if (write_null && (write_capacity == -1 ||
|
||||
(current_write - write_start) < write_capacity)) {
|
||||
*current_write++ = '\0';
|
||||
}
|
||||
|
||||
inline void VisitTwoByteString(const uint16_t* chars, int length) {
|
||||
Visit(chars, length);
|
||||
}
|
||||
|
||||
int CompleteWrite(bool write_null, int* utf16_chars_read_out) {
|
||||
// Write out number of utf16 characters written to the stream.
|
||||
if (utf16_chars_read_out != nullptr) {
|
||||
*utf16_chars_read_out = utf16_chars_read_;
|
||||
}
|
||||
// Only null terminate if all of the string was written and there's space.
|
||||
if (write_null &&
|
||||
!early_termination_ &&
|
||||
(capacity_ == -1 || (buffer_ - start_) < capacity_)) {
|
||||
*buffer_++ = '\0';
|
||||
}
|
||||
return static_cast<int>(buffer_ - start_);
|
||||
}
|
||||
|
||||
private:
|
||||
bool early_termination_;
|
||||
int last_character_;
|
||||
char* buffer_;
|
||||
char* const start_;
|
||||
int capacity_;
|
||||
bool const skip_capacity_check_;
|
||||
bool const replace_invalid_utf8_;
|
||||
int utf16_chars_read_;
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor);
|
||||
};
|
||||
|
||||
// TODO(yangguo): Simplify this. We can now expect the string to be flat.
|
||||
static bool RecursivelySerializeToUtf8(i::String current,
|
||||
Utf8WriterVisitor* writer,
|
||||
int recursion_budget) {
|
||||
while (!writer->IsDone()) {
|
||||
i::ConsString cons_string = i::String::VisitFlat(writer, current);
|
||||
if (cons_string.is_null()) return true; // Leaf node.
|
||||
if (recursion_budget <= 0) return false;
|
||||
// Must write the left branch first.
|
||||
i::String first = cons_string->first();
|
||||
bool success = RecursivelySerializeToUtf8(first,
|
||||
writer,
|
||||
recursion_budget - 1);
|
||||
if (!success) return false;
|
||||
// Inline tail recurse for right branch.
|
||||
current = cons_string->second();
|
||||
}
|
||||
return true;
|
||||
return static_cast<int>(current_write - write_start);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
int String::WriteUtf8(Isolate* v8_isolate, char* buffer, int capacity,
|
||||
int* nchars_ref, int options) const {
|
||||
@ -5509,43 +5438,16 @@ int String::WriteUtf8(Isolate* v8_isolate, char* buffer, int capacity,
|
||||
i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
|
||||
LOG_API(isolate, String, WriteUtf8);
|
||||
ENTER_V8_NO_SCRIPT_NO_EXCEPTION(isolate);
|
||||
str = i::String::Flatten(isolate, str); // Flatten the string for efficiency.
|
||||
const int string_length = str->length();
|
||||
bool write_null = !(options & NO_NULL_TERMINATION);
|
||||
bool replace_invalid_utf8 = (options & REPLACE_INVALID_UTF8);
|
||||
int max16BitCodeUnitSize = unibrow::Utf8::kMax16BitCodeUnitSize;
|
||||
// First check if we can just write the string without checking capacity.
|
||||
if (capacity == -1 || capacity / max16BitCodeUnitSize >= string_length) {
|
||||
Utf8WriterVisitor writer(buffer, capacity, true, replace_invalid_utf8);
|
||||
const int kMaxRecursion = 100;
|
||||
bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion);
|
||||
if (success) return writer.CompleteWrite(write_null, nchars_ref);
|
||||
} else if (capacity >= string_length) {
|
||||
// First check that the buffer is large enough.
|
||||
int utf8_bytes = Utf8Length(v8_isolate);
|
||||
if (utf8_bytes <= capacity) {
|
||||
// one-byte fast path.
|
||||
if (utf8_bytes == string_length) {
|
||||
WriteOneByte(v8_isolate, reinterpret_cast<uint8_t*>(buffer), 0,
|
||||
capacity, options);
|
||||
if (nchars_ref != nullptr) *nchars_ref = string_length;
|
||||
if (write_null && (utf8_bytes+1 <= capacity)) {
|
||||
return string_length + 1;
|
||||
}
|
||||
return string_length;
|
||||
}
|
||||
if (write_null && (utf8_bytes+1 > capacity)) {
|
||||
options |= NO_NULL_TERMINATION;
|
||||
}
|
||||
// Recurse once without a capacity limit.
|
||||
// This will get into the first branch above.
|
||||
// TODO(dcarney) Check max left rec. in Utf8Length and fall through.
|
||||
return WriteUtf8(v8_isolate, buffer, -1, nchars_ref, options);
|
||||
}
|
||||
str = i::String::Flatten(isolate, str);
|
||||
i::DisallowHeapAllocation no_gc;
|
||||
i::String::FlatContent content = str->GetFlatContent(no_gc);
|
||||
if (content.IsOneByte()) {
|
||||
return WriteUtf8Impl<uint8_t>(content.ToOneByteVector(), buffer, capacity,
|
||||
options, nchars_ref);
|
||||
} else {
|
||||
return WriteUtf8Impl<uint16_t>(content.ToUC16Vector(), buffer, capacity,
|
||||
options, nchars_ref);
|
||||
}
|
||||
Utf8WriterVisitor writer(buffer, capacity, false, replace_invalid_utf8);
|
||||
i::String::VisitFlat(&writer, *str);
|
||||
return writer.CompleteWrite(write_null, nchars_ref);
|
||||
}
|
||||
|
||||
template <typename CharType>
|
||||
|
@ -8452,6 +8452,13 @@ THREADED_TEST(StringWrite) {
|
||||
int len;
|
||||
int charlen;
|
||||
|
||||
memset(utf8buf, 0x1, 1000);
|
||||
len = v8::String::Empty(isolate)->WriteUtf8(isolate, utf8buf, sizeof(utf8buf),
|
||||
&charlen);
|
||||
CHECK_EQ(1, len);
|
||||
CHECK_EQ(0, charlen);
|
||||
CHECK_EQ(0, strcmp(utf8buf, ""));
|
||||
|
||||
memset(utf8buf, 0x1, 1000);
|
||||
len = str2->WriteUtf8(isolate, utf8buf, sizeof(utf8buf), &charlen);
|
||||
CHECK_EQ(9, len);
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "src/v8.h"
|
||||
|
||||
#include "src/api-inl.h"
|
||||
#include "src/base/platform/elapsed-timer.h"
|
||||
#include "src/heap/factory.h"
|
||||
#include "src/messages.h"
|
||||
#include "src/objects-inl.h"
|
||||
@ -957,6 +958,109 @@ TEST(Utf8Conversion) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Utf8ConversionPerf) {
|
||||
// Smoke test for converting strings to utf-8.
|
||||
LocalContext context;
|
||||
v8::HandleScope handle_scope(CcTest::isolate());
|
||||
v8::Local<v8::String> ascii_string =
|
||||
CompileRun("'abc'.repeat(1E6)").As<v8::String>();
|
||||
v8::Local<v8::String> one_byte_string =
|
||||
CompileRun("'\\u0255\\u0254\\u0253'.repeat(1E6)").As<v8::String>();
|
||||
v8::Local<v8::String> two_byte_string =
|
||||
CompileRun("'\\u2255\\u2254\\u2253'.repeat(1E6)").As<v8::String>();
|
||||
v8::Local<v8::String> surrogate_string =
|
||||
CompileRun("'\\u{12345}\\u2244'.repeat(1E6)").As<v8::String>();
|
||||
int size = 1E7;
|
||||
char* buffer = new char[4 * size];
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
ascii_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("ascii string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
ascii_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("ascii string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
ascii_string->WriteUtf8(CcTest::isolate(), buffer, 4 * size, nullptr);
|
||||
printf("ascii string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
one_byte_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("one byte string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
one_byte_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("one byte string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
one_byte_string->WriteUtf8(CcTest::isolate(), buffer, 4 * size, nullptr);
|
||||
printf("one byte string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
two_byte_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("two byte string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
two_byte_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("two byte string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
two_byte_string->WriteUtf8(CcTest::isolate(), buffer, 4 * size, nullptr);
|
||||
printf("two byte string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
surrogate_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("surrogate string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
surrogate_string->WriteUtf8(CcTest::isolate(), buffer, size, nullptr);
|
||||
printf("surrogate string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
{
|
||||
v8::base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
surrogate_string->WriteUtf8(CcTest::isolate(), buffer, 4 * size, nullptr);
|
||||
printf("surrogate string %0.3f\n", timer.Elapsed().InMillisecondsF());
|
||||
timer.Stop();
|
||||
}
|
||||
delete[] buffer;
|
||||
}
|
||||
|
||||
TEST(ExternalShortStringAdd) {
|
||||
LocalContext context;
|
||||
@ -1064,6 +1168,27 @@ TEST(ExternalShortStringAdd) {
|
||||
CHECK_EQ(0, CompileRun(source)->Int32Value(context.local()).FromJust());
|
||||
}
|
||||
|
||||
TEST(ReplaceInvalidUtf8) {
|
||||
LocalContext context;
|
||||
v8::HandleScope handle_scope(CcTest::isolate());
|
||||
v8::Local<v8::String> string = CompileRun("'ab\\ud800cd'").As<v8::String>();
|
||||
char buffer[7];
|
||||
memset(buffer, 0, 7);
|
||||
int chars_written = 0;
|
||||
int size = string->WriteUtf8(CcTest::isolate(), buffer, 7, &chars_written,
|
||||
v8::String::REPLACE_INVALID_UTF8);
|
||||
CHECK_EQ(7, size);
|
||||
CHECK_EQ(5, chars_written);
|
||||
CHECK_EQ(0, memcmp("\x61\x62\xef\xbf\xbd\x63\x64", buffer, 7));
|
||||
|
||||
memset(buffer, 0, 7);
|
||||
chars_written = 0;
|
||||
size = string->WriteUtf8(CcTest::isolate(), buffer, 6, &chars_written,
|
||||
v8::String::REPLACE_INVALID_UTF8);
|
||||
CHECK_EQ(6, size);
|
||||
CHECK_EQ(4, chars_written);
|
||||
CHECK_EQ(0, memcmp("\x61\x62\xef\xbf\xbd\x63", buffer, 6));
|
||||
}
|
||||
|
||||
TEST(JSONStringifySliceMadeExternal) {
|
||||
if (!FLAG_string_slices) return;
|
||||
|
Loading…
Reference in New Issue
Block a user