Fix input and output to handle UTF16 surrogate pairs.
Review URL: https://chromiumcodereview.appspot.com/9600009
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11007 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
cd91894d2f
commit
03cfc4363b
44
src/api.cc
44
src/api.cc
@ -1430,7 +1430,7 @@ void ObjectTemplate::SetInternalFieldCount(int value) {
|
||||
|
||||
|
||||
ScriptData* ScriptData::PreCompile(const char* input, int length) {
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
i::Utf8ToUtf16CharacterStream stream(
|
||||
reinterpret_cast<const unsigned char*>(input), length);
|
||||
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
|
||||
}
|
||||
@ -1439,11 +1439,11 @@ ScriptData* ScriptData::PreCompile(const char* input, int length) {
|
||||
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
|
||||
i::Handle<i::String> str = Utils::OpenHandle(*source);
|
||||
if (str->IsExternalTwoByteString()) {
|
||||
i::ExternalTwoByteStringUC16CharacterStream stream(
|
||||
i::ExternalTwoByteStringUtf16CharacterStream stream(
|
||||
i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
|
||||
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
|
||||
} else {
|
||||
i::GenericStringUC16CharacterStream stream(str, 0, str->length());
|
||||
i::GenericStringUtf16CharacterStream stream(str, 0, str->length());
|
||||
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
|
||||
}
|
||||
}
|
||||
@ -3690,7 +3690,7 @@ int String::Length() const {
|
||||
int String::Utf8Length() const {
|
||||
i::Handle<i::String> str = Utils::OpenHandle(this);
|
||||
if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
|
||||
return str->Utf8Length();
|
||||
return i::Utf8Length(str);
|
||||
}
|
||||
|
||||
|
||||
@ -3736,11 +3736,13 @@ int String::WriteUtf8(char* buffer,
|
||||
int i;
|
||||
int pos = 0;
|
||||
int nchars = 0;
|
||||
int previous = unibrow::Utf16::kNoPreviousCharacter;
|
||||
for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
|
||||
i::uc32 c = write_input_buffer.GetNext();
|
||||
int written = unibrow::Utf8::Encode(buffer + pos, c);
|
||||
int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
|
||||
pos += written;
|
||||
nchars++;
|
||||
previous = c;
|
||||
}
|
||||
if (i < len) {
|
||||
// For the last characters we need to check the length for each one
|
||||
@ -3749,16 +3751,33 @@ int String::WriteUtf8(char* buffer,
|
||||
char intermediate[unibrow::Utf8::kMaxEncodedSize];
|
||||
for (; i < len && pos < capacity; i++) {
|
||||
i::uc32 c = write_input_buffer.GetNext();
|
||||
int written = unibrow::Utf8::Encode(intermediate, c);
|
||||
if (pos + written <= capacity) {
|
||||
for (int j = 0; j < written; j++)
|
||||
buffer[pos + j] = intermediate[j];
|
||||
if (unibrow::Utf16::IsTrailSurrogate(c) &&
|
||||
unibrow::Utf16::IsLeadSurrogate(previous)) {
|
||||
// We can't use the intermediate buffer here because the encoding
|
||||
// of surrogate pairs is done under the assumption that you can step
|
||||
// back and fix the UTF8 stream. Luckily we only need space for one
|
||||
// more byte, so there is always space.
|
||||
ASSERT(pos < capacity);
|
||||
int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
|
||||
ASSERT(written == 1);
|
||||
pos += written;
|
||||
nchars++;
|
||||
} else {
|
||||
// We've reached the end of the buffer
|
||||
break;
|
||||
int written =
|
||||
unibrow::Utf8::Encode(intermediate,
|
||||
c,
|
||||
unibrow::Utf16::kNoPreviousCharacter);
|
||||
if (pos + written <= capacity) {
|
||||
for (int j = 0; j < written; j++)
|
||||
buffer[pos + j] = intermediate[j];
|
||||
pos += written;
|
||||
nchars++;
|
||||
} else {
|
||||
// We've reached the end of the buffer
|
||||
break;
|
||||
}
|
||||
}
|
||||
previous = c;
|
||||
}
|
||||
}
|
||||
if (nchars_ref != NULL) *nchars_ref = nchars;
|
||||
@ -5240,7 +5259,8 @@ String::Utf8Value::Utf8Value(v8::Handle<v8::Value> obj)
|
||||
TryCatch try_catch;
|
||||
Handle<String> str = obj->ToString();
|
||||
if (str.IsEmpty()) return;
|
||||
length_ = str->Utf8Length();
|
||||
i::Handle<i::String> i_str = Utils::OpenHandle(*str);
|
||||
length_ = i::Utf8Length(i_str);
|
||||
str_ = i::NewArray<char>(length_ + 1);
|
||||
str->WriteUtf8(str_);
|
||||
}
|
||||
|
@ -472,7 +472,7 @@ void RegExpMacroAssemblerARM::CheckNotCharacterAfterMinusAnd(
|
||||
uc16 minus,
|
||||
uc16 mask,
|
||||
Label* on_not_equal) {
|
||||
ASSERT(minus < String::kMaxUC16CharCode);
|
||||
ASSERT(minus < String::kMaxUtf16CodeUnit);
|
||||
__ sub(r0, current_character(), Operand(minus));
|
||||
__ and_(r0, r0, Operand(mask));
|
||||
__ cmp(r0, Operand(c));
|
||||
|
@ -372,8 +372,11 @@ bool DebuggerAgentUtil::SendMessage(const Socket* conn,
|
||||
|
||||
// Calculate the message size in UTF-8 encoding.
|
||||
int utf8_len = 0;
|
||||
int previous = unibrow::Utf16::kNoPreviousCharacter;
|
||||
for (int i = 0; i < message.length(); i++) {
|
||||
utf8_len += unibrow::Utf8::Length(message[i]);
|
||||
uint16_t character = message[i];
|
||||
utf8_len += unibrow::Utf8::Length(character, previous);
|
||||
previous = character;
|
||||
}
|
||||
|
||||
// Send the header.
|
||||
@ -388,17 +391,33 @@ bool DebuggerAgentUtil::SendMessage(const Socket* conn,
|
||||
|
||||
// Send message body as UTF-8.
|
||||
int buffer_position = 0; // Current buffer position.
|
||||
previous = unibrow::Utf16::kNoPreviousCharacter;
|
||||
for (int i = 0; i < message.length(); i++) {
|
||||
// Write next UTF-8 encoded character to buffer.
|
||||
uint16_t character = message[i];
|
||||
buffer_position +=
|
||||
unibrow::Utf8::Encode(buffer + buffer_position, message[i]);
|
||||
unibrow::Utf8::Encode(buffer + buffer_position, character, previous);
|
||||
ASSERT(buffer_position < kBufferSize);
|
||||
|
||||
// Send buffer if full or last character is encoded.
|
||||
if (kBufferSize - buffer_position < 3 || i == message.length() - 1) {
|
||||
conn->Send(buffer, buffer_position);
|
||||
buffer_position = 0;
|
||||
if (kBufferSize - buffer_position <
|
||||
unibrow::Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit ||
|
||||
i == message.length() - 1) {
|
||||
if (unibrow::Utf16::IsLeadSurrogate(character)) {
|
||||
const int kEncodedSurrogateLength =
|
||||
unibrow::Utf16::kUtf8BytesToCodeASurrogate;
|
||||
ASSERT(buffer_position >= kEncodedSurrogateLength);
|
||||
conn->Send(buffer, buffer_position - kEncodedSurrogateLength);
|
||||
for (int i = 0; i < kEncodedSurrogateLength; i++) {
|
||||
buffer[i] = buffer[buffer_position + i];
|
||||
}
|
||||
buffer_position = kEncodedSurrogateLength;
|
||||
} else {
|
||||
conn->Send(buffer, buffer_position);
|
||||
buffer_position = 0;
|
||||
}
|
||||
}
|
||||
previous = character;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -267,8 +267,9 @@ const int kBinary32ExponentShift = 23;
|
||||
// other bits set.
|
||||
const uint64_t kQuietNaNMask = static_cast<uint64_t>(0xfff) << 51;
|
||||
|
||||
// ASCII/UC16 constants
|
||||
// ASCII/UTF-16 constants
|
||||
// Code-point values in Unicode 4.0 are 21 bits wide.
|
||||
// Code units in UTF-16 are 16 bits wide.
|
||||
typedef uint16_t uc16;
|
||||
typedef int32_t uc32;
|
||||
const int kASCIISize = kCharSize;
|
||||
|
158
src/handles.cc
158
src/handles.cc
@ -800,4 +800,162 @@ Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table,
|
||||
}
|
||||
|
||||
|
||||
// This method determines the type of string involved and then gets the UTF8
|
||||
// length of the string. It doesn't flatten the string and has log(n) recursion
|
||||
// for a string of length n. If the failure flag gets set, then we have to
|
||||
// flatten the string and retry. Failures are caused by surrogate pairs in deep
|
||||
// cons strings.
|
||||
|
||||
// Single surrogate characters that are encountered in the UTF-16 character
|
||||
// sequence of the input string get counted as 3 UTF-8 bytes, because that
|
||||
// is the way that WriteUtf8 will encode them. Surrogate pairs are counted and
|
||||
// encoded as one 4-byte UTF-8 sequence.
|
||||
|
||||
// This function conceptually uses recursion on the two halves of cons strings.
|
||||
// However, in order to avoid the recursion going too deep it recurses on the
|
||||
// second string of the cons, but iterates on the first substring (by manually
|
||||
// eliminating it as a tail recursion). This means it counts the UTF-8 length
|
||||
// from the end to the start, which makes no difference to the total.
|
||||
|
||||
// Surrogate pairs are recognized even if they are split across two sides of a
|
||||
// cons, which complicates the implementation somewhat. Therefore, overly deep
|
||||
// recursion cannot always be avoided. This case is detected, and the failure
|
||||
// flag is set, a signal to the caller that the string should be flattened and
|
||||
// the operation retried.
|
||||
int Utf8LengthHelper(String* input,
|
||||
int from,
|
||||
int to,
|
||||
bool followed_by_surrogate,
|
||||
int max_recursion,
|
||||
bool* failure,
|
||||
bool* starts_with_surrogate) {
|
||||
if (from == to) return 0;
|
||||
int total = 0;
|
||||
bool dummy;
|
||||
while (true) {
|
||||
if (input->IsAsciiRepresentation()) {
|
||||
*starts_with_surrogate = false;
|
||||
return total + to - from;
|
||||
}
|
||||
switch (StringShape(input).representation_tag()) {
|
||||
case kConsStringTag: {
|
||||
ConsString* str = ConsString::cast(input);
|
||||
String* first = str->first();
|
||||
String* second = str->second();
|
||||
int first_length = first->length();
|
||||
if (first_length - from > to - first_length) {
|
||||
if (first_length < to) {
|
||||
// Right hand side is shorter. No need to check the recursion depth
|
||||
// since this can only happen log(n) times.
|
||||
bool right_starts_with_surrogate = false;
|
||||
total += Utf8LengthHelper(second,
|
||||
0,
|
||||
to - first_length,
|
||||
followed_by_surrogate,
|
||||
max_recursion - 1,
|
||||
failure,
|
||||
&right_starts_with_surrogate);
|
||||
if (*failure) return 0;
|
||||
followed_by_surrogate = right_starts_with_surrogate;
|
||||
input = first;
|
||||
to = first_length;
|
||||
} else {
|
||||
// We only need the left hand side.
|
||||
input = first;
|
||||
}
|
||||
} else {
|
||||
if (first_length > from) {
|
||||
// Left hand side is shorter.
|
||||
if (first->IsAsciiRepresentation()) {
|
||||
total += first_length - from;
|
||||
*starts_with_surrogate = false;
|
||||
starts_with_surrogate = &dummy;
|
||||
input = second;
|
||||
from = 0;
|
||||
to -= first_length;
|
||||
} else if (second->IsAsciiRepresentation()) {
|
||||
followed_by_surrogate = false;
|
||||
total += to - first_length;
|
||||
input = first;
|
||||
to = first_length;
|
||||
} else if (max_recursion > 0) {
|
||||
bool right_starts_with_surrogate = false;
|
||||
// Recursing on the long one. This may fail.
|
||||
total += Utf8LengthHelper(second,
|
||||
0,
|
||||
to - first_length,
|
||||
followed_by_surrogate,
|
||||
max_recursion - 1,
|
||||
failure,
|
||||
&right_starts_with_surrogate);
|
||||
if (*failure) return 0;
|
||||
input = first;
|
||||
to = first_length;
|
||||
followed_by_surrogate = right_starts_with_surrogate;
|
||||
} else {
|
||||
*failure = true;
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
// We only need the right hand side.
|
||||
input = second;
|
||||
from = 0;
|
||||
to -= first_length;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
case kExternalStringTag:
|
||||
case kSeqStringTag: {
|
||||
Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector();
|
||||
const uc16* p = vector.start();
|
||||
int previous = unibrow::Utf16::kNoPreviousCharacter;
|
||||
for (int i = from; i < to; i++) {
|
||||
uc16 c = p[i];
|
||||
total += unibrow::Utf8::Length(c, previous);
|
||||
previous = c;
|
||||
}
|
||||
if (to - from > 0) {
|
||||
if (unibrow::Utf16::IsLeadSurrogate(previous) &&
|
||||
followed_by_surrogate) {
|
||||
total -= unibrow::Utf8::kBytesSavedByCombiningSurrogates;
|
||||
}
|
||||
if (unibrow::Utf16::IsTrailSurrogate(p[from])) {
|
||||
*starts_with_surrogate = true;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
case kSlicedStringTag: {
|
||||
SlicedString* str = SlicedString::cast(input);
|
||||
int offset = str->offset();
|
||||
input = str->parent();
|
||||
from += offset;
|
||||
to += offset;
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int Utf8Length(Handle<String> str) {
|
||||
bool dummy;
|
||||
bool failure;
|
||||
int len;
|
||||
const int kRecursionBudget = 100;
|
||||
do {
|
||||
failure = false;
|
||||
len = Utf8LengthHelper(
|
||||
*str, 0, str->length(), false, kRecursionBudget, &failure, &dummy);
|
||||
if (failure) FlattenString(str);
|
||||
} while (failure);
|
||||
return len;
|
||||
}
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
@ -174,6 +174,8 @@ void FlattenString(Handle<String> str);
|
||||
// string.
|
||||
Handle<String> FlattenGetString(Handle<String> str);
|
||||
|
||||
int Utf8Length(Handle<String> str);
|
||||
|
||||
Handle<Object> SetProperty(Handle<Object> object,
|
||||
Handle<Object> key,
|
||||
Handle<Object> value,
|
||||
|
36
src/heap.cc
36
src/heap.cc
@ -4186,8 +4186,6 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
|
||||
|
||||
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
|
||||
PretenureFlag pretenure) {
|
||||
// V8 only supports characters in the Basic Multilingual Plane.
|
||||
const uc32 kMaxSupportedChar = 0xFFFF;
|
||||
// Count the number of characters in the UTF-8 string and check if
|
||||
// it is an ASCII string.
|
||||
Access<UnicodeCache::Utf8Decoder>
|
||||
@ -4195,8 +4193,12 @@ MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
|
||||
decoder->Reset(string.start(), string.length());
|
||||
int chars = 0;
|
||||
while (decoder->has_more()) {
|
||||
decoder->GetNext();
|
||||
chars++;
|
||||
uint32_t r = decoder->GetNext();
|
||||
if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
chars++;
|
||||
} else {
|
||||
chars += 2;
|
||||
}
|
||||
}
|
||||
|
||||
Object* result;
|
||||
@ -4207,10 +4209,15 @@ MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
|
||||
// Convert and copy the characters into the new object.
|
||||
String* string_result = String::cast(result);
|
||||
decoder->Reset(string.start(), string.length());
|
||||
for (int i = 0; i < chars; i++) {
|
||||
uc32 r = decoder->GetNext();
|
||||
if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; }
|
||||
string_result->Set(i, r);
|
||||
int i = 0;
|
||||
while (i < chars) {
|
||||
uint32_t r = decoder->GetNext();
|
||||
if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r));
|
||||
string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r));
|
||||
} else {
|
||||
string_result->Set(i++, r);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -4267,7 +4274,7 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
|
||||
uint32_t hash_field) {
|
||||
ASSERT(chars >= 0);
|
||||
// Ensure that chars matches the number of characters in the buffer.
|
||||
ASSERT(static_cast<unsigned>(chars) == buffer->Length());
|
||||
ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length());
|
||||
// Determine whether the string is ASCII.
|
||||
bool is_ascii = true;
|
||||
while (buffer->has_more()) {
|
||||
@ -4313,8 +4320,15 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
|
||||
ASSERT_EQ(size, answer->Size());
|
||||
|
||||
// Fill in the characters.
|
||||
for (int i = 0; i < chars; i++) {
|
||||
answer->Set(i, buffer->GetNext());
|
||||
int i = 0;
|
||||
while (i < chars) {
|
||||
uint32_t character = buffer->GetNext();
|
||||
if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
|
||||
answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
|
||||
} else {
|
||||
answer->Set(i++, character);
|
||||
}
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
@ -4284,7 +4284,7 @@ class HStringCharCodeAt: public HTemplateInstruction<3> {
|
||||
virtual bool DataEquals(HValue* other) { return true; }
|
||||
|
||||
virtual Range* InferRange(Zone* zone) {
|
||||
return new(zone) Range(0, String::kMaxUC16CharCode);
|
||||
return new(zone) Range(0, String::kMaxUtf16CodeUnit);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -523,7 +523,7 @@ void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
|
||||
uc16 minus,
|
||||
uc16 mask,
|
||||
Label* on_not_equal) {
|
||||
ASSERT(minus < String::kMaxUC16CharCode);
|
||||
ASSERT(minus < String::kMaxUtf16CodeUnit);
|
||||
__ lea(eax, Operand(current_character(), -minus));
|
||||
__ and_(eax, mask);
|
||||
__ cmp(eax, c);
|
||||
|
@ -1444,7 +1444,7 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
|
||||
if (ascii) {
|
||||
char_mask = String::kMaxAsciiCharCode;
|
||||
} else {
|
||||
char_mask = String::kMaxUC16CharCode;
|
||||
char_mask = String::kMaxUtf16CodeUnit;
|
||||
}
|
||||
uc16 exor = c1 ^ c2;
|
||||
// Check whether exor has only one bit set.
|
||||
@ -1546,7 +1546,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
||||
if (ascii) {
|
||||
max_char = String::kMaxAsciiCharCode;
|
||||
} else {
|
||||
max_char = String::kMaxUC16CharCode;
|
||||
max_char = String::kMaxUtf16CodeUnit;
|
||||
}
|
||||
|
||||
Label success;
|
||||
@ -1642,7 +1642,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
||||
macro_assembler->CheckCharacterLT(from, on_failure);
|
||||
}
|
||||
}
|
||||
if (to != String::kMaxUC16CharCode) {
|
||||
if (to != String::kMaxUtf16CodeUnit) {
|
||||
if (cc->is_negated()) {
|
||||
macro_assembler->CheckCharacterLT(to + 1, on_failure);
|
||||
} else {
|
||||
@ -1835,7 +1835,7 @@ bool QuickCheckDetails::Rationalize(bool asc) {
|
||||
if (asc) {
|
||||
char_mask = String::kMaxAsciiCharCode;
|
||||
} else {
|
||||
char_mask = String::kMaxUC16CharCode;
|
||||
char_mask = String::kMaxUtf16CodeUnit;
|
||||
}
|
||||
mask_ = 0;
|
||||
value_ = 0;
|
||||
@ -1887,7 +1887,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
|
||||
if (compiler->ascii()) {
|
||||
char_mask = String::kMaxAsciiCharCode;
|
||||
} else {
|
||||
char_mask = String::kMaxUC16CharCode;
|
||||
char_mask = String::kMaxUtf16CodeUnit;
|
||||
}
|
||||
if ((mask & char_mask) == char_mask) need_mask = false;
|
||||
mask &= char_mask;
|
||||
@ -1939,7 +1939,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
if (compiler->ascii()) {
|
||||
char_mask = String::kMaxAsciiCharCode;
|
||||
} else {
|
||||
char_mask = String::kMaxUC16CharCode;
|
||||
char_mask = String::kMaxUtf16CodeUnit;
|
||||
}
|
||||
for (int k = 0; k < elms_->length(); k++) {
|
||||
TextElement elm = elms_->at(k);
|
||||
@ -4079,7 +4079,7 @@ static void AddClassNegated(const uc16 *elmv,
|
||||
int elmc,
|
||||
ZoneList<CharacterRange>* ranges) {
|
||||
ASSERT(elmv[0] != 0x0000);
|
||||
ASSERT(elmv[elmc-1] != String::kMaxUC16CharCode);
|
||||
ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit);
|
||||
uc16 last = 0x0000;
|
||||
for (int i = 0; i < elmc; i += 2) {
|
||||
ASSERT(last <= elmv[i] - 1);
|
||||
@ -4087,7 +4087,7 @@ static void AddClassNegated(const uc16 *elmv,
|
||||
ranges->Add(CharacterRange(last, elmv[i] - 1));
|
||||
last = elmv[i + 1] + 1;
|
||||
}
|
||||
ranges->Add(CharacterRange(last, String::kMaxUC16CharCode));
|
||||
ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit));
|
||||
}
|
||||
|
||||
|
||||
@ -4633,8 +4633,8 @@ void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
|
||||
from = range.to();
|
||||
i++;
|
||||
}
|
||||
if (from < String::kMaxUC16CharCode) {
|
||||
negated_ranges->Add(CharacterRange(from + 1, String::kMaxUC16CharCode));
|
||||
if (from < String::kMaxUtf16CodeUnit) {
|
||||
negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit));
|
||||
}
|
||||
}
|
||||
|
||||
@ -4797,7 +4797,7 @@ void DispatchTable::AddRange(CharacterRange full_range, int value) {
|
||||
entry->AddValue(value);
|
||||
// Bail out if the last interval ended at 0xFFFF since otherwise
|
||||
// adding 1 will wrap around to 0.
|
||||
if (entry->to() == String::kMaxUC16CharCode)
|
||||
if (entry->to() == String::kMaxUtf16CodeUnit)
|
||||
break;
|
||||
ASSERT(entry->to() + 1 > current.from());
|
||||
current.set_from(entry->to() + 1);
|
||||
@ -5117,7 +5117,7 @@ int TextNode::ComputeFirstCharacterSet(int budget) {
|
||||
int new_length = length + 1;
|
||||
if (length > 0) {
|
||||
if (ranges->at(0).from() == 0) new_length--;
|
||||
if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) {
|
||||
if (ranges->at(length - 1).to() == String::kMaxUtf16CodeUnit) {
|
||||
new_length--;
|
||||
}
|
||||
}
|
||||
@ -5207,14 +5207,14 @@ void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
|
||||
if (last < range.from())
|
||||
AddRange(CharacterRange(last, range.from() - 1));
|
||||
if (range.to() >= last) {
|
||||
if (range.to() == String::kMaxUC16CharCode) {
|
||||
if (range.to() == String::kMaxUtf16CodeUnit) {
|
||||
return;
|
||||
} else {
|
||||
last = range.to() + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
AddRange(CharacterRange(last, String::kMaxUC16CharCode));
|
||||
AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));
|
||||
}
|
||||
|
||||
|
||||
|
16
src/log.cc
16
src/log.cc
@ -461,18 +461,20 @@ class Logger::NameBuffer {
|
||||
utf8_pos_ += utf8_length;
|
||||
return;
|
||||
}
|
||||
int uc16_length = Min(str->length(), kUc16BufferSize);
|
||||
String::WriteToFlat(str, uc16_buffer_, 0, uc16_length);
|
||||
int uc16_length = Min(str->length(), kUtf16BufferSize);
|
||||
String::WriteToFlat(str, utf16_buffer, 0, uc16_length);
|
||||
int previous = unibrow::Utf16::kNoPreviousCharacter;
|
||||
for (int i = 0; i < uc16_length && utf8_pos_ < kUtf8BufferSize; ++i) {
|
||||
uc16 c = uc16_buffer_[i];
|
||||
uc16 c = utf16_buffer[i];
|
||||
if (c <= String::kMaxAsciiCharCodeU) {
|
||||
utf8_buffer_[utf8_pos_++] = static_cast<char>(c);
|
||||
} else {
|
||||
int char_length = unibrow::Utf8::Length(c);
|
||||
int char_length = unibrow::Utf8::Length(c, previous);
|
||||
if (utf8_pos_ + char_length > kUtf8BufferSize) break;
|
||||
unibrow::Utf8::Encode(utf8_buffer_ + utf8_pos_, c);
|
||||
unibrow::Utf8::Encode(utf8_buffer_ + utf8_pos_, c, previous);
|
||||
utf8_pos_ += char_length;
|
||||
}
|
||||
previous = c;
|
||||
}
|
||||
}
|
||||
|
||||
@ -504,11 +506,11 @@ class Logger::NameBuffer {
|
||||
|
||||
private:
|
||||
static const int kUtf8BufferSize = 512;
|
||||
static const int kUc16BufferSize = 128;
|
||||
static const int kUtf16BufferSize = 128;
|
||||
|
||||
int utf8_pos_;
|
||||
char utf8_buffer_[kUtf8BufferSize];
|
||||
uc16 uc16_buffer_[kUc16BufferSize];
|
||||
uc16 utf16_buffer[kUtf16BufferSize];
|
||||
};
|
||||
|
||||
|
||||
|
@ -4463,7 +4463,11 @@ bool StringHasher::has_trivial_hash() {
|
||||
}
|
||||
|
||||
|
||||
void StringHasher::AddCharacter(uc32 c) {
|
||||
void StringHasher::AddCharacter(uint32_t c) {
|
||||
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
AddSurrogatePair(c); // Not inlined.
|
||||
return;
|
||||
}
|
||||
// Use the Jenkins one-at-a-time hash function to update the hash
|
||||
// for the given character.
|
||||
raw_running_hash_ += c;
|
||||
@ -4492,8 +4496,12 @@ void StringHasher::AddCharacter(uc32 c) {
|
||||
}
|
||||
|
||||
|
||||
void StringHasher::AddCharacterNoIndex(uc32 c) {
|
||||
void StringHasher::AddCharacterNoIndex(uint32_t c) {
|
||||
ASSERT(!is_array_index());
|
||||
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
AddSurrogatePairNoIndex(c); // Not inlined.
|
||||
return;
|
||||
}
|
||||
raw_running_hash_ += c;
|
||||
raw_running_hash_ += (raw_running_hash_ << 10);
|
||||
raw_running_hash_ ^= (raw_running_hash_ >> 6);
|
||||
|
109
src/objects.cc
109
src/objects.cc
@ -6051,9 +6051,11 @@ SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls,
|
||||
buffer->Reset(offset, this);
|
||||
int character_position = offset;
|
||||
int utf8_bytes = 0;
|
||||
int last = unibrow::Utf16::kNoPreviousCharacter;
|
||||
while (buffer->has_more() && character_position++ < offset + length) {
|
||||
uint16_t character = buffer->GetNext();
|
||||
utf8_bytes += unibrow::Utf8::Length(character);
|
||||
utf8_bytes += unibrow::Utf8::Length(character, last);
|
||||
last = character;
|
||||
}
|
||||
|
||||
if (length_return) {
|
||||
@ -6067,13 +6069,15 @@ SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls,
|
||||
buffer->Seek(offset);
|
||||
character_position = offset;
|
||||
int utf8_byte_position = 0;
|
||||
last = unibrow::Utf16::kNoPreviousCharacter;
|
||||
while (buffer->has_more() && character_position++ < offset + length) {
|
||||
uint16_t character = buffer->GetNext();
|
||||
if (allow_nulls == DISALLOW_NULLS && character == 0) {
|
||||
character = ' ';
|
||||
}
|
||||
utf8_byte_position +=
|
||||
unibrow::Utf8::Encode(result + utf8_byte_position, character);
|
||||
unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
|
||||
last = character;
|
||||
}
|
||||
result[utf8_byte_position] = 0;
|
||||
return SmartArrayPointer<char>(result);
|
||||
@ -6387,73 +6391,6 @@ const unibrow::byte* String::ReadBlock(String* input,
|
||||
}
|
||||
|
||||
|
||||
// This method determines the type of string involved and then gets the UTF8
|
||||
// length of the string. It doesn't flatten the string and has log(n) recursion
|
||||
// for a string of length n.
|
||||
int String::Utf8Length(String* input, int from, int to) {
|
||||
if (from == to) return 0;
|
||||
int total = 0;
|
||||
while (true) {
|
||||
if (input->IsAsciiRepresentation()) return total + to - from;
|
||||
switch (StringShape(input).representation_tag()) {
|
||||
case kConsStringTag: {
|
||||
ConsString* str = ConsString::cast(input);
|
||||
String* first = str->first();
|
||||
String* second = str->second();
|
||||
int first_length = first->length();
|
||||
if (first_length - from < to - first_length) {
|
||||
if (first_length > from) {
|
||||
// Left hand side is shorter.
|
||||
total += Utf8Length(first, from, first_length);
|
||||
input = second;
|
||||
from = 0;
|
||||
to -= first_length;
|
||||
} else {
|
||||
// We only need the right hand side.
|
||||
input = second;
|
||||
from -= first_length;
|
||||
to -= first_length;
|
||||
}
|
||||
} else {
|
||||
if (first_length <= to) {
|
||||
// Right hand side is shorter.
|
||||
total += Utf8Length(second, 0, to - first_length);
|
||||
input = first;
|
||||
to = first_length;
|
||||
} else {
|
||||
// We only need the left hand side.
|
||||
input = first;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
case kExternalStringTag:
|
||||
case kSeqStringTag: {
|
||||
Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector();
|
||||
const uc16* p = vector.start();
|
||||
for (int i = from; i < to; i++) {
|
||||
total += unibrow::Utf8::Length(p[i]);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
case kSlicedStringTag: {
|
||||
SlicedString* str = SlicedString::cast(input);
|
||||
int offset = str->offset();
|
||||
input = str->parent();
|
||||
from += offset;
|
||||
to += offset;
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void Relocatable::PostGarbageCollectionProcessing() {
|
||||
Isolate* isolate = Isolate::Current();
|
||||
Relocatable* current = isolate->relocatable_top();
|
||||
@ -6847,8 +6784,10 @@ static inline bool CompareStringContents(IteratorA* ia, IteratorB* ib) {
|
||||
// General slow case check. We know that the ia and ib iterators
|
||||
// have the same length.
|
||||
while (ia->has_more()) {
|
||||
uc32 ca = ia->GetNext();
|
||||
uc32 cb = ib->GetNext();
|
||||
uint32_t ca = ia->GetNext();
|
||||
uint32_t cb = ib->GetNext();
|
||||
ASSERT(ca <= unibrow::Utf16::kMaxNonSurrogateCharCode);
|
||||
ASSERT(cb <= unibrow::Utf16::kMaxNonSurrogateCharCode);
|
||||
if (ca != cb)
|
||||
return false;
|
||||
}
|
||||
@ -7031,8 +6970,14 @@ bool String::IsEqualTo(Vector<const char> str) {
|
||||
decoder->Reset(str.start(), str.length());
|
||||
int i;
|
||||
for (i = 0; i < slen && decoder->has_more(); i++) {
|
||||
uc32 r = decoder->GetNext();
|
||||
if (Get(i) != r) return false;
|
||||
uint32_t r = decoder->GetNext();
|
||||
if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
if (i > slen - 1) return false;
|
||||
if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false;
|
||||
if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false;
|
||||
} else {
|
||||
if (Get(i) != r) return false;
|
||||
}
|
||||
}
|
||||
return i == slen && !decoder->has_more();
|
||||
}
|
||||
@ -7162,6 +7107,22 @@ uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) {
|
||||
}
|
||||
|
||||
|
||||
void StringHasher::AddSurrogatePair(uc32 c) {
|
||||
uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
|
||||
AddCharacter(lead);
|
||||
uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
|
||||
AddCharacter(trail);
|
||||
}
|
||||
|
||||
|
||||
void StringHasher::AddSurrogatePairNoIndex(uc32 c) {
|
||||
uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
|
||||
AddCharacterNoIndex(lead);
|
||||
uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
|
||||
AddCharacterNoIndex(trail);
|
||||
}
|
||||
|
||||
|
||||
uint32_t StringHasher::GetHashField() {
|
||||
ASSERT(is_valid());
|
||||
if (length_ <= String::kMaxHashCalcLength) {
|
||||
@ -10655,7 +10616,7 @@ class Utf8SymbolKey : public HashTableKey {
|
||||
if (hash_field_ != 0) return hash_field_ >> String::kHashShift;
|
||||
unibrow::Utf8InputBuffer<> buffer(string_.start(),
|
||||
static_cast<unsigned>(string_.length()));
|
||||
chars_ = buffer.Length();
|
||||
chars_ = buffer.Utf16Length();
|
||||
hash_field_ = String::ComputeHashField(&buffer, chars_, seed_);
|
||||
uint32_t result = hash_field_ >> String::kHashShift;
|
||||
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
||||
|
@ -6616,12 +6616,17 @@ class StringHasher {
|
||||
inline bool has_trivial_hash();
|
||||
|
||||
// Add a character to the hash and update the array index calculation.
|
||||
inline void AddCharacter(uc32 c);
|
||||
inline void AddCharacter(uint32_t c);
|
||||
|
||||
// Adds a character to the hash but does not update the array index
|
||||
// calculation. This can only be called when it has been verified
|
||||
// that the input is not an array index.
|
||||
inline void AddCharacterNoIndex(uc32 c);
|
||||
inline void AddCharacterNoIndex(uint32_t c);
|
||||
|
||||
// Add a character above 0xffff as a surrogate pair. These can get into
|
||||
// the hasher through the routines that take a UTF-8 string and make a symbol.
|
||||
void AddSurrogatePair(uc32 c);
|
||||
void AddSurrogatePairNoIndex(uc32 c);
|
||||
|
||||
// Returns the value to store in the hash field of a string with
|
||||
// the given length and contents.
|
||||
@ -6871,9 +6876,6 @@ class String: public HeapObject {
|
||||
RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
|
||||
int* length_output = 0);
|
||||
|
||||
inline int Utf8Length() { return Utf8Length(this, 0, length()); }
|
||||
static int Utf8Length(String* input, int from, int to);
|
||||
|
||||
// Return a 16 bit Unicode representation of the string.
|
||||
// The string should be nearly flat, otherwise the performance of
|
||||
// this method may be very bad. Setting robustness_flag to
|
||||
@ -6939,7 +6941,7 @@ class String: public HeapObject {
|
||||
// Max ASCII char code.
|
||||
static const int kMaxAsciiCharCode = unibrow::Utf8::kMaxOneByteChar;
|
||||
static const unsigned kMaxAsciiCharCodeU = unibrow::Utf8::kMaxOneByteChar;
|
||||
static const int kMaxUC16CharCode = 0xffff;
|
||||
static const int kMaxUtf16CodeUnit = 0xffff;
|
||||
|
||||
// Mask constant for checking if a string has a computed hash code
|
||||
// and if it is an array index. The least significant bit indicates
|
||||
|
@ -258,7 +258,7 @@ Handle<String> Parser::LookupSymbol(int symbol_id) {
|
||||
scanner().literal_ascii_string());
|
||||
} else {
|
||||
return isolate()->factory()->LookupTwoByteSymbol(
|
||||
scanner().literal_uc16_string());
|
||||
scanner().literal_utf16_string());
|
||||
}
|
||||
}
|
||||
return LookupCachedSymbol(symbol_id);
|
||||
@ -279,7 +279,7 @@ Handle<String> Parser::LookupCachedSymbol(int symbol_id) {
|
||||
scanner().literal_ascii_string());
|
||||
} else {
|
||||
result = isolate()->factory()->LookupTwoByteSymbol(
|
||||
scanner().literal_uc16_string());
|
||||
scanner().literal_utf16_string());
|
||||
}
|
||||
symbol_cache_.at(symbol_id) = result;
|
||||
return result;
|
||||
@ -576,12 +576,12 @@ FunctionLiteral* Parser::ParseProgram(CompilationInfo* info) {
|
||||
// Notice that the stream is destroyed at the end of the branch block.
|
||||
// The last line of the blocks can't be moved outside, even though they're
|
||||
// identical calls.
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
ExternalTwoByteStringUtf16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source), 0, source->length());
|
||||
scanner_.Initialize(&stream);
|
||||
return DoParseProgram(info, source, &zone_scope);
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source, 0, source->length());
|
||||
GenericStringUtf16CharacterStream stream(source, 0, source->length());
|
||||
scanner_.Initialize(&stream);
|
||||
return DoParseProgram(info, source, &zone_scope);
|
||||
}
|
||||
@ -665,16 +665,16 @@ FunctionLiteral* Parser::ParseLazy(CompilationInfo* info) {
|
||||
// Initialize parser state.
|
||||
source->TryFlatten();
|
||||
if (source->IsExternalTwoByteString()) {
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
ExternalTwoByteStringUtf16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source),
|
||||
shared_info->start_position(),
|
||||
shared_info->end_position());
|
||||
FunctionLiteral* result = ParseLazy(info, &stream, &zone_scope);
|
||||
return result;
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source,
|
||||
shared_info->start_position(),
|
||||
shared_info->end_position());
|
||||
GenericStringUtf16CharacterStream stream(source,
|
||||
shared_info->start_position(),
|
||||
shared_info->end_position());
|
||||
FunctionLiteral* result = ParseLazy(info, &stream, &zone_scope);
|
||||
return result;
|
||||
}
|
||||
@ -682,7 +682,7 @@ FunctionLiteral* Parser::ParseLazy(CompilationInfo* info) {
|
||||
|
||||
|
||||
FunctionLiteral* Parser::ParseLazy(CompilationInfo* info,
|
||||
UC16CharacterStream* source,
|
||||
Utf16CharacterStream* source,
|
||||
ZoneScope* zone_scope) {
|
||||
Handle<SharedFunctionInfo> shared_info = info->shared_info();
|
||||
scanner_.Initialize(source);
|
||||
@ -4285,7 +4285,7 @@ class SingletonLogger : public ParserRecorder {
|
||||
|
||||
// Logs a symbol creation of a literal or identifier.
|
||||
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
|
||||
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
|
||||
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { }
|
||||
|
||||
// Logs an error message and marks the log as containing an error.
|
||||
// Further logging will be ignored, and ExtractData will return a vector
|
||||
@ -5874,7 +5874,7 @@ int ScriptDataImpl::ReadNumber(byte** source) {
|
||||
|
||||
|
||||
// Create a Scanner for the preparser to use as input, and preparse the source.
|
||||
static ScriptDataImpl* DoPreParse(UC16CharacterStream* source,
|
||||
static ScriptDataImpl* DoPreParse(Utf16CharacterStream* source,
|
||||
int flags,
|
||||
ParserRecorder* recorder) {
|
||||
Isolate* isolate = Isolate::Current();
|
||||
@ -5915,17 +5915,17 @@ ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
||||
PartialParserRecorder recorder;
|
||||
int source_length = source->length();
|
||||
if (source->IsExternalTwoByteString()) {
|
||||
ExternalTwoByteStringUC16CharacterStream stream(
|
||||
ExternalTwoByteStringUtf16CharacterStream stream(
|
||||
Handle<ExternalTwoByteString>::cast(source), 0, source_length);
|
||||
return DoPreParse(&stream, flags, &recorder);
|
||||
} else {
|
||||
GenericStringUC16CharacterStream stream(source, 0, source_length);
|
||||
GenericStringUtf16CharacterStream stream(source, 0, source_length);
|
||||
return DoPreParse(&stream, flags, &recorder);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ScriptDataImpl* ParserApi::PreParse(UC16CharacterStream* source,
|
||||
ScriptDataImpl* ParserApi::PreParse(Utf16CharacterStream* source,
|
||||
v8::Extension* extension,
|
||||
int flags) {
|
||||
Handle<Script> no_script;
|
||||
|
@ -172,7 +172,7 @@ class ParserApi {
|
||||
static bool Parse(CompilationInfo* info, int flags);
|
||||
|
||||
// Generic preparser generating full preparse data.
|
||||
static ScriptDataImpl* PreParse(UC16CharacterStream* source,
|
||||
static ScriptDataImpl* PreParse(Utf16CharacterStream* source,
|
||||
v8::Extension* extension,
|
||||
int flags);
|
||||
|
||||
@ -542,7 +542,7 @@ class Parser {
|
||||
|
||||
|
||||
FunctionLiteral* ParseLazy(CompilationInfo* info,
|
||||
UC16CharacterStream* source,
|
||||
Utf16CharacterStream* source,
|
||||
ZoneScope* zone_scope);
|
||||
|
||||
Isolate* isolate() { return isolate_; }
|
||||
@ -712,7 +712,7 @@ class Parser {
|
||||
scanner().literal_ascii_string(), tenured);
|
||||
} else {
|
||||
return isolate_->factory()->NewStringFromTwoByte(
|
||||
scanner().literal_uc16_string(), tenured);
|
||||
scanner().literal_utf16_string(), tenured);
|
||||
}
|
||||
}
|
||||
|
||||
@ -722,7 +722,7 @@ class Parser {
|
||||
scanner().next_literal_ascii_string(), tenured);
|
||||
} else {
|
||||
return isolate_->factory()->NewStringFromTwoByte(
|
||||
scanner().next_literal_uc16_string(), tenured);
|
||||
scanner().next_literal_utf16_string(), tenured);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,7 @@ class ParserRecorder {
|
||||
|
||||
// Logs a symbol creation of a literal or identifier.
|
||||
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
|
||||
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
|
||||
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { }
|
||||
|
||||
// Logs an error message and marks the log as containing an error.
|
||||
// Further logging will be ignored, and ExtractData will return a vector
|
||||
@ -149,7 +149,7 @@ class PartialParserRecorder : public FunctionLoggingParserRecorder {
|
||||
public:
|
||||
PartialParserRecorder() : FunctionLoggingParserRecorder() { }
|
||||
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
|
||||
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
|
||||
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { }
|
||||
virtual ~PartialParserRecorder() { }
|
||||
virtual Vector<unsigned> ExtractData();
|
||||
virtual int symbol_position() { return 0; }
|
||||
@ -171,7 +171,7 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
|
||||
LogSymbol(start, hash, true, Vector<const byte>::cast(literal));
|
||||
}
|
||||
|
||||
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) {
|
||||
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) {
|
||||
if (!is_recording_) return;
|
||||
int hash = vector_hash(literal);
|
||||
LogSymbol(start, hash, false, Vector<const byte>::cast(literal));
|
||||
|
@ -46,10 +46,10 @@ namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// UTF16Buffer based on a v8::UnicodeInputStream.
|
||||
class InputStreamUTF16Buffer : public UC16CharacterStream {
|
||||
class InputStreamUtf16Buffer : public Utf16CharacterStream {
|
||||
public:
|
||||
/* The InputStreamUTF16Buffer maintains an internal buffer
|
||||
* that is filled in chunks from the UC16CharacterStream.
|
||||
/* The InputStreamUtf16Buffer maintains an internal buffer
|
||||
* that is filled in chunks from the Utf16CharacterStream.
|
||||
* It also maintains unlimited pushback capability, but optimized
|
||||
* for small pushbacks.
|
||||
* The pushback_buffer_ pointer points to the limit of pushbacks
|
||||
@ -60,8 +60,8 @@ class InputStreamUTF16Buffer : public UC16CharacterStream {
|
||||
* new buffer. When this buffer is read to the end again, the cursor is
|
||||
* switched back to the internal buffer
|
||||
*/
|
||||
explicit InputStreamUTF16Buffer(v8::UnicodeInputStream* stream)
|
||||
: UC16CharacterStream(),
|
||||
explicit InputStreamUtf16Buffer(v8::UnicodeInputStream* stream)
|
||||
: Utf16CharacterStream(),
|
||||
stream_(stream),
|
||||
pushback_buffer_(buffer_),
|
||||
pushback_buffer_end_cache_(NULL),
|
||||
@ -70,7 +70,7 @@ class InputStreamUTF16Buffer : public UC16CharacterStream {
|
||||
buffer_cursor_ = buffer_end_ = buffer_ + kPushBackSize;
|
||||
}
|
||||
|
||||
virtual ~InputStreamUTF16Buffer() {
|
||||
virtual ~InputStreamUtf16Buffer() {
|
||||
if (pushback_buffer_backing_ != NULL) {
|
||||
DeleteArray(pushback_buffer_backing_);
|
||||
}
|
||||
@ -127,12 +127,18 @@ class InputStreamUTF16Buffer : public UC16CharacterStream {
|
||||
uc16* buffer_start = buffer_ + kPushBackSize;
|
||||
buffer_cursor_ = buffer_end_ = buffer_start;
|
||||
while ((value = stream_->Next()) >= 0) {
|
||||
if (value > static_cast<int32_t>(unibrow::Utf8::kMaxThreeByteChar)) {
|
||||
value = unibrow::Utf8::kBadChar;
|
||||
if (value >
|
||||
static_cast<int32_t>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
|
||||
buffer_start[buffer_end_++ - buffer_start] =
|
||||
unibrow::Utf16::LeadSurrogate(value);
|
||||
buffer_start[buffer_end_++ - buffer_start] =
|
||||
unibrow::Utf16::TrailSurrogate(value);
|
||||
} else {
|
||||
// buffer_end_ is a const pointer, but buffer_ is writable.
|
||||
buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value);
|
||||
}
|
||||
// buffer_end_ is a const pointer, but buffer_ is writable.
|
||||
buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value);
|
||||
if (buffer_end_ == buffer_ + kPushBackSize + kBufferSize) break;
|
||||
// Stop one before the end of the buffer in case we get a surrogate pair.
|
||||
if (buffer_end_ <= buffer_ + 1 + kPushBackSize + kBufferSize) break;
|
||||
}
|
||||
return buffer_end_ > buffer_start;
|
||||
}
|
||||
@ -179,7 +185,7 @@ UnicodeInputStream::~UnicodeInputStream() { }
|
||||
|
||||
|
||||
PreParserData Preparse(UnicodeInputStream* input, size_t max_stack) {
|
||||
internal::InputStreamUTF16Buffer buffer(input);
|
||||
internal::InputStreamUtf16Buffer buffer(input);
|
||||
uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&buffer) - max_stack;
|
||||
internal::UnicodeCache unicode_cache;
|
||||
internal::Scanner scanner(&unicode_cache);
|
||||
|
@ -1214,7 +1214,7 @@ void PreParser::CheckDuplicate(DuplicateFinder* finder,
|
||||
old_type = finder->AddAsciiSymbol(scanner_->literal_ascii_string(),
|
||||
type);
|
||||
} else {
|
||||
old_type = finder->AddUC16Symbol(scanner_->literal_uc16_string(), type);
|
||||
old_type = finder->AddUtf16Symbol(scanner_->literal_utf16_string(), type);
|
||||
}
|
||||
if (HasConflict(old_type, type)) {
|
||||
if (IsDataDataConflict(old_type, type)) {
|
||||
@ -1387,7 +1387,7 @@ PreParser::Expression PreParser::ParseFunctionLiteral(bool* ok) {
|
||||
duplicate_finder.AddAsciiSymbol(scanner_->literal_ascii_string(), 1);
|
||||
} else {
|
||||
prev_value =
|
||||
duplicate_finder.AddUC16Symbol(scanner_->literal_uc16_string(), 1);
|
||||
duplicate_finder.AddUtf16Symbol(scanner_->literal_utf16_string(), 1);
|
||||
}
|
||||
|
||||
if (prev_value != 0) {
|
||||
@ -1485,7 +1485,7 @@ void PreParser::LogSymbol() {
|
||||
if (scanner_->is_literal_ascii()) {
|
||||
log_->LogAsciiSymbol(identifier_pos, scanner_->literal_ascii_string());
|
||||
} else {
|
||||
log_->LogUC16Symbol(identifier_pos, scanner_->literal_uc16_string());
|
||||
log_->LogUtf16Symbol(identifier_pos, scanner_->literal_utf16_string());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1657,7 +1657,7 @@ int DuplicateFinder::AddAsciiSymbol(i::Vector<const char> key, int value) {
|
||||
return AddSymbol(i::Vector<const byte>::cast(key), true, value);
|
||||
}
|
||||
|
||||
int DuplicateFinder::AddUC16Symbol(i::Vector<const uint16_t> key, int value) {
|
||||
int DuplicateFinder::AddUtf16Symbol(i::Vector<const uint16_t> key, int value) {
|
||||
return AddSymbol(i::Vector<const byte>::cast(key), false, value);
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ class DuplicateFinder {
|
||||
map_(&Match) { }
|
||||
|
||||
int AddAsciiSymbol(i::Vector<const char> key, int value);
|
||||
int AddUC16Symbol(i::Vector<const uint16_t> key, int value);
|
||||
int AddUtf16Symbol(i::Vector<const uint16_t> key, int value);
|
||||
// Add a number literal by converting it (if necessary)
|
||||
// to the string that ToString(ToNumber(literal)) would generate.
|
||||
// and then adding that string with AddAsciiSymbol.
|
||||
|
@ -36,19 +36,19 @@ namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// BufferedUC16CharacterStreams
|
||||
// BufferedUtf16CharacterStreams
|
||||
|
||||
BufferedUC16CharacterStream::BufferedUC16CharacterStream()
|
||||
: UC16CharacterStream(),
|
||||
BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
|
||||
: Utf16CharacterStream(),
|
||||
pushback_limit_(NULL) {
|
||||
// Initialize buffer as being empty. First read will fill the buffer.
|
||||
buffer_cursor_ = buffer_;
|
||||
buffer_end_ = buffer_;
|
||||
}
|
||||
|
||||
BufferedUC16CharacterStream::~BufferedUC16CharacterStream() { }
|
||||
BufferedUtf16CharacterStream::~BufferedUtf16CharacterStream() { }
|
||||
|
||||
void BufferedUC16CharacterStream::PushBack(uc32 character) {
|
||||
void BufferedUtf16CharacterStream::PushBack(uc32 character) {
|
||||
if (character == kEndOfInput) {
|
||||
pos_--;
|
||||
return;
|
||||
@ -63,7 +63,7 @@ void BufferedUC16CharacterStream::PushBack(uc32 character) {
|
||||
}
|
||||
|
||||
|
||||
void BufferedUC16CharacterStream::SlowPushBack(uc16 character) {
|
||||
void BufferedUtf16CharacterStream::SlowPushBack(uc16 character) {
|
||||
// In pushback mode, the end of the buffer contains pushback,
|
||||
// and the start of the buffer (from buffer start to pushback_limit_)
|
||||
// contains valid data that comes just after the pushback.
|
||||
@ -89,7 +89,7 @@ void BufferedUC16CharacterStream::SlowPushBack(uc16 character) {
|
||||
}
|
||||
|
||||
|
||||
bool BufferedUC16CharacterStream::ReadBlock() {
|
||||
bool BufferedUtf16CharacterStream::ReadBlock() {
|
||||
buffer_cursor_ = buffer_;
|
||||
if (pushback_limit_ != NULL) {
|
||||
// Leave pushback mode.
|
||||
@ -106,7 +106,7 @@ bool BufferedUC16CharacterStream::ReadBlock() {
|
||||
}
|
||||
|
||||
|
||||
unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) {
|
||||
unsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) {
|
||||
// Leave pushback mode (i.e., ignore that there might be valid data
|
||||
// in the buffer before the pushback_limit_ point).
|
||||
pushback_limit_ = NULL;
|
||||
@ -114,10 +114,10 @@ unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) {
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// GenericStringUC16CharacterStream
|
||||
// GenericStringUtf16CharacterStream
|
||||
|
||||
|
||||
GenericStringUC16CharacterStream::GenericStringUC16CharacterStream(
|
||||
GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(
|
||||
Handle<String> data,
|
||||
unsigned start_position,
|
||||
unsigned end_position)
|
||||
@ -130,10 +130,10 @@ GenericStringUC16CharacterStream::GenericStringUC16CharacterStream(
|
||||
}
|
||||
|
||||
|
||||
GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { }
|
||||
GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { }
|
||||
|
||||
|
||||
unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) {
|
||||
unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) {
|
||||
unsigned old_pos = pos_;
|
||||
pos_ = Min(pos_ + delta, length_);
|
||||
ReadBlock();
|
||||
@ -141,7 +141,7 @@ unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) {
|
||||
}
|
||||
|
||||
|
||||
unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos,
|
||||
unsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos,
|
||||
unsigned length) {
|
||||
if (from_pos >= length_) return 0;
|
||||
if (from_pos + length > length_) {
|
||||
@ -153,10 +153,10 @@ unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos,
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Utf8ToUC16CharacterStream
|
||||
Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data,
|
||||
unsigned length)
|
||||
: BufferedUC16CharacterStream(),
|
||||
// Utf8ToUtf16CharacterStream
|
||||
Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,
|
||||
unsigned length)
|
||||
: BufferedUtf16CharacterStream(),
|
||||
raw_data_(data),
|
||||
raw_data_length_(length),
|
||||
raw_data_pos_(0),
|
||||
@ -165,10 +165,10 @@ Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data,
|
||||
}
|
||||
|
||||
|
||||
Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { }
|
||||
Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
|
||||
|
||||
|
||||
unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) {
|
||||
unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
|
||||
unsigned old_pos = pos_;
|
||||
unsigned target_pos = pos_ + delta;
|
||||
SetRawPosition(target_pos);
|
||||
@ -178,9 +178,9 @@ unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) {
|
||||
}
|
||||
|
||||
|
||||
unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
|
||||
unsigned length) {
|
||||
static const unibrow::uchar kMaxUC16Character = 0xffff;
|
||||
unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position,
|
||||
unsigned length) {
|
||||
static const unibrow::uchar kMaxUtf16Character = 0xffff;
|
||||
SetRawPosition(char_position);
|
||||
if (raw_character_position_ != char_position) {
|
||||
// char_position was not a valid position in the stream (hit the end
|
||||
@ -188,7 +188,7 @@ unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
|
||||
return 0u;
|
||||
}
|
||||
unsigned i = 0;
|
||||
while (i < length) {
|
||||
while (i < length - 1) {
|
||||
if (raw_data_pos_ == raw_data_length_) break;
|
||||
unibrow::uchar c = raw_data_[raw_data_pos_];
|
||||
if (c <= unibrow::Utf8::kMaxOneByteChar) {
|
||||
@ -197,12 +197,13 @@ unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
|
||||
c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
|
||||
raw_data_length_ - raw_data_pos_,
|
||||
&raw_data_pos_);
|
||||
// Don't allow characters outside of the BMP.
|
||||
if (c > kMaxUC16Character) {
|
||||
c = unibrow::Utf8::kBadChar;
|
||||
}
|
||||
}
|
||||
buffer_[i++] = static_cast<uc16>(c);
|
||||
if (c > kMaxUtf16Character) {
|
||||
buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
|
||||
buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
|
||||
} else {
|
||||
buffer_[i++] = static_cast<uc16>(c);
|
||||
}
|
||||
}
|
||||
raw_character_position_ = char_position + i;
|
||||
return i;
|
||||
@ -266,37 +267,52 @@ static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
|
||||
}
|
||||
|
||||
|
||||
void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) {
|
||||
// This can't set a raw position in the middle of a surrogate pair, since there
|
||||
// is no position in the UTF8 stream that corresponds to that. This assumes
|
||||
// that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If
|
||||
// it is illegally coded as two 3 byte sequences then there is no problem here.
|
||||
void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {
|
||||
if (raw_character_position_ > target_position) {
|
||||
// Spool backwards in utf8 buffer.
|
||||
do {
|
||||
int old_pos = raw_data_pos_;
|
||||
Utf8CharacterBack(raw_data_, &raw_data_pos_);
|
||||
raw_character_position_--;
|
||||
ASSERT(old_pos - raw_data_pos_ <= 4);
|
||||
// Step back over both code units for surrogate pairs.
|
||||
if (old_pos - raw_data_pos_ == 4) raw_character_position_--;
|
||||
} while (raw_character_position_ > target_position);
|
||||
// No surrogate pair splitting.
|
||||
ASSERT(raw_character_position_ == target_position);
|
||||
return;
|
||||
}
|
||||
// Spool forwards in the utf8 buffer.
|
||||
while (raw_character_position_ < target_position) {
|
||||
if (raw_data_pos_ == raw_data_length_) return;
|
||||
int old_pos = raw_data_pos_;
|
||||
Utf8CharacterForward(raw_data_, &raw_data_pos_);
|
||||
raw_character_position_++;
|
||||
ASSERT(raw_data_pos_ - old_pos <= 4);
|
||||
if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
|
||||
}
|
||||
// No surrogate pair splitting.
|
||||
ASSERT(raw_character_position_ == target_position);
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// ExternalTwoByteStringUC16CharacterStream
|
||||
// ExternalTwoByteStringUtf16CharacterStream
|
||||
|
||||
ExternalTwoByteStringUC16CharacterStream::
|
||||
~ExternalTwoByteStringUC16CharacterStream() { }
|
||||
ExternalTwoByteStringUtf16CharacterStream::
|
||||
~ExternalTwoByteStringUtf16CharacterStream() { }
|
||||
|
||||
|
||||
ExternalTwoByteStringUC16CharacterStream
|
||||
::ExternalTwoByteStringUC16CharacterStream(
|
||||
ExternalTwoByteStringUtf16CharacterStream
|
||||
::ExternalTwoByteStringUtf16CharacterStream(
|
||||
Handle<ExternalTwoByteString> data,
|
||||
int start_position,
|
||||
int end_position)
|
||||
: UC16CharacterStream(),
|
||||
: Utf16CharacterStream(),
|
||||
source_(data),
|
||||
raw_data_(data->GetTwoByteData(start_position)) {
|
||||
buffer_cursor_ = raw_data_,
|
||||
|
@ -36,10 +36,10 @@ namespace internal {
|
||||
// A buffered character stream based on a random access character
|
||||
// source (ReadBlock can be called with pos_ pointing to any position,
|
||||
// even positions before the current).
|
||||
class BufferedUC16CharacterStream: public UC16CharacterStream {
|
||||
class BufferedUtf16CharacterStream: public Utf16CharacterStream {
|
||||
public:
|
||||
BufferedUC16CharacterStream();
|
||||
virtual ~BufferedUC16CharacterStream();
|
||||
BufferedUtf16CharacterStream();
|
||||
virtual ~BufferedUtf16CharacterStream();
|
||||
|
||||
virtual void PushBack(uc32 character);
|
||||
|
||||
@ -60,12 +60,12 @@ class BufferedUC16CharacterStream: public UC16CharacterStream {
|
||||
|
||||
|
||||
// Generic string stream.
|
||||
class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
|
||||
class GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
|
||||
public:
|
||||
GenericStringUC16CharacterStream(Handle<String> data,
|
||||
unsigned start_position,
|
||||
unsigned end_position);
|
||||
virtual ~GenericStringUC16CharacterStream();
|
||||
GenericStringUtf16CharacterStream(Handle<String> data,
|
||||
unsigned start_position,
|
||||
unsigned end_position);
|
||||
virtual ~GenericStringUtf16CharacterStream();
|
||||
|
||||
protected:
|
||||
virtual unsigned BufferSeekForward(unsigned delta);
|
||||
@ -77,11 +77,11 @@ class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
|
||||
};
|
||||
|
||||
|
||||
// UC16 stream based on a literal UTF-8 string.
|
||||
class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
|
||||
// Utf16 stream based on a literal UTF-8 string.
|
||||
class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
|
||||
public:
|
||||
Utf8ToUC16CharacterStream(const byte* data, unsigned length);
|
||||
virtual ~Utf8ToUC16CharacterStream();
|
||||
Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
|
||||
virtual ~Utf8ToUtf16CharacterStream();
|
||||
|
||||
protected:
|
||||
virtual unsigned BufferSeekForward(unsigned delta);
|
||||
@ -98,12 +98,12 @@ class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
|
||||
|
||||
|
||||
// UTF16 buffer to read characters from an external string.
|
||||
class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
|
||||
class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
|
||||
public:
|
||||
ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
|
||||
int start_position,
|
||||
int end_position);
|
||||
virtual ~ExternalTwoByteStringUC16CharacterStream();
|
||||
ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
|
||||
int start_position,
|
||||
int end_position);
|
||||
virtual ~ExternalTwoByteStringUtf16CharacterStream();
|
||||
|
||||
virtual void PushBack(uc32 character) {
|
||||
ASSERT(buffer_cursor_ > raw_data_);
|
||||
|
@ -45,7 +45,7 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
|
||||
harmony_modules_(false) { }
|
||||
|
||||
|
||||
void Scanner::Initialize(UC16CharacterStream* source) {
|
||||
void Scanner::Initialize(Utf16CharacterStream* source) {
|
||||
source_ = source;
|
||||
// Need to capture identifiers in order to recognize "get" and "set"
|
||||
// in object literals.
|
||||
|
@ -73,15 +73,17 @@ inline int HexValue(uc32 c) {
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Buffered stream of characters, using an internal UC16 buffer.
|
||||
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
|
||||
// A code unit is a 16 bit value representing either a 16 bit code point
|
||||
// or one part of a surrogate pair that make a single 21 bit code point.
|
||||
|
||||
class UC16CharacterStream {
|
||||
class Utf16CharacterStream {
|
||||
public:
|
||||
UC16CharacterStream() : pos_(0) { }
|
||||
virtual ~UC16CharacterStream() { }
|
||||
Utf16CharacterStream() : pos_(0) { }
|
||||
virtual ~Utf16CharacterStream() { }
|
||||
|
||||
// Returns and advances past the next UC16 character in the input
|
||||
// stream. If there are no more characters, it returns a negative
|
||||
// Returns and advances past the next UTF-16 code unit in the input
|
||||
// stream. If there are no more code units, it returns a negative
|
||||
// value.
|
||||
inline uc32 Advance() {
|
||||
if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
|
||||
@ -90,47 +92,47 @@ class UC16CharacterStream {
|
||||
}
|
||||
// Note: currently the following increment is necessary to avoid a
|
||||
// parser problem! The scanner treats the final kEndOfInput as
|
||||
// a character with a position, and does math relative to that
|
||||
// a code unit with a position, and does math relative to that
|
||||
// position.
|
||||
pos_++;
|
||||
|
||||
return kEndOfInput;
|
||||
}
|
||||
|
||||
// Return the current position in the character stream.
|
||||
// Return the current position in the code unit stream.
|
||||
// Starts at zero.
|
||||
inline unsigned pos() const { return pos_; }
|
||||
|
||||
// Skips forward past the next character_count UC16 characters
|
||||
// Skips forward past the next code_unit_count UTF-16 code units
|
||||
// in the input, or until the end of input if that comes sooner.
|
||||
// Returns the number of characters actually skipped. If less
|
||||
// than character_count,
|
||||
inline unsigned SeekForward(unsigned character_count) {
|
||||
// Returns the number of code units actually skipped. If less
|
||||
// than code_unit_count, the end of the input was reached first.
|
||||
inline unsigned SeekForward(unsigned code_unit_count) {
|
||||
unsigned buffered_chars =
|
||||
static_cast<unsigned>(buffer_end_ - buffer_cursor_);
|
||||
if (character_count <= buffered_chars) {
|
||||
buffer_cursor_ += character_count;
|
||||
pos_ += character_count;
|
||||
return character_count;
|
||||
if (code_unit_count <= buffered_chars) {
|
||||
buffer_cursor_ += code_unit_count;
|
||||
pos_ += code_unit_count;
|
||||
return code_unit_count;
|
||||
}
|
||||
return SlowSeekForward(character_count);
|
||||
return SlowSeekForward(code_unit_count);
|
||||
}
|
||||
|
||||
// Pushes back the most recently read UC16 character (or negative
|
||||
// Pushes back the most recently read UTF-16 code unit (or negative
|
||||
// value if at end of input), i.e., the value returned by the most recent
|
||||
// call to Advance.
|
||||
// Must not be used right after calling SeekForward.
|
||||
virtual void PushBack(int32_t character) = 0;
|
||||
virtual void PushBack(int32_t code_unit) = 0;
|
||||
|
||||
protected:
|
||||
static const uc32 kEndOfInput = -1;
|
||||
|
||||
// Ensures that the buffer_cursor_ points to the character at
|
||||
// Ensures that the buffer_cursor_ points to the code_unit at
|
||||
// position pos_ of the input, if possible. If the position
|
||||
// is at or after the end of the input, return false. If there
|
||||
// are more characters available, return true.
|
||||
// are more code_units available, return true.
|
||||
virtual bool ReadBlock() = 0;
|
||||
virtual unsigned SlowSeekForward(unsigned character_count) = 0;
|
||||
virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
|
||||
|
||||
const uc16* buffer_cursor_;
|
||||
const uc16* buffer_end_;
|
||||
@ -178,23 +180,24 @@ class LiteralBuffer {
|
||||
}
|
||||
}
|
||||
|
||||
INLINE(void AddChar(uc16 character)) {
|
||||
INLINE(void AddChar(uint32_t code_unit)) {
|
||||
if (position_ >= backing_store_.length()) ExpandBuffer();
|
||||
if (is_ascii_) {
|
||||
if (character < kMaxAsciiCharCodeU) {
|
||||
backing_store_[position_] = static_cast<byte>(character);
|
||||
if (code_unit < kMaxAsciiCharCodeU) {
|
||||
backing_store_[position_] = static_cast<byte>(code_unit);
|
||||
position_ += kASCIISize;
|
||||
return;
|
||||
}
|
||||
ConvertToUC16();
|
||||
ConvertToUtf16();
|
||||
}
|
||||
*reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
|
||||
ASSERT(code_unit < 0x10000u);
|
||||
*reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;
|
||||
position_ += kUC16Size;
|
||||
}
|
||||
|
||||
bool is_ascii() { return is_ascii_; }
|
||||
|
||||
Vector<const uc16> uc16_literal() {
|
||||
Vector<const uc16> utf16_literal() {
|
||||
ASSERT(!is_ascii_);
|
||||
ASSERT((position_ & 0x1) == 0);
|
||||
return Vector<const uc16>(
|
||||
@ -236,13 +239,13 @@ class LiteralBuffer {
|
||||
backing_store_ = new_store;
|
||||
}
|
||||
|
||||
void ConvertToUC16() {
|
||||
void ConvertToUtf16() {
|
||||
ASSERT(is_ascii_);
|
||||
Vector<byte> new_store;
|
||||
int new_content_size = position_ * kUC16Size;
|
||||
if (new_content_size >= backing_store_.length()) {
|
||||
// Ensure room for all currently read characters as UC16 as well
|
||||
// as the character about to be stored.
|
||||
// Ensure room for all currently read code units as UC16 as well
|
||||
// as the code unit about to be stored.
|
||||
new_store = Vector<byte>::New(NewCapacity(new_content_size));
|
||||
} else {
|
||||
new_store = backing_store_;
|
||||
@ -316,7 +319,7 @@ class Scanner {
|
||||
|
||||
explicit Scanner(UnicodeCache* scanner_contants);
|
||||
|
||||
void Initialize(UC16CharacterStream* source);
|
||||
void Initialize(Utf16CharacterStream* source);
|
||||
|
||||
// Returns the next token and advances input.
|
||||
Token::Value Next();
|
||||
@ -335,9 +338,9 @@ class Scanner {
|
||||
ASSERT_NOT_NULL(current_.literal_chars);
|
||||
return current_.literal_chars->ascii_literal();
|
||||
}
|
||||
Vector<const uc16> literal_uc16_string() {
|
||||
Vector<const uc16> literal_utf16_string() {
|
||||
ASSERT_NOT_NULL(current_.literal_chars);
|
||||
return current_.literal_chars->uc16_literal();
|
||||
return current_.literal_chars->utf16_literal();
|
||||
}
|
||||
bool is_literal_ascii() {
|
||||
ASSERT_NOT_NULL(current_.literal_chars);
|
||||
@ -371,9 +374,9 @@ class Scanner {
|
||||
ASSERT_NOT_NULL(next_.literal_chars);
|
||||
return next_.literal_chars->ascii_literal();
|
||||
}
|
||||
Vector<const uc16> next_literal_uc16_string() {
|
||||
Vector<const uc16> next_literal_utf16_string() {
|
||||
ASSERT_NOT_NULL(next_.literal_chars);
|
||||
return next_.literal_chars->uc16_literal();
|
||||
return next_.literal_chars->utf16_literal();
|
||||
}
|
||||
bool is_next_literal_ascii() {
|
||||
ASSERT_NOT_NULL(next_.literal_chars);
|
||||
@ -542,8 +545,8 @@ class Scanner {
|
||||
TokenDesc current_; // desc for current token (as returned by Next())
|
||||
TokenDesc next_; // desc for next token (one token look-ahead)
|
||||
|
||||
// Input stream. Must be initialized to an UC16CharacterStream.
|
||||
UC16CharacterStream* source_;
|
||||
// Input stream. Must be initialized to a Utf16CharacterStream.
|
||||
Utf16CharacterStream* source_;
|
||||
|
||||
|
||||
// Start position of the octal literal last scanned.
|
||||
|
@ -78,7 +78,7 @@ template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
|
||||
}
|
||||
|
||||
|
||||
unsigned Utf8::Encode(char* str, uchar c) {
|
||||
unsigned Utf8::Encode(char* str, uchar c, int previous) {
|
||||
static const int kMask = ~(1 << 6);
|
||||
if (c <= kMaxOneByteChar) {
|
||||
str[0] = c;
|
||||
@ -88,6 +88,13 @@ unsigned Utf8::Encode(char* str, uchar c) {
|
||||
str[1] = 0x80 | (c & kMask);
|
||||
return 2;
|
||||
} else if (c <= kMaxThreeByteChar) {
|
||||
if (Utf16::IsTrailSurrogate(c) &&
|
||||
Utf16::IsLeadSurrogate(previous)) {
|
||||
const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
|
||||
return Encode(str - kUnmatchedSize,
|
||||
Utf16::CombineSurrogatePair(previous, c),
|
||||
Utf16::kNoPreviousCharacter) - kUnmatchedSize;
|
||||
}
|
||||
str[0] = 0xE0 | (c >> 12);
|
||||
str[1] = 0x80 | ((c >> 6) & kMask);
|
||||
str[2] = 0x80 | (c & kMask);
|
||||
@ -113,12 +120,16 @@ uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) {
|
||||
return CalculateValue(bytes, length, cursor);
|
||||
}
|
||||
|
||||
unsigned Utf8::Length(uchar c) {
|
||||
unsigned Utf8::Length(uchar c, int previous) {
|
||||
if (c <= kMaxOneByteChar) {
|
||||
return 1;
|
||||
} else if (c <= kMaxTwoByteChar) {
|
||||
return 2;
|
||||
} else if (c <= kMaxThreeByteChar) {
|
||||
if (Utf16::IsTrailSurrogate(c) &&
|
||||
Utf16::IsLeadSurrogate(previous)) {
|
||||
return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
|
||||
}
|
||||
return 3;
|
||||
} else {
|
||||
return 4;
|
||||
|
@ -276,6 +276,7 @@ uchar Utf8::CalculateValue(const byte* str,
|
||||
return kBadChar;
|
||||
}
|
||||
|
||||
|
||||
const byte* Utf8::ReadBlock(Buffer<const char*> str, byte* buffer,
|
||||
unsigned capacity, unsigned* chars_read_ptr, unsigned* offset_ptr) {
|
||||
unsigned offset = *offset_ptr;
|
||||
@ -338,6 +339,16 @@ unsigned CharacterStream::Length() {
|
||||
return result;
|
||||
}
|
||||
|
||||
unsigned CharacterStream::Utf16Length() {
|
||||
unsigned result = 0;
|
||||
while (has_more()) {
|
||||
uchar c = GetNext();
|
||||
result += c > Utf16::kMaxNonSurrogateCharCode ? 2 : 1;
|
||||
}
|
||||
Rewind();
|
||||
return result;
|
||||
}
|
||||
|
||||
void CharacterStream::Seek(unsigned position) {
|
||||
Rewind();
|
||||
for (unsigned i = 0; i < position; i++) {
|
||||
|
@ -100,7 +100,7 @@ class UnicodeData {
|
||||
static const uchar kMaxCodePoint;
|
||||
};
|
||||
|
||||
// --- U t f 8 ---
|
||||
// --- U t f 8 a n d 16 ---
|
||||
|
||||
template <typename Data>
|
||||
class Buffer {
|
||||
@ -114,10 +114,46 @@ class Buffer {
|
||||
unsigned length_;
|
||||
};
|
||||
|
||||
|
||||
class Utf16 {
|
||||
public:
|
||||
static inline bool IsLeadSurrogate(int32_t code) {
|
||||
if (code == kNoPreviousCharacter) return false;
|
||||
return (code & 0xfc00) == 0xd800;
|
||||
}
|
||||
static inline bool IsTrailSurrogate(int32_t code) {
|
||||
if (code == kNoPreviousCharacter) return false;
|
||||
return (code & 0xfc00) == 0xdc00;
|
||||
}
|
||||
|
||||
static inline int32_t CombineSurrogatePair(uchar lead, uchar trail) {
|
||||
return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
|
||||
}
|
||||
static const int32_t kNoPreviousCharacter = -1;
|
||||
static const uchar kMaxNonSurrogateCharCode = 0xffff;
|
||||
// Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes
|
||||
// of UTF-8 data. The special case where the unit is a surrogate
|
||||
// trail produces 1 byte net, because the encoding of the pair is
|
||||
// 4 bytes and the 3 bytes that were used to encode the lead surrogate
|
||||
// can be reclaimed.
|
||||
static const int kMaxExtraUtf8BytesForOneUtf16CodeUnit = 3;
|
||||
// One UTF-16 surrogate is endoded (illegally) as 3 UTF-8 bytes.
|
||||
// The illegality stems from the surrogate not being part of a pair.
|
||||
static const int kUtf8BytesToCodeASurrogate = 3;
|
||||
static inline uchar LeadSurrogate(int32_t char_code) {
|
||||
return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff);
|
||||
}
|
||||
static inline uchar TrailSurrogate(int32_t char_code) {
|
||||
return 0xdc00 + (char_code & 0x3ff);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class Utf8 {
|
||||
public:
|
||||
static inline uchar Length(uchar chr);
|
||||
static inline unsigned Encode(char* out, uchar c);
|
||||
static inline uchar Length(uchar chr, int previous);
|
||||
static inline unsigned Encode(
|
||||
char* out, uchar c, int previous);
|
||||
static const byte* ReadBlock(Buffer<const char*> str, byte* buffer,
|
||||
unsigned capacity, unsigned* chars_read, unsigned* offset);
|
||||
static uchar CalculateValue(const byte* str,
|
||||
@ -130,6 +166,11 @@ class Utf8 {
|
||||
static const unsigned kMaxThreeByteChar = 0xffff;
|
||||
static const unsigned kMaxFourByteChar = 0x1fffff;
|
||||
|
||||
// A single surrogate is coded as a 3 byte UTF-8 sequence, but two together
|
||||
// that match are coded as a 4 byte UTF-8 sequence.
|
||||
static const unsigned kBytesSavedByCombiningSurrogates = 2;
|
||||
static const unsigned kSizeOfUnmatchedSurrogate = 3;
|
||||
|
||||
private:
|
||||
template <unsigned s> friend class Utf8InputBuffer;
|
||||
friend class Test;
|
||||
@ -147,6 +188,7 @@ class CharacterStream {
|
||||
// Note that default implementation is not efficient.
|
||||
virtual void Seek(unsigned);
|
||||
unsigned Length();
|
||||
unsigned Utf16Length();
|
||||
virtual ~CharacterStream() { }
|
||||
static inline bool EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
|
||||
unsigned& offset);
|
||||
@ -156,6 +198,7 @@ class CharacterStream {
|
||||
unsigned capacity, unsigned& offset);
|
||||
static inline uchar DecodeCharacter(const byte* buffer, unsigned* offset);
|
||||
virtual void Rewind() = 0;
|
||||
|
||||
protected:
|
||||
virtual void FillBuffer() = 0;
|
||||
// The number of characters left in the current buffer
|
||||
|
@ -564,7 +564,7 @@ void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
|
||||
uc16 minus,
|
||||
uc16 mask,
|
||||
Label* on_not_equal) {
|
||||
ASSERT(minus < String::kMaxUC16CharCode);
|
||||
ASSERT(minus < String::kMaxUtf16CodeUnit);
|
||||
__ lea(rax, Operand(current_character(), -minus));
|
||||
__ and_(rax, Immediate(mask));
|
||||
__ cmpl(rax, Immediate(c));
|
||||
|
@ -5526,6 +5526,17 @@ static int StrNCmp16(uint16_t* a, uint16_t* b, int n) {
|
||||
}
|
||||
|
||||
|
||||
// Returns str->Utf8Length().  If the first call reports a negative value the
// underlying string is flattened and the length is queried once more.
// NOTE(review): presumably a negative result means "not computable without
// flattening" -- confirm against String::Utf8Length().
int GetUtf8Length(Handle<String> str) {
  const int first_attempt = str->Utf8Length();
  if (first_attempt >= 0) return first_attempt;

  i::Handle<i::String> internal(v8::Utils::OpenHandle(*str));
  i::FlattenString(internal);
  return str->Utf8Length();
}
|
||||
|
||||
|
||||
THREADED_TEST(StringWrite) {
|
||||
LocalContext context;
|
||||
v8::HandleScope scope;
|
||||
@ -5606,7 +5617,7 @@ THREADED_TEST(StringWrite) {
|
||||
CHECK_EQ(0, strncmp(utf8buf, "ab\1", 3));
|
||||
|
||||
memset(utf8buf, 0x1, sizeof(utf8buf));
|
||||
len = left_tree->Utf8Length();
|
||||
len = GetUtf8Length(left_tree);
|
||||
int utf8_expected =
|
||||
(0x80 + (0x800 - 0x80) * 2 + (0xd800 - 0x800) * 3) / kStride;
|
||||
CHECK_EQ(utf8_expected, len);
|
||||
@ -5620,7 +5631,7 @@ THREADED_TEST(StringWrite) {
|
||||
CHECK_EQ(1, utf8buf[utf8_expected]);
|
||||
|
||||
memset(utf8buf, 0x1, sizeof(utf8buf));
|
||||
len = right_tree->Utf8Length();
|
||||
len = GetUtf8Length(right_tree);
|
||||
CHECK_EQ(utf8_expected, len);
|
||||
len = right_tree->WriteUtf8(utf8buf, utf8_expected, &charlen);
|
||||
CHECK_EQ(utf8_expected, len);
|
||||
@ -5745,6 +5756,217 @@ THREADED_TEST(StringWrite) {
|
||||
}
|
||||
|
||||
|
||||
// Looks up two global JS arrays, |name| (strings) and |lengths_name|
// (numbers), and checks for each of the first |len| entries that the UTF-8
// length of the string equals the corresponding number.
static void Utf16Helper(
    LocalContext& context,
    const char* name,
    const char* lengths_name,
    int len) {
  Local<v8::Array> strings =
      Local<v8::Array>::Cast(context->Global()->Get(v8_str(name)));
  Local<v8::Array> expected_lengths =
      Local<v8::Array>::Cast(context->Global()->Get(v8_str(lengths_name)));
  for (int index = 0; index < len; index++) {
    Local<v8::String> subject =
        Local<v8::String>::Cast(strings->Get(index));
    Local<v8::Number> want =
        Local<v8::Number>::Cast(expected_lengths->Get(index));
    int actual = GetUtf8Length(subject);
    CHECK_EQ(static_cast<int>(want->Value()), actual);
  }
}
|
||||
|
||||
|
||||
// Returns the 16-bit code unit at |index| of |str|, read through the
// internal string handle.
static uint16_t StringGet(Handle<String> str, int index) {
  return v8::Utils::OpenHandle(String::Cast(*str))->Get(index);
}
|
||||
|
||||
|
||||
// Exercises String::WriteUtf8 on the first |len| strings of the global JS
// array |name|, using the expected UTF-8 lengths from the global JS array
// |lengths_name|.
//
// For every string, each buffer capacity from expected length + 1 down to 0
// is tried twice: once with NO_OPTIONS into |buffer| and once with
// NO_NULL_TERMINATION into |buffer2|.  Both buffers are pre-filled with 42 so
// that untouched bytes can be told apart from written ones.
static void WriteUtf8Helper(
    LocalContext& context,
    const char* name,
    const char* lengths_name,
    int len) {
  Local<v8::Array> b =
      Local<v8::Array>::Cast(context->Global()->Get(v8_str(name)));
  Local<v8::Array> alens =
      Local<v8::Array>::Cast(context->Global()->Get(v8_str(lengths_name)));
  char buffer[1000];
  char buffer2[1000];
  for (int i = 0; i < len; i++) {
    Local<v8::String> string =
        Local<v8::String>::Cast(b->Get(i));
    Local<v8::Number> expected_len =
        Local<v8::Number>::Cast(alens->Get(i));
    int utf8_length = static_cast<int>(expected_len->Value());
    for (int j = utf8_length + 1; j >= 0; j--) {
      memset(reinterpret_cast<void*>(&buffer), 42, sizeof(buffer));
      memset(reinterpret_cast<void*>(&buffer2), 42, sizeof(buffer2));
      // Note: |nchars| ends up holding the count reported by the second
      // (NO_NULL_TERMINATION) call.
      int nchars;
      int utf8_written =
          string->WriteUtf8(buffer, j, &nchars, String::NO_OPTIONS);
      int utf8_written2 =
          string->WriteUtf8(buffer2, j, &nchars, String::NO_NULL_TERMINATION);
      CHECK_GE(utf8_length + 1, utf8_written);
      CHECK_GE(utf8_length, utf8_written2);
      // Both variants must produce the same data bytes.
      for (int k = 0; k < utf8_written2; k++) {
        CHECK_EQ(buffer[k], buffer2[k]);
      }
      // A code unit yields at most 3 bytes and at least 1 (the -1 allows for
      // the terminating null counted in utf8_written).
      CHECK(nchars * 3 >= utf8_written - 1);
      CHECK(nchars <= utf8_written);
      if (j == utf8_length + 1) {
        // Enough room for everything plus the terminating null.
        CHECK_EQ(utf8_written2, utf8_length);
        CHECK_EQ(utf8_written2 + 1, utf8_written);
      }
      // Nothing may be written past the reported length.
      CHECK_EQ(buffer[utf8_written], 42);
      if (j > utf8_length) {
        if (utf8_written != 0) CHECK_EQ(buffer[utf8_written - 1], 0);
        if (utf8_written > 1) CHECK_NE(buffer[utf8_written - 2], 42);
        Handle<String> roundtrip = v8_str(buffer);
        CHECK(roundtrip->Equals(string));
      } else {
        if (utf8_written != 0) CHECK_NE(buffer[utf8_written - 1], 42);
      }
      // The last byte reported for the unterminated write must really have
      // been written into buffer2 (previously this inspected |buffer| with
      // the other call's length, so buffer2 was never checked).
      if (utf8_written2 != 0) CHECK_NE(buffer2[utf8_written2 - 1], 42);
      if (nchars >= 2) {
        uint16_t trail = StringGet(string, nchars - 1);
        uint16_t lead = StringGet(string, nchars - 2);
        if (((lead & 0xfc00) == 0xd800) &&
            ((trail & 0xfc00) == 0xdc00)) {
          // The output ends in a matched surrogate pair, which must have been
          // emitted as a single 4-byte UTF-8 sequence.
          unsigned char u1 = buffer2[utf8_written2 - 4];
          unsigned char u2 = buffer2[utf8_written2 - 3];
          unsigned char u3 = buffer2[utf8_written2 - 2];
          unsigned char u4 = buffer2[utf8_written2 - 1];
          CHECK_EQ((u1 & 0xf8), 0xf0);
          CHECK_EQ((u2 & 0xc0), 0x80);
          CHECK_EQ((u3 & 0xc0), 0x80);
          CHECK_EQ((u4 & 0xc0), 0x80);
          uint32_t c = 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
          CHECK_EQ((u4 & 0x3f), (c & 0x3f));
          CHECK_EQ((u3 & 0x3f), ((c >> 6) & 0x3f));
          CHECK_EQ((u2 & 0x3f), ((c >> 12) & 0x3f));
          // The lead byte 11110xxx carries three payload bits, so mask with
          // 0x7; a 0x3 mask would drop bit 20 of code points >= 0x100000.
          CHECK_EQ((u1 & 0x7), c >> 18);
        }
      }
    }
  }
}
|
||||
|
||||
|
||||
// Builds JS strings out of an ASCII pad, lead surrogates and trail
// surrogates in all two-part (a, b) and four-part (a2, b2) combinations,
// together with their expected UTF-8 lengths (alens, a2lens), and then checks
// both the reported UTF-8 length and the bytes produced by WriteUtf8.
THREADED_TEST(Utf16) {
  LocalContext context;
  v8::HandleScope scope;
  // 3 x 3 two-part strings, and 9 x 9 concatenations of those.
  const int kTwoPartCount = 9;
  const int kFourPartCount = 81;
  CompileRun(
      // p holds the three building blocks, plens their UTF-8 lengths.
      "var pad = '01234567890123456789';"
      "var p = [];"
      "var plens = [20, 3, 3];"
      "p.push('01234567890123456789');"
      "var lead = 0xd800;"
      "var trail = 0xdc00;"
      "p.push(String.fromCharCode(0xd800));"
      "p.push(String.fromCharCode(0xdc00));"
      "var a = [];"
      "var b = [];"
      "var alens = [];"
      // Every ordered pair of building blocks; a fresh lead/trail value is
      // used on each iteration.
      "for (var i = 0; i < 3; i++) {"
      " p[1] = String.fromCharCode(lead++);"
      " for (var j = 0; j < 3; j++) {"
      " p[2] = String.fromCharCode(trail++);"
      " a.push(p[i] + p[j]);"
      " b.push(p[i] + p[j]);"
      " alens.push(plens[i] + plens[j]);"
      " }"
      "}"
      "alens[5] -= 2;"  // Here the surrogate pairs match up.
      "var a2 = [];"
      "var b2 = [];"
      "var a2lens = [];"
      // Every ordered pair of the two-part strings.
      "for (var m = 0; m < 9; m++) {"
      " for (var n = 0; n < 9; n++) {"
      " a2.push(a[m] + a[n]);"
      " b2.push(b[m] + b[n]);"
      " var utf = alens[m] + alens[n];"  // And here.
      // The 'n's that start with 0xdc.. are 6-8
      // The 'm's that end with 0xd8.. are 1, 4 and 7
      " if ((m % 3) == 1 && n >= 6) utf -= 2;"
      " a2lens.push(utf);"
      " }"
      "}");
  Utf16Helper(context, "a", "alens", kTwoPartCount);
  Utf16Helper(context, "a2", "a2lens", kFourPartCount);
  WriteUtf8Helper(context, "b", "alens", kTwoPartCount);
  WriteUtf8Helper(context, "b2", "a2lens", kFourPartCount);
}
|
||||
|
||||
|
||||
// Returns true if both API handles open to the very same internal string
// object (the dereferenced internal handles compare equal).
static bool SameSymbol(Handle<String> s1, Handle<String> s2) {
  i::Handle<i::String> first(v8::Utils::OpenHandle(*s1));
  i::Handle<i::String> second(v8::Utils::OpenHandle(*s2));
  const bool identical = (*first == *second);
  return identical;
}
|
||||
|
||||
|
||||
static void SameSymbolHelper(const char* a, const char* b) {
|
||||
Handle<String> symbol1 = v8::String::NewSymbol(a);
|
||||
Handle<String> symbol2 = v8::String::NewSymbol(b);
|
||||
CHECK(SameSymbol(symbol1, symbol2));
|
||||
}
|
||||
|
||||
|
||||
// Checks that a non-BMP character yields the same symbol whether its UTF-8
// input is the 4-byte encoding or a pair of 3-byte encoded surrogates, both
// for symbols created through the API and for string literals in compiled
// script source.
THREADED_TEST(Utf16Symbol) {
  LocalContext context;
  v8::HandleScope scope;

  // Sanity check with plain ASCII.
  Handle<String> symbol1 = v8::String::NewSymbol("abc");
  Handle<String> symbol2 = v8::String::NewSymbol("abc");
  CHECK(SameSymbol(symbol1, symbol2));

  // Each pair is tried with either encoding seen first.
  SameSymbolHelper("\360\220\220\205",  // 4 byte encoding.
                   "\355\240\201\355\260\205");  // 2 3-byte surrogates.
  SameSymbolHelper("\355\240\201\355\260\206",  // 2 3-byte surrogates.
                   "\360\220\220\206");  // 4 byte encoding.
  SameSymbolHelper("x\360\220\220\205",  // 4 byte encoding.
                   "x\355\240\201\355\260\205");  // 2 3-byte surrogates.
  SameSymbolHelper("x\355\240\201\355\260\206",  // 2 3-byte surrogates.
                   "x\360\220\220\206");  // 4 byte encoding.
  // The script verifies length and trail surrogate of each literal.  Every
  // throw reports the value that was actually tested (three of them used to
  // report a different index or a different variable).
  CompileRun(
      "var sym0 = 'benedictus';"
      "var sym0b = 'S\303\270ren';"
      "var sym1 = '\355\240\201\355\260\207';"
      "var sym2 = '\360\220\220\210';"
      "var sym3 = 'x\355\240\201\355\260\207';"
      "var sym4 = 'x\360\220\220\210';"
      "if (sym1.length != 2) throw sym1;"
      "if (sym1.charCodeAt(1) != 0xdc07) throw sym1.charCodeAt(1);"
      "if (sym2.length != 2) throw sym2;"
      "if (sym2.charCodeAt(1) != 0xdc08) throw sym2.charCodeAt(1);"
      "if (sym3.length != 3) throw sym3;"
      "if (sym3.charCodeAt(2) != 0xdc07) throw sym3.charCodeAt(2);"
      "if (sym4.length != 3) throw sym4;"
      "if (sym4.charCodeAt(2) != 0xdc08) throw sym4.charCodeAt(2);");
  Handle<String> sym0 = v8::String::NewSymbol("benedictus");
  Handle<String> sym0b = v8::String::NewSymbol("S\303\270ren");
  Handle<String> sym1 = v8::String::NewSymbol("\355\240\201\355\260\207");
  Handle<String> sym2 = v8::String::NewSymbol("\360\220\220\210");
  Handle<String> sym3 = v8::String::NewSymbol("x\355\240\201\355\260\207");
  Handle<String> sym4 = v8::String::NewSymbol("x\360\220\220\210");
  v8::Local<v8::Object> global = context->Global();
  Local<Value> s0 = global->Get(v8_str("sym0"));
  Local<Value> s0b = global->Get(v8_str("sym0b"));
  Local<Value> s1 = global->Get(v8_str("sym1"));
  Local<Value> s2 = global->Get(v8_str("sym2"));
  Local<Value> s3 = global->Get(v8_str("sym3"));
  Local<Value> s4 = global->Get(v8_str("sym4"));
  // The script's literals must be the very same symbols as the API-created
  // ones.
  CHECK(SameSymbol(sym0, Handle<String>(String::Cast(*s0))));
  CHECK(SameSymbol(sym0b, Handle<String>(String::Cast(*s0b))));
  CHECK(SameSymbol(sym1, Handle<String>(String::Cast(*s1))));
  CHECK(SameSymbol(sym2, Handle<String>(String::Cast(*s2))));
  CHECK(SameSymbol(sym3, Handle<String>(String::Cast(*s3))));
  CHECK(SameSymbol(sym4, Handle<String>(String::Cast(*s4))));
}
|
||||
|
||||
|
||||
THREADED_TEST(ToArrayIndex) {
|
||||
v8::HandleScope scope;
|
||||
LocalContext context;
|
||||
|
@ -63,7 +63,7 @@ TEST(ScanKeywords) {
|
||||
int length = i::StrLength(key_token.keyword);
|
||||
CHECK(static_cast<int>(sizeof(buffer)) >= length);
|
||||
{
|
||||
i::Utf8ToUC16CharacterStream stream(keyword, length);
|
||||
i::Utf8ToUtf16CharacterStream stream(keyword, length);
|
||||
i::Scanner scanner(&unicode_cache);
|
||||
// The scanner should parse Harmony keywords for this test.
|
||||
scanner.SetHarmonyScoping(true);
|
||||
@ -74,7 +74,7 @@ TEST(ScanKeywords) {
|
||||
}
|
||||
// Removing characters will make keyword matching fail.
|
||||
{
|
||||
i::Utf8ToUC16CharacterStream stream(keyword, length - 1);
|
||||
i::Utf8ToUtf16CharacterStream stream(keyword, length - 1);
|
||||
i::Scanner scanner(&unicode_cache);
|
||||
scanner.Initialize(&stream);
|
||||
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
|
||||
@ -85,7 +85,7 @@ TEST(ScanKeywords) {
|
||||
for (int j = 0; j < static_cast<int>(ARRAY_SIZE(chars_to_append)); ++j) {
|
||||
memmove(buffer, keyword, length);
|
||||
buffer[length] = chars_to_append[j];
|
||||
i::Utf8ToUC16CharacterStream stream(buffer, length + 1);
|
||||
i::Utf8ToUtf16CharacterStream stream(buffer, length + 1);
|
||||
i::Scanner scanner(&unicode_cache);
|
||||
scanner.Initialize(&stream);
|
||||
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
|
||||
@ -95,7 +95,7 @@ TEST(ScanKeywords) {
|
||||
{
|
||||
memmove(buffer, keyword, length);
|
||||
buffer[length - 1] = '_';
|
||||
i::Utf8ToUC16CharacterStream stream(buffer, length);
|
||||
i::Utf8ToUtf16CharacterStream stream(buffer, length);
|
||||
i::Scanner scanner(&unicode_cache);
|
||||
scanner.Initialize(&stream);
|
||||
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
|
||||
@ -255,7 +255,7 @@ TEST(StandAlonePreParser) {
|
||||
uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
|
||||
for (int i = 0; programs[i]; i++) {
|
||||
const char* program = programs[i];
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
i::Utf8ToUtf16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::CompleteParserRecorder log;
|
||||
@ -291,7 +291,7 @@ TEST(StandAlonePreParserNoNatives) {
|
||||
uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
|
||||
for (int i = 0; programs[i]; i++) {
|
||||
const char* program = programs[i];
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
i::Utf8ToUtf16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::CompleteParserRecorder log;
|
||||
@ -326,8 +326,9 @@ TEST(RegressChromium62639) {
|
||||
// and then used the invalid currently scanned literal. This always
|
||||
// failed in debug mode, and sometimes crashed in release mode.
|
||||
|
||||
i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::Utf8ToUtf16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(program),
|
||||
static_cast<unsigned>(strlen(program)));
|
||||
i::ScriptDataImpl* data =
|
||||
i::ParserApi::PreParse(&stream, NULL, false);
|
||||
CHECK(data->HasError());
|
||||
@ -392,7 +393,7 @@ TEST(PreParseOverflow) {
|
||||
|
||||
uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
|
||||
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
i::Utf8ToUtf16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(*program),
|
||||
static_cast<unsigned>(kProgramSize));
|
||||
i::CompleteParserRecorder log;
|
||||
@ -449,10 +450,10 @@ void TestCharacterStream(const char* ascii_source,
|
||||
i::Handle<i::String> uc16_string(
|
||||
FACTORY->NewExternalStringFromTwoByte(&resource));
|
||||
|
||||
i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
|
||||
i::ExternalTwoByteStringUtf16CharacterStream uc16_stream(
|
||||
i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
|
||||
i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
|
||||
i::Utf8ToUC16CharacterStream utf8_stream(
|
||||
i::GenericStringUtf16CharacterStream string_stream(ascii_string, start, end);
|
||||
i::Utf8ToUtf16CharacterStream utf8_stream(
|
||||
reinterpret_cast<const i::byte*>(ascii_source), end);
|
||||
utf8_stream.SeekForward(start);
|
||||
|
||||
@ -575,12 +576,14 @@ TEST(Utf8CharacterStream) {
|
||||
char buffer[kAllUtf8CharsSizeU];
|
||||
unsigned cursor = 0;
|
||||
for (int i = 0; i <= kMaxUC16Char; i++) {
|
||||
cursor += unibrow::Utf8::Encode(buffer + cursor, i);
|
||||
cursor += unibrow::Utf8::Encode(buffer + cursor,
|
||||
i,
|
||||
unibrow::Utf16::kNoPreviousCharacter);
|
||||
}
|
||||
ASSERT(cursor == kAllUtf8CharsSizeU);
|
||||
|
||||
i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
|
||||
kAllUtf8CharsSizeU);
|
||||
i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
|
||||
kAllUtf8CharsSizeU);
|
||||
for (int i = 0; i <= kMaxUC16Char; i++) {
|
||||
CHECK_EQU(i, stream.pos());
|
||||
int32_t c = stream.Advance();
|
||||
@ -610,7 +613,7 @@ TEST(Utf8CharacterStream) {
|
||||
|
||||
#undef CHECK_EQU
|
||||
|
||||
void TestStreamScanner(i::UC16CharacterStream* stream,
|
||||
void TestStreamScanner(i::Utf16CharacterStream* stream,
|
||||
i::Token::Value* expected_tokens,
|
||||
int skip_pos = 0, // Zero means not skipping.
|
||||
int skip_to = 0) {
|
||||
@ -633,8 +636,8 @@ TEST(StreamScanner) {
|
||||
v8::V8::Initialize();
|
||||
|
||||
const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
|
||||
i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
|
||||
static_cast<unsigned>(strlen(str1)));
|
||||
i::Utf8ToUtf16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
|
||||
static_cast<unsigned>(strlen(str1)));
|
||||
i::Token::Value expectations1[] = {
|
||||
i::Token::LBRACE,
|
||||
i::Token::IDENTIFIER,
|
||||
@ -652,8 +655,8 @@ TEST(StreamScanner) {
|
||||
TestStreamScanner(&stream1, expectations1, 0, 0);
|
||||
|
||||
const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
|
||||
i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
|
||||
static_cast<unsigned>(strlen(str2)));
|
||||
i::Utf8ToUtf16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
|
||||
static_cast<unsigned>(strlen(str2)));
|
||||
i::Token::Value expectations2[] = {
|
||||
i::Token::CASE,
|
||||
i::Token::DEFAULT,
|
||||
@ -683,7 +686,7 @@ TEST(StreamScanner) {
|
||||
for (int i = 0; i <= 4; i++) {
|
||||
expectations3[6 - i] = i::Token::ILLEGAL;
|
||||
expectations3[5 - i] = i::Token::EOS;
|
||||
i::Utf8ToUC16CharacterStream stream3(
|
||||
i::Utf8ToUtf16CharacterStream stream3(
|
||||
reinterpret_cast<const i::byte*>(str3),
|
||||
static_cast<unsigned>(strlen(str3)));
|
||||
TestStreamScanner(&stream3, expectations3, 1, 1 + i);
|
||||
@ -692,7 +695,7 @@ TEST(StreamScanner) {
|
||||
|
||||
|
||||
void TestScanRegExp(const char* re_source, const char* expected) {
|
||||
i::Utf8ToUC16CharacterStream stream(
|
||||
i::Utf8ToUtf16CharacterStream stream(
|
||||
reinterpret_cast<const i::byte*>(re_source),
|
||||
static_cast<unsigned>(strlen(re_source)));
|
||||
i::Scanner scanner(i::Isolate::Current()->unicode_cache());
|
||||
@ -748,6 +751,67 @@ TEST(RegExpScanning) {
|
||||
}
|
||||
|
||||
|
||||
// Returns the number of UTF-16 code units the null-terminated UTF-8 string
// |s| is expected to decode to.
//
// The count starts at the byte length and is reduced for every well-formed
// multi-byte sequence: 2 bytes -> 1 unit, 3 bytes -> 1 unit, 4 bytes -> 2
// units (a surrogate pair).  Any byte that does not start a well-formed
// sequence (stray continuation byte, over-long form, 5/6-byte lead, truncated
// sequence) is counted as one code unit, i.e. one kBadChar per byte.
static int Utf8LengthHelper(const char* s) {
  int len = static_cast<int>(strlen(s));
  int character_length = len;
  for (int i = 0; i < len; i++) {
    unsigned char c = s[i];
    int input_offset = 0;   // Continuation bytes expected after the lead byte.
    int output_adjust = 0;  // Bytes saved if the sequence is well formed.
    if (c > 0x7f) {
      // A continuation byte with no lead byte stands for one kBadChar.
      if (c < 0xc0) continue;
      if (c >= 0xf0) {
        if (c >= 0xf8) {
          // 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8
          // byte.
          continue;  // Handle first UTF-8 byte.
        }
        if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) {
          // This 4 byte sequence could have been coded as a 3 byte sequence.
          // Record a single kBadChar for the first byte and continue.
          continue;
        }
        input_offset = 3;
        // 4 bytes of UTF-8 turn into 2 UTF-16 code units.  The adjustment is
        // deferred until the continuation bytes have been validated, so that
        // a truncated sequence is not under-counted.
        output_adjust = 2;
      } else if (c >= 0xe0) {
        if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) {
          // This 3 byte sequence could have been coded as a 2 byte sequence.
          // Record a single kBadChar for the first byte and continue.
          continue;
        }
        input_offset = 2;
        // 3 bytes of UTF-8 turn into 1 UTF-16 code unit.
        output_adjust = 2;
      } else {
        if ((c & 0x1e) == 0) {
          // This 2 byte sequence could have been coded as a 1 byte sequence.
          // Record a single kBadChar for the first byte and continue.
          continue;
        }
        input_offset = 1;
        // 2 bytes of UTF-8 turn into 1 UTF-16 code unit.
        output_adjust = 1;
      }
      // The terminating null is not a continuation byte, so this scan never
      // reads past the end of the string.
      bool bad = false;
      for (int j = 1; j <= input_offset; j++) {
        if ((s[i + j] & 0xc0) != 0x80) {
          // Bad UTF-8 sequence turns the first in the sequence into kBadChar,
          // which is a single UTF-16 code unit.
          bad = true;
          break;
        }
      }
      if (!bad) {
        i += input_offset;
        character_length -= output_adjust;
      }
    }
  }
  return character_length;
}
|
||||
|
||||
|
||||
TEST(ScopePositions) {
|
||||
// Test the parser for correctly setting the start and end positions
|
||||
// of a scope. We check the scope positions of exactly one scope
|
||||
@ -835,6 +899,91 @@ TEST(ScopePositions) {
|
||||
{ " for ", "(let x in {})\n"
|
||||
" statement;", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
|
||||
// Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw
|
||||
// the preparser off in terms of byte offsets.
|
||||
// 6 byte encoding.
|
||||
{ " 'foo\355\240\201\355\260\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// 4 byte encoding.
|
||||
{ " 'foo\360\220\220\212';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// 3 byte encoding of \u0fff.
|
||||
{ " 'foo\340\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 6 byte encoding with missing last byte.
|
||||
{ " 'foo\355\240\201\355\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 3 byte encoding of \u0fff with missing last byte.
|
||||
{ " 'foo\340\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 3 byte encoding of \u0fff with missing 2 last bytes.
|
||||
{ " 'foo\340';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 3 byte encoding of \u00ff should be a 2 byte encoding.
|
||||
{ " 'foo\340\203\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 3 byte encoding of \u007f should be a 1 byte encoding.
|
||||
{ " 'foo\340\201\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Unpaired lead surrogate.
|
||||
{ " 'foo\355\240\201';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Unpaired lead surrogate where following code point is a 3 byte sequence.
|
||||
{ " 'foo\355\240\201\340\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Unpaired lead surrogate where following code point is a 4 byte encoding
|
||||
// of a trail surrogate.
|
||||
{ " 'foo\355\240\201\360\215\260\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Unpaired trail surrogate.
|
||||
{ " 'foo\355\260\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// 2 byte encoding of \u00ff.
|
||||
{ " 'foo\303\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 2 byte encoding of \u00ff with missing last byte.
|
||||
{ " 'foo\303';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Broken 2 byte encoding of \u007f should be a 1 byte encoding.
|
||||
{ " 'foo\301\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Illegal 5 byte encoding.
|
||||
{ " 'foo\370\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Illegal 6 byte encoding.
|
||||
{ " 'foo\374\277\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Illegal 0xfe byte
|
||||
{ " 'foo\376\277\277\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
// Illegal 0xff byte
|
||||
{ " 'foo\377\277\277\277\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
{ " 'foo';\n"
|
||||
" (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
{ " 'foo';\n"
|
||||
" (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::CLASSIC_MODE },
|
||||
{ NULL, NULL, NULL, i::EVAL_SCOPE, i::CLASSIC_MODE }
|
||||
};
|
||||
|
||||
@ -848,20 +997,24 @@ TEST(ScopePositions) {
|
||||
i::FLAG_harmony_scoping = true;
|
||||
|
||||
for (int i = 0; source_data[i].outer_prefix; i++) {
|
||||
int kPrefixLen = i::StrLength(source_data[i].outer_prefix);
|
||||
int kInnerLen = i::StrLength(source_data[i].inner_source);
|
||||
int kSuffixLen = i::StrLength(source_data[i].outer_suffix);
|
||||
int kPrefixLen = Utf8LengthHelper(source_data[i].outer_prefix);
|
||||
int kInnerLen = Utf8LengthHelper(source_data[i].inner_source);
|
||||
int kSuffixLen = Utf8LengthHelper(source_data[i].outer_suffix);
|
||||
int kPrefixByteLen = i::StrLength(source_data[i].outer_prefix);
|
||||
int kInnerByteLen = i::StrLength(source_data[i].inner_source);
|
||||
int kSuffixByteLen = i::StrLength(source_data[i].outer_suffix);
|
||||
int kProgramSize = kPrefixLen + kInnerLen + kSuffixLen;
|
||||
i::Vector<char> program = i::Vector<char>::New(kProgramSize + 1);
|
||||
int length = i::OS::SNPrintF(program, "%s%s%s",
|
||||
source_data[i].outer_prefix,
|
||||
source_data[i].inner_source,
|
||||
source_data[i].outer_suffix);
|
||||
CHECK(length == kProgramSize);
|
||||
int kProgramByteSize = kPrefixByteLen + kInnerByteLen + kSuffixByteLen;
|
||||
i::Vector<char> program = i::Vector<char>::New(kProgramByteSize + 1);
|
||||
i::OS::SNPrintF(program, "%s%s%s",
|
||||
source_data[i].outer_prefix,
|
||||
source_data[i].inner_source,
|
||||
source_data[i].outer_suffix);
|
||||
|
||||
// Parse program source.
|
||||
i::Handle<i::String> source(
|
||||
FACTORY->NewStringFromAscii(i::CStrVector(program.start())));
|
||||
FACTORY->NewStringFromUtf8(i::CStrVector(program.start())));
|
||||
CHECK_EQ(source->length(), kProgramSize);
|
||||
i::Handle<i::Script> script = FACTORY->NewScript(source);
|
||||
i::Parser parser(script, i::kAllowLazy | i::EXTENDED_MODE, NULL, NULL);
|
||||
i::CompilationInfo info(script);
|
||||
@ -894,7 +1047,7 @@ void TestParserSync(i::Handle<i::String> source, int flags) {
|
||||
// Preparse the data.
|
||||
i::CompleteParserRecorder log;
|
||||
i::Scanner scanner(i::Isolate::Current()->unicode_cache());
|
||||
i::GenericStringUC16CharacterStream stream(source, 0, source->length());
|
||||
i::GenericStringUtf16CharacterStream stream(source, 0, source->length());
|
||||
scanner.SetHarmonyScoping(harmony_scoping);
|
||||
scanner.Initialize(&stream);
|
||||
v8::preparser::PreParser::PreParseResult result =
|
||||
|
Loading…
Reference in New Issue
Block a user