[string] Handle two-byte contents in String.p.toLowerCase
Previously (since f0e95769), this toLowerCase fast-path assumed
it would only see one-byte flat contents. Unfortunately, it's
possible to have a one-byte sliced string that has a two-byte parent.
This CL ensures that String.p.toLowerCase handles such cases
correctly.
BUG=chromium:736451
Cq-Include-Trybots: master.tryserver.v8:v8_linux_noi18n_rel_ng
Change-Id: Iae056b3db5535bb5665439a5cc8282a51571a548
Reviewed-on: https://chromium-review.googlesource.com/565559
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#46574}
This commit is contained in:
parent
292e96709c
commit
3c260762da
81
src/intl.cc
81
src/intl.cc
@ -125,7 +125,7 @@ void ToUpperWithSharpS(const Vector<const Char>& src,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
|
inline int FindFirstUpperOrNonAscii(String* s, int length) {
|
||||||
for (int index = 0; index < length; ++index) {
|
for (int index = 0; index < length; ++index) {
|
||||||
uint16_t ch = s->Get(index);
|
uint16_t ch = s->Get(index);
|
||||||
if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
|
if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
|
||||||
@ -200,35 +200,50 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// A stripped-down version of ConvertToLower that can only handle flat one-byte
|
// A stripped-down version of ConvertToLower that can only handle flat one-byte
|
||||||
// strings and does not allocate.
|
// strings and does not allocate. Note that {src} could still be, e.g., a
|
||||||
|
// one-byte sliced string with a two-byte parent string.
|
||||||
// Called from TF builtins.
|
// Called from TF builtins.
|
||||||
MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst,
|
MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst,
|
||||||
Isolate* isolate) {
|
Isolate* isolate) {
|
||||||
DCHECK_EQ(src->length(), dst->length());
|
DCHECK_EQ(src->length(), dst->length());
|
||||||
DCHECK(src->IsOneByteRepresentation());
|
DCHECK(src->HasOnlyOneByteChars());
|
||||||
DCHECK(src->IsFlat());
|
DCHECK(src->IsFlat());
|
||||||
DCHECK(dst->IsSeqOneByteString());
|
DCHECK(dst->IsSeqOneByteString());
|
||||||
|
|
||||||
DisallowHeapAllocation no_gc;
|
DisallowHeapAllocation no_gc;
|
||||||
|
|
||||||
const int length = src->length();
|
const int length = src->length();
|
||||||
|
String::FlatContent src_flat = src->GetFlatContent();
|
||||||
const uint8_t* src_data = src->GetFlatContent().ToOneByteVector().start();
|
|
||||||
uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars();
|
uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars();
|
||||||
|
|
||||||
bool has_changed_character = false;
|
if (src_flat.IsOneByte()) {
|
||||||
int index_to_first_unprocessed = FastAsciiConvert<true>(
|
const uint8_t* src_data = src_flat.ToOneByteVector().start();
|
||||||
reinterpret_cast<char*>(dst_data),
|
|
||||||
reinterpret_cast<const char*>(src_data), length, &has_changed_character);
|
|
||||||
|
|
||||||
if (index_to_first_unprocessed == length) {
|
bool has_changed_character = false;
|
||||||
return has_changed_character ? dst : src;
|
int index_to_first_unprocessed =
|
||||||
}
|
FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
|
||||||
|
reinterpret_cast<const char*>(src_data), length,
|
||||||
|
&has_changed_character);
|
||||||
|
|
||||||
// If not ASCII, we keep the result up to index_to_first_unprocessed and
|
if (index_to_first_unprocessed == length) {
|
||||||
// process the rest.
|
return has_changed_character ? dst : src;
|
||||||
for (int index = index_to_first_unprocessed; index < length; ++index) {
|
}
|
||||||
dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
|
|
||||||
|
// If not ASCII, we keep the result up to index_to_first_unprocessed and
|
||||||
|
// process the rest.
|
||||||
|
for (int index = index_to_first_unprocessed; index < length; ++index) {
|
||||||
|
dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DCHECK(src_flat.IsTwoByte());
|
||||||
|
int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
|
||||||
|
if (index_to_first_unprocessed == length) return src;
|
||||||
|
|
||||||
|
const uint16_t* src_data = src_flat.ToUC16Vector().start();
|
||||||
|
CopyChars(dst_data, src_data, index_to_first_unprocessed);
|
||||||
|
for (int index = index_to_first_unprocessed; index < length; ++index) {
|
||||||
|
dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
@ -252,41 +267,17 @@ MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
|
|||||||
// TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
|
// TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
|
||||||
// to two parts, one for scanning the prefix with no change and the other for
|
// to two parts, one for scanning the prefix with no change and the other for
|
||||||
// handling ASCII-only characters.
|
// handling ASCII-only characters.
|
||||||
int index_to_first_unprocessed = length;
|
|
||||||
const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
|
bool is_short = length < static_cast<int>(sizeof(uintptr_t));
|
||||||
if (is_short) {
|
if (is_short) {
|
||||||
index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
|
bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
|
||||||
// Nothing to do if the string is all ASCII with no uppercase.
|
if (is_lower_ascii) return *s;
|
||||||
if (index_to_first_unprocessed == length) return *s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Handle<SeqOneByteString> result =
|
Handle<SeqOneByteString> result =
|
||||||
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
|
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
|
||||||
|
|
||||||
if (s->IsOneByteRepresentation()) {
|
return ConvertOneByteToLower(*s, *result, isolate);
|
||||||
return ConvertOneByteToLower(*s, *result, isolate);
|
|
||||||
}
|
|
||||||
|
|
||||||
DisallowHeapAllocation no_gc;
|
|
||||||
DCHECK(s->IsFlat());
|
|
||||||
DCHECK(s->IsTwoByteRepresentation());
|
|
||||||
String::FlatContent flat = s->GetFlatContent();
|
|
||||||
DCHECK(flat.IsTwoByte());
|
|
||||||
|
|
||||||
uint8_t* dest = result->GetChars();
|
|
||||||
if (index_to_first_unprocessed == length) {
|
|
||||||
DCHECK(!is_short);
|
|
||||||
index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
|
|
||||||
}
|
|
||||||
// Nothing to do if the string is all ASCII with no uppercase.
|
|
||||||
if (index_to_first_unprocessed == length) return *s;
|
|
||||||
const uint16_t* src = flat.ToUC16Vector().start();
|
|
||||||
CopyChars(dest, src, index_to_first_unprocessed);
|
|
||||||
for (int index = index_to_first_unprocessed; index < length; ++index) {
|
|
||||||
dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
|
|
||||||
}
|
|
||||||
|
|
||||||
return *result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
|
MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
|
||||||
|
13
test/mjsunit/regress/regress-crbug-736451.js
Normal file
13
test/mjsunit/regress/regress-crbug-736451.js
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
// Flags: --expose-externalize-string --no-stress-opt
|
||||||
|
|
||||||
|
!function() {
|
||||||
|
const s0 = "external string turned into two byte";
|
||||||
|
const s1 = s0.substring(1);
|
||||||
|
externalizeString(s0, true);
|
||||||
|
|
||||||
|
s1.toLowerCase();
|
||||||
|
}();
|
Loading…
Reference in New Issue
Block a user