Optimize case conversion with icu_case_mapping

Use FastAsciiConvert (as used by Unibrow) for i18n-aware
case conversion with --icu_case_mapping.

Move FastAsciiConvert to src/string-case.cc so that it can be used
by both runtime-{string,i18n}.

Add more tests.

BUG=v8:4477,v8:4476
TEST=intl/general/case*

Review-Url: https://codereview.chromium.org/2533983006
Cr-Commit-Position: refs/heads/master@{#41821}
This commit is contained in:
jshin 2016-12-19 10:43:55 -08:00 committed by Commit bot
parent 4c640be19b
commit af38272dd9
9 changed files with 269 additions and 204 deletions

View File

@ -1691,6 +1691,8 @@ v8_source_set("v8_base") {
"src/startup-data-util.h",
"src/string-builder.cc",
"src/string-builder.h",
"src/string-case.cc",
"src/string-case.h",
"src/string-search.h",
"src/string-stream.cc",
"src/string-stream.h",

View File

@ -2121,27 +2121,16 @@ OverrideFunction(GlobalString.prototype, 'normalize', function() {
);
function ToLowerCaseI18N() {
if (!IS_UNDEFINED(new.target)) {
throw %make_type_error(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
var s = TO_STRING(this);
return %StringToLowerCaseI18N(s);
return %StringToLowerCaseI18N(TO_STRING(this));
}
function ToUpperCaseI18N() {
if (!IS_UNDEFINED(new.target)) {
throw %make_type_error(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toUpperCase");
var s = TO_STRING(this);
return %StringToUpperCaseI18N(s);
return %StringToUpperCaseI18N(TO_STRING(this));
}
function ToLocaleLowerCaseI18N(locales) {
if (!IS_UNDEFINED(new.target)) {
throw %make_type_error(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleLowerCase");
return LocaleConvertCase(TO_STRING(this), locales, false);
}
@ -2149,9 +2138,6 @@ function ToLocaleLowerCaseI18N(locales) {
%FunctionSetLength(ToLocaleLowerCaseI18N, 0);
function ToLocaleUpperCaseI18N(locales) {
if (!IS_UNDEFINED(new.target)) {
throw %make_type_error(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleUpperCase");
return LocaleConvertCase(TO_STRING(this), locales, true);
}

View File

@ -8,13 +8,15 @@
#include <memory>
#include "src/api.h"
#include "src/api-natives.h"
#include "src/api.h"
#include "src/arguments.h"
#include "src/factory.h"
#include "src/i18n.h"
#include "src/isolate-inl.h"
#include "src/messages.h"
#include "src/string-case.h"
#include "src/utils.h"
#include "unicode/brkiter.h"
#include "unicode/calendar.h"
@ -1041,15 +1043,14 @@ bool ToUpperFastASCII(const Vector<const Char>& src,
const uint16_t sharp_s = 0xDF;
template <typename Char>
bool ToUpperOneByte(const Vector<const Char>& src,
Handle<SeqOneByteString> result, int* sharp_s_count) {
bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
int* sharp_s_count) {
// Still pretty-fast path for the input with non-ASCII Latin-1 characters.
// There are two special cases.
// 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
// 2. Lower case sharp-S converts to "SS" (two characters)
*sharp_s_count = 0;
int32_t index = 0;
for (auto it = src.begin(); it != src.end(); ++it) {
uint16_t ch = static_cast<uint16_t>(*it);
if (V8_UNLIKELY(ch == sharp_s)) {
@ -1061,7 +1062,7 @@ bool ToUpperOneByte(const Vector<const Char>& src,
// need to take the 16-bit path.
return false;
}
result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
*dest++ = ToLatin1Upper(ch);
}
return true;
@ -1082,6 +1083,16 @@ void ToUpperWithSharpS(const Vector<const Char>& src,
}
}
// Scans the first |length| characters of |s| and returns the index of the
// first one that is either an ASCII uppercase letter or lies outside the
// 7-bit ASCII range. Returns |length| when no such character is found.
inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
  int position = 0;
  while (position < length) {
    const uint16_t code_unit = s->Get(position);
    // Any bit above the low seven means the character is not ASCII.
    const bool is_non_ascii = (code_unit & ~0x7F) != 0;
    if (V8_UNLIKELY(IsASCIIUpper(code_unit) || is_non_ascii)) break;
    ++position;
  }
  return position;
}
} // namespace
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
@ -1091,60 +1102,65 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
int length = s->length();
s = String::Flatten(s);
// First scan the string for uppercase and non-ASCII characters:
if (s->HasOnlyOneByteChars()) {
int first_index_to_lower = length;
for (int index = 0; index < length; ++index) {
// Blink specializes this path for one-byte strings, so it
// does not need to do a generic get, but can do the equivalent
// of SeqOneByteStringGet.
uint16_t ch = s->Get(index);
if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
first_index_to_lower = index;
break;
}
}
// Nothing to do if the string is all ASCII with no uppercase.
if (first_index_to_lower == length) return *s;
// We depend here on the invariant that the length of a Latin1
// string is invariant under ToLowerCase, and the result always
// fits in the Latin1 range in the *root locale*. It does not hold
// for ToUpperCase even in the root locale.
Handle<SeqOneByteString> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawOneByteString(length));
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
if (flat.IsOneByte()) {
const uint8_t* src = flat.ToOneByteVector().start();
CopyChars(result->GetChars(), src,
static_cast<size_t>(first_index_to_lower));
for (int index = first_index_to_lower; index < length; ++index) {
uint16_t ch = static_cast<uint16_t>(src[index]);
result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
}
} else {
const uint16_t* src = flat.ToUC16Vector().start();
CopyChars(result->GetChars(), src,
static_cast<size_t>(first_index_to_lower));
for (int index = first_index_to_lower; index < length; ++index) {
uint16_t ch = src[index];
result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
}
}
return *result;
if (!s->HasOnlyOneByteChars()) {
// Use a slower implementation for strings with characters beyond U+00FF.
return LocaleConvertCase(s, isolate, false, "");
}
// Blink had an additional case here for ASCII 2-byte strings, but
// that is subsumed by the above code (assuming there isn't a false
// negative for HasOnlyOneByteChars).
// We depend here on the invariant that the length of a Latin1
// string is invariant under ToLowerCase, and the result always
// fits in the Latin1 range in the *root locale*. It does not hold
// for ToUpperCase even in the root locale.
// Do a slower implementation for cases that include non-ASCII characters.
return LocaleConvertCase(s, isolate, false, "");
// Scan the string for uppercase and non-ASCII characters for strings
// shorter than a machine-word without any memory allocation overhead.
// TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
// to two parts, one for scanning the prefix with no change and the other for
// handling ASCII-only characters.
int index_to_first_unprocessed = length;
const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
if (is_short) {
index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
// Nothing to do if the string is all ASCII with no uppercase.
if (index_to_first_unprocessed == length) return *s;
}
Handle<SeqOneByteString> result =
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
uint8_t* dest = result->GetChars();
if (flat.IsOneByte()) {
const uint8_t* src = flat.ToOneByteVector().start();
bool has_changed_character = false;
index_to_first_unprocessed = FastAsciiConvert<true>(
reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src),
length, &has_changed_character);
// If not ASCII, we keep the result up to index_to_first_unprocessed and
// process the rest.
if (index_to_first_unprocessed == length)
return has_changed_character ? *result : *s;
for (int index = index_to_first_unprocessed; index < length; ++index) {
dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
}
} else {
if (index_to_first_unprocessed == length) {
DCHECK(!is_short);
index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
}
// Nothing to do if the string is all ASCII with no uppercase.
if (index_to_first_unprocessed == length) return *s;
const uint16_t* src = flat.ToUC16Vector().start();
CopyChars(dest, src, index_to_first_unprocessed);
for (int index = index_to_first_unprocessed; index < length; ++index) {
dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
}
}
return *result;
}
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
@ -1152,35 +1168,38 @@ RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
DCHECK_EQ(args.length(), 1);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
// This function could be optimized for no-op cases the way lowercase
// counterpart is, but in empirical testing, few actual calls to upper()
// are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
int32_t length = s->length();
s = String::Flatten(s);
if (s->HasOnlyOneByteChars()) {
Handle<SeqOneByteString> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawOneByteString(length));
Handle<SeqOneByteString> result =
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
int sharp_s_count;
bool is_result_single_byte;
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
// If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
// could be removed because ToUpperOneByte is pretty fast now (it
// does not call ICU API any more.).
uint8_t* dest = result->GetChars();
if (flat.IsOneByte()) {
Vector<const uint8_t> src = flat.ToOneByteVector();
if (ToUpperFastASCII(src, result)) return *result;
is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
bool has_changed_character = false;
int index_to_first_unprocessed =
FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<const char*>(src.start()),
length, &has_changed_character);
if (index_to_first_unprocessed == length)
return has_changed_character ? *result : *s;
// If not ASCII, we keep the result up to index_to_first_unprocessed and
// process the rest.
is_result_single_byte =
ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
dest + index_to_first_unprocessed, &sharp_s_count);
} else {
DCHECK(flat.IsTwoByte());
Vector<const uint16_t> src = flat.ToUC16Vector();
if (ToUpperFastASCII(src, result)) return *result;
is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
}
}

View File

@ -7,6 +7,7 @@
#include "src/arguments.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/string-builder.h"
#include "src/string-case.h"
#include "src/string-search.h"
namespace v8 {
@ -694,122 +695,6 @@ MUST_USE_RESULT static Object* ConvertCaseHelper(
}
}
static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
static const uintptr_t kAsciiMask = kOneInEveryByte << 7;
// Given a word and two range boundaries returns a word with high bit
// set in every byte iff the corresponding input byte was strictly in
// the range (m, n). All the other bits in the result are cleared.
// This function is only useful when it can be inlined and the
// boundaries are statically known.
// Requires: all bytes in the input word and the boundaries must be
// ASCII (less than 0x7F).
static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
// Use strict inequalities since in edge cases the function could be
// further simplified.
DCHECK(0 < m && m < n);
// Has high bit set in every w byte less than n.
uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
// Has high bit set in every w byte greater than m.
uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
}
#ifdef DEBUG
static bool CheckFastAsciiConvert(char* dst, const char* src, int length,
bool changed, bool is_to_lower) {
bool expected_changed = false;
for (int i = 0; i < length; i++) {
if (dst[i] == src[i]) continue;
expected_changed = true;
if (is_to_lower) {
DCHECK('A' <= src[i] && src[i] <= 'Z');
DCHECK(dst[i] == src[i] + ('a' - 'A'));
} else {
DCHECK('a' <= src[i] && src[i] <= 'z');
DCHECK(dst[i] == src[i] - ('a' - 'A'));
}
}
return (expected_changed == changed);
}
#endif
template <class Converter>
static bool FastAsciiConvert(char* dst, const char* src, int length,
bool* changed_out) {
#ifdef DEBUG
char* saved_dst = dst;
const char* saved_src = src;
#endif
DisallowHeapAllocation no_gc;
// We rely on the distance between upper and lower case letters
// being a known power of 2.
DCHECK('a' - 'A' == (1 << 5));
// Boundaries for the range of input characters that require conversion.
static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;
static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
bool changed = false;
uintptr_t or_acc = 0;
const char* const limit = src + length;
// dst is newly allocated and always aligned.
DCHECK(IsAligned(reinterpret_cast<intptr_t>(dst), sizeof(uintptr_t)));
// Only attempt processing one word at a time if src is also aligned.
if (IsAligned(reinterpret_cast<intptr_t>(src), sizeof(uintptr_t))) {
// Process the prefix of the input that requires no conversion one aligned
// (machine) word at a time.
while (src <= limit - sizeof(uintptr_t)) {
const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
or_acc |= w;
if (AsciiRangeMask(w, lo, hi) != 0) {
changed = true;
break;
}
*reinterpret_cast<uintptr_t*>(dst) = w;
src += sizeof(uintptr_t);
dst += sizeof(uintptr_t);
}
// Process the remainder of the input performing conversion when
// required one word at a time.
while (src <= limit - sizeof(uintptr_t)) {
const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
or_acc |= w;
uintptr_t m = AsciiRangeMask(w, lo, hi);
// The mask has high (7th) bit set in every byte that needs
// conversion and we know that the distance between cases is
// 1 << 5.
*reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
src += sizeof(uintptr_t);
dst += sizeof(uintptr_t);
}
}
// Process the last few bytes of the input (or the whole input if
// unaligned access is not supported).
while (src < limit) {
char c = *src;
or_acc |= c;
if (lo < c && c < hi) {
c ^= (1 << 5);
changed = true;
}
*dst = c;
++src;
++dst;
}
if ((or_acc & kAsciiMask) != 0) return false;
DCHECK(CheckFastAsciiConvert(saved_dst, saved_src, length, changed,
Converter::kIsToLower));
*changed_out = changed;
return true;
}
template <class Converter>
MUST_USE_RESULT static Object* ConvertCase(
Handle<String> s, Isolate* isolate,
@ -833,12 +718,13 @@ MUST_USE_RESULT static Object* ConvertCase(
String::FlatContent flat_content = s->GetFlatContent();
DCHECK(flat_content.IsFlat());
bool has_changed_character = false;
bool is_ascii = FastAsciiConvert<Converter>(
int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
length, &has_changed_character);
// If not ASCII, we discard the result and take the 2 byte path.
if (is_ascii) return has_changed_character ? *result : *s;
if (index_to_first_unprocessed == length)
return has_changed_character ? *result : *s;
}
Handle<SeqString> result; // Same length as input.
@ -872,7 +758,6 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping());
}
RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
HandleScope scope(isolate);
DCHECK_EQ(args.length(), 1);

View File

@ -6,6 +6,7 @@
#define V8_RUNTIME_RUNTIME_UTILS_H_
#include "src/base/logging.h"
#include "src/globals.h"
#include "src/runtime/runtime.h"
namespace v8 {

130
src/string-case.cc Normal file
View File

@ -0,0 +1,130 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/string-case.h"
#include "src/assert-scope.h"
#include "src/base/logging.h"
#include "src/globals.h"
#include "src/utils.h"
namespace v8 {
namespace internal {
#ifdef DEBUG
// Debug-only consistency check for FastAsciiConvert. Walks the first |length|
// bytes of |src| and |dst|; wherever they differ, DCHECKs that the source byte
// was a letter of the case being converted away from and that the destination
// byte is its counterpart in the other case. Returns true iff the observed
// "some byte differs" state matches |changed|.
bool CheckFastAsciiConvert(char* dst, const char* src, int length, bool changed,
                           bool is_to_lower) {
  const int kCaseDelta = 'a' - 'A';
  bool saw_difference = false;
  for (int pos = 0; pos < length; ++pos) {
    const char before = src[pos];
    const char after = dst[pos];
    if (after != before) {
      saw_difference = true;
      if (is_to_lower) {
        // Only 'A'..'Z' may change, and only by moving to lower case.
        DCHECK('A' <= before && before <= 'Z');
        DCHECK(after == before + kCaseDelta);
      } else {
        // Only 'a'..'z' may change, and only by moving to upper case.
        DCHECK('a' <= before && before <= 'z');
        DCHECK(after == before - kCaseDelta);
      }
    }
  }
  return saw_difference == changed;
}
#endif
// A word with the value 1 in every byte (0x0101...01), assuming
// kUintptrAllBitsSet is the all-ones word -- TODO confirm against globals.h.
// Multiplying it by a byte value replicates that value into every byte.
const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
// A word with only the high bit of every byte set (0x8080...80 under the
// assumption above). A non-zero AND with this mask means at least one byte of
// the tested value has its high bit set.
const uintptr_t kAsciiMask = kOneInEveryByte << 7;
// Given a word and two range boundaries returns a word with high bit
// set in every byte iff the corresponding input byte was strictly in
// the range (m, n). All the other bits in the result are cleared.
// This function is only useful when it can be inlined and the
// boundaries are statically known.
// Requires: all bytes in the input word and the boundaries must be
// ASCII (less than 0x7F).
static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
// Use strict inequalities since in edge cases the function could be
// further simplified.
DCHECK(0 < m && m < n);
// Has high bit set in every w byte less than n.
// Per byte b this computes 0x7F + n - b, which reaches 0x80 exactly when
// b < n. Under the stated precondition it never goes negative, so no borrow
// leaks into the neighbouring byte.
uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
// Has high bit set in every w byte greater than m.
// Per byte b this computes b + 0x7F - m, which reaches 0x80 exactly when
// b > m. Under the stated precondition it stays below 0x100, so no carry
// leaks into the neighbouring byte.
uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
// Keep only the per-byte high bits where both conditions hold.
return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
}
// Case-converts the leading ASCII portion of |src| into |dst|.
//
// With is_lower == true, 'A'..'Z' are mapped to 'a'..'z'; with
// is_lower == false, 'a'..'z' are mapped to 'A'..'Z'. All other ASCII bytes
// are copied unchanged.
//
// Returns |length| if every byte of |src| was ASCII; in that case
// |*changed_out| is set to whether any byte was actually converted. Otherwise
// returns the offset at which processing stopped because a non-ASCII byte was
// detected (the start of the containing word in the word-at-a-time loops, or
// the byte itself in the tail loop). Bytes of |dst| before that offset hold
// the converted output, and |*changed_out| is left untouched.
//
// |dst| is expected to be word-aligned (checked with a DCHECK). The
// word-at-a-time path is only taken when |src| is word-aligned as well.
template <bool is_lower>
int FastAsciiConvert(char* dst, const char* src, int length,
                     bool* changed_out) {
#ifdef DEBUG
  char* saved_dst = dst;
#endif
  const char* saved_src = src;
  DisallowHeapAllocation no_gc;
  // We rely on the distance between upper and lower case letters
  // being a known power of 2.
  static_assert('a' - 'A' == (1 << 5),
                "upper and lower case letters must differ by exactly 1 << 5");
  // Boundaries for the range of input characters that require conversion.
  static const char lo = is_lower ? 'A' - 1 : 'a' - 1;
  static const char hi = is_lower ? 'Z' + 1 : 'z' + 1;
  bool changed = false;
  const char* const limit = src + length;
  // The word loops compare the number of remaining bytes against the word
  // size rather than computing |limit - sizeof(uintptr_t)|, which would form
  // a pointer before the start of the buffer for inputs shorter than a word.
  const int kWordSize = static_cast<int>(sizeof(uintptr_t));

  // dst is newly allocated and always aligned.
  DCHECK(IsAligned(reinterpret_cast<intptr_t>(dst), sizeof(uintptr_t)));
  // Only attempt processing one word at a time if src is also aligned.
  if (IsAligned(reinterpret_cast<intptr_t>(src), sizeof(uintptr_t))) {
    // Process the prefix of the input that requires no conversion one aligned
    // (machine) word at a time.
    while (limit - src >= kWordSize) {
      const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
      // Bail out at the start of the first word containing a non-ASCII byte.
      if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
      if (AsciiRangeMask(w, lo, hi) != 0) {
        // This word needs conversion; the next loop re-reads and converts it.
        changed = true;
        break;
      }
      *reinterpret_cast<uintptr_t*>(dst) = w;
      src += sizeof(uintptr_t);
      dst += sizeof(uintptr_t);
    }
    // Process the remainder of the input performing conversion when
    // required one word at a time.
    while (limit - src >= kWordSize) {
      const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
      if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
      uintptr_t m = AsciiRangeMask(w, lo, hi);
      // The mask has high (7th) bit set in every byte that needs
      // conversion and we know that the distance between cases is
      // 1 << 5.
      *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
      src += sizeof(uintptr_t);
      dst += sizeof(uintptr_t);
    }
  }
  // Process the last few bytes of the input (or the whole input if
  // unaligned access is not supported).
  while (src < limit) {
    char c = *src;
    // Non-zero only when the high bit of |c| is set, i.e. |c| is not ASCII.
    if ((c & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
    if (lo < c && c < hi) {
      c ^= (1 << 5);
      changed = true;
    }
    *dst = c;
    ++src;
    ++dst;
  }

  DCHECK(
      CheckFastAsciiConvert(saved_dst, saved_src, length, changed, is_lower));

  *changed_out = changed;
  return length;
}
template int FastAsciiConvert<false>(char* dst, const char* src, int length,
bool* changed_out);
template int FastAsciiConvert<true>(char* dst, const char* src, int length,
bool* changed_out);
} // namespace internal
} // namespace v8

17
src/string-case.h Normal file
View File

@ -0,0 +1,17 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_STRING_CASE_H_
#define V8_STRING_CASE_H_
namespace v8 {
namespace internal {
// Case-converts the leading ASCII portion of |src| (|length| bytes) into
// |dst|: to lower case when is_lower is true, to upper case otherwise.
// Returns |length| when the whole input was ASCII, in which case
// |*changed_out| says whether any byte was converted. Otherwise returns the
// offset at which processing stopped on detecting a non-ASCII byte; |dst|
// holds the converted output up to that offset and |*changed_out| is not
// written.
template <bool is_lower>
int FastAsciiConvert(char* dst, const char* src, int length, bool* changed_out);
} // namespace internal
} // namespace v8
#endif // V8_STRING_CASE_H_

View File

@ -1228,6 +1228,8 @@
'startup-data-util.h',
'string-builder.cc',
'string-builder.h',
'string-case.cc',
'string-case.h',
'string-search.h',
'string-stream.cc',
'string-stream.h',

View File

@ -16,14 +16,33 @@ assertEquals("σς", "\u03A3\u03A3".toLowerCase());
// Expand sharp s in latin1 fastpath
assertEquals("ASSB", "A\u00DFB".toUpperCase());
assertEquals("AB", "Ab".toUpperCase());
// Find first upper case in fastpath
// Find first uppercase in fastpath
// Input length < a machine word size
assertEquals("ab", "ab".toLowerCase());
assertEquals("ab", "aB".toLowerCase());
assertEquals("AÜ", "aü".toUpperCase());
assertEquals("AÜ", "AÜ".toUpperCase());
assertEquals("aü", "aü".toLowerCase());
assertEquals("aü", "aÜ".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
// Input length >= a machine word size
assertEquals("abcdefghij", "abcdefghij".toLowerCase());
assertEquals("abcdefghij", "abcdefghiJ".toLowerCase());
assertEquals("abçdefghij", "abçdefghiJ".toLowerCase());
assertEquals("abçdefghij", "abÇdefghiJ".toLowerCase());
assertEquals("abcdefghiá", "abcdeFghiá".toLowerCase());
assertEquals("abcdefghiá", "abcdeFghiÁ".toLowerCase());
assertEquals("ABCDEFGHIJ", "ABCDEFGHIJ".toUpperCase());
assertEquals("ABCDEFGHIJ", "ABCDEFGHIj".toUpperCase());
assertEquals("ABÇDEFGHIJ", "ABÇDEFGHIj".toUpperCase());
assertEquals("ABÇDEFGHIJ", "ABçDEFGHIj".toUpperCase());
assertEquals("ABCDEFGHIÁ", "ABCDEfGHIÁ".toUpperCase());
assertEquals("ABCDEFGHIÁ", "ABCDEfGHIá".toUpperCase());
// Starts with fastpath, but switches to full Unicode path
// U+00FF is uppercased to U+0178.
assertEquals("AŸ", "aÿ".toUpperCase());
@ -33,6 +52,10 @@ assertEquals("AΜ", "aµ".toUpperCase());
// Buffer size increase
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
assertEquals("ABCÀCSSA", "abcàcßa".toUpperCase());
assertEquals("ABCDEFGHIÀCSSA", "ABCDEFGHIàcßa".toUpperCase());
assertEquals("ABCDEFGHIÀCSSA", "abcdeFghiàcßa".toUpperCase());
// OneByte input with buffer size increase: non-fast path
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));