Use ICU case conversion/transliterator for case conversion
When I18N is enabled, use ICU's case conversion API and transliteration API [1] to implement String.prototype.to{Upper,Lower}Case and String.prototype.toLocale{Upper,Lower}Case. * ICU-based case conversion was implemented in runtime-i18n.cc/i18n.js. * The above 4 functions are overridden with those in i18n.js when --icu_case_mapping flag is turned on. To control the override by the flag, they're overridden in icu-case-mapping.js. Previously, toLocale{U,L}Case just called to{U,L}Case so that they didn't support locale-sensitive case conversion for Turkic languages (az, tr), Greek (el) and Lithuanian (lt). Before ICU APIs for the most general case are called, a fast-path for Latin-1 is tried. It's taken from Blink and adapted as necessary. This fast path is always tried for to{U,L}Case. For toLocale{U,L}Case, it's only taken when a locale (explicitly specified or default) is not in {az, el, lt, tr}. With these changes, a build with --icu_case_mapping=true passes a bunch of tests in test262/intl402/Strings/* and intl/* that failed before. Handling of pure ASCII strings (aligned at word boundary) is not as fast as Unibrow's implementation that uses word-by-word case conversion. OTOH, Latin-1 input handling is faster than Unibrow. General Unicode input handling is slower but more accurate. See https://docs.google.com/spreadsheets/d/1KJCJxKc1FxFXjwmYqABS0_2cNdPetvnd8gY8_HGSbrg/edit?usp=sharing for the benchmark. This CL started with http://crrev.com/1544023002#ps200001 by littledan@, but has changed significantly since. [1] See why transliteration API is needed for uppercasing in Greek. http://bugs.icu-project.org/trac/ticket/10582 R=yangguo BUG=v8:4476,v8:4477 LOG=Y TEST=test262/{built-ins,intl402}/Strings/*, webkit/fast/js/*, mjsunit/string-case, intl/general/case* Review-Url: https://codereview.chromium.org/1812673005 Cr-Commit-Position: refs/heads/master@{#36187}
This commit is contained in:
parent
adcc511986
commit
b348d47bb9
4
BUILD.gn
4
BUILD.gn
@ -428,6 +428,10 @@ action("js2c_experimental") {
|
||||
"$target_gen_dir/experimental-libraries.cc",
|
||||
]
|
||||
|
||||
if (v8_enable_i18n_support) {
|
||||
sources += [ "src/js/icu-case-mapping.js" ]
|
||||
}
|
||||
|
||||
args = [
|
||||
rebase_path("$target_gen_dir/experimental-libraries.cc",
|
||||
root_build_dir),
|
||||
|
@ -2478,6 +2478,9 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_instanceof)
|
||||
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_restrictive_declarations)
|
||||
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_exponentiation_operator)
|
||||
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_string_padding)
|
||||
#ifdef V8_I18N_SUPPORT
|
||||
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(icu_case_mapping)
|
||||
#endif
|
||||
|
||||
void InstallPublicSymbol(Factory* factory, Handle<Context> native_context,
|
||||
const char* name, Handle<Symbol> value) {
|
||||
@ -3046,6 +3049,10 @@ bool Genesis::InstallExperimentalNatives() {
|
||||
static const char* harmony_exponentiation_operator_natives[] = {nullptr};
|
||||
static const char* harmony_string_padding_natives[] = {
|
||||
"native harmony-string-padding.js", nullptr};
|
||||
#ifdef V8_I18N_SUPPORT
|
||||
static const char* icu_case_mapping_natives[] = {"native icu-case-mapping.js",
|
||||
nullptr};
|
||||
#endif
|
||||
|
||||
for (int i = ExperimentalNatives::GetDebuggerCount();
|
||||
i < ExperimentalNatives::GetBuiltinsCount(); i++) {
|
||||
|
@ -193,12 +193,22 @@ DEFINE_IMPLICATION(es_staging, harmony_regexp_lookbehind)
|
||||
DEFINE_IMPLICATION(es_staging, move_object_start)
|
||||
|
||||
// Features that are still work in progress (behind individual flags).
|
||||
#ifdef V8_I18N_SUPPORT
|
||||
#define HARMONY_INPROGRESS(V) \
|
||||
V(harmony_function_sent, "harmony function.sent") \
|
||||
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
|
||||
V(harmony_simd, "harmony simd") \
|
||||
V(harmony_do_expressions, "harmony do-expressions") \
|
||||
V(harmony_regexp_property, "harmony unicode regexp property classes") \
|
||||
V(icu_case_mapping, "case mapping with ICU rather than Unibrow")
|
||||
#else
|
||||
#define HARMONY_INPROGRESS(V) \
|
||||
V(harmony_function_sent, "harmony function.sent") \
|
||||
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
|
||||
V(harmony_simd, "harmony simd") \
|
||||
V(harmony_do_expressions, "harmony do-expressions") \
|
||||
V(harmony_regexp_property, "harmony unicode regexp property classes")
|
||||
#endif
|
||||
|
||||
// Features that are complete (but still behind --harmony/es-staging flag).
|
||||
#define HARMONY_STAGED(V) \
|
||||
|
107
src/js/i18n.js
107
src/js/i18n.js
@ -142,6 +142,13 @@ var AVAILABLE_LOCALES = {
|
||||
*/
|
||||
var DEFAULT_ICU_LOCALE = UNDEFINED;
|
||||
|
||||
function GetDefaultICULocaleJS() {
|
||||
if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) {
|
||||
DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
|
||||
}
|
||||
return DEFAULT_ICU_LOCALE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unicode extension regular expression.
|
||||
*/
|
||||
@ -446,11 +453,7 @@ function lookupMatcher(service, requestedLocales) {
|
||||
}
|
||||
|
||||
// Didn't find a match, return default.
|
||||
if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) {
|
||||
DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
|
||||
}
|
||||
|
||||
return {'locale': DEFAULT_ICU_LOCALE, 'extension': '', 'position': -1};
|
||||
return {'locale': GetDefaultICULocaleJS(), 'extension': '', 'position': -1};
|
||||
}
|
||||
|
||||
|
||||
@ -722,21 +725,24 @@ function toTitleCaseTimezoneLocation(location) {
|
||||
*/
|
||||
function canonicalizeLanguageTag(localeID) {
|
||||
// null is typeof 'object' so we have to do extra check.
|
||||
if (typeof localeID !== 'string' && typeof localeID !== 'object' ||
|
||||
if ((!IS_STRING(localeID) && !IS_RECEIVER(localeID)) ||
|
||||
IS_NULL(localeID)) {
|
||||
throw MakeTypeError(kLanguageID);
|
||||
}
|
||||
|
||||
// Optimize for the most common case; a language code alone in
|
||||
// the canonical form/lowercase (e.g. "en", "fil").
|
||||
if (IS_STRING(localeID) &&
|
||||
!IS_NULL(InternalRegExpMatch(/^[a-z]{2,3}$/, localeID))) {
|
||||
return localeID;
|
||||
}
|
||||
|
||||
var localeString = GlobalString(localeID);
|
||||
|
||||
if (isValidLanguageTag(localeString) === false) {
|
||||
throw MakeRangeError(kInvalidLanguageTag, localeString);
|
||||
}
|
||||
|
||||
// This call will strip -kn but not -kn-true extensions.
|
||||
// ICU bug filled - http://bugs.icu-project.org/trac/ticket/9265.
|
||||
// TODO(cira): check if -u-kn-true-kc-true-kh-true still throws after
|
||||
// upgrade to ICU 4.9.
|
||||
var tag = %CanonicalizeLanguageTag(localeString);
|
||||
if (tag === 'invalid-tag') {
|
||||
throw MakeRangeError(kInvalidLanguageTag, localeString);
|
||||
@ -1989,6 +1995,37 @@ function cachedOrNewService(service, locales, options, defaults) {
|
||||
return new savedObjects[service](locales, useOptions);
|
||||
}
|
||||
|
||||
function LocaleConvertCase(s, locales, isToUpper) {
|
||||
// ECMA 402 section 13.1.2 steps 1 through 12.
|
||||
var language;
|
||||
// Optimize for the most common two cases. initializeLocaleList() can handle
|
||||
// them as well, but it's rather slow accounting for over 60% of
|
||||
// toLocale{U,L}Case() and about 40% of toLocale{U,L}Case("<locale>").
|
||||
if (IS_UNDEFINED(locales)) {
|
||||
language = GetDefaultICULocaleJS();
|
||||
} else if (IS_STRING(locales)) {
|
||||
language = canonicalizeLanguageTag(locales);
|
||||
} else {
|
||||
var locales = initializeLocaleList(locales);
|
||||
language = locales.length > 0 ? locales[0] : GetDefaultICULocaleJS();
|
||||
}
|
||||
|
||||
// StringSplit is slower than this.
|
||||
var pos = %_Call(StringIndexOf, language, '-');
|
||||
if (pos != -1) {
|
||||
language = %_Call(StringSubstring, language, 0, pos);
|
||||
}
|
||||
|
||||
var CUSTOM_CASE_LANGUAGES = ['az', 'el', 'lt', 'tr'];
|
||||
var langIndex = %_Call(ArrayIndexOf, CUSTOM_CASE_LANGUAGES, language);
|
||||
if (langIndex == -1) {
|
||||
// language-independent case conversion.
|
||||
return isToUpper ? %StringToUpperCaseI18N(s) : %StringToLowerCaseI18N(s);
|
||||
}
|
||||
return %StringLocaleConvertCase(s, isToUpper,
|
||||
CUSTOM_CASE_LANGUAGES[langIndex]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this and that, and returns less than 0, 0 or greater than 0 value.
|
||||
* Overrides the built-in method.
|
||||
@ -2041,6 +2078,56 @@ OverrideFunction(GlobalString.prototype, 'normalize', function() {
|
||||
}
|
||||
);
|
||||
|
||||
function ToLowerCaseI18N() {
|
||||
if (!IS_UNDEFINED(new.target)) {
|
||||
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
|
||||
}
|
||||
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
|
||||
var s = TO_STRING(this);
|
||||
return %StringToLowerCaseI18N(s);
|
||||
}
|
||||
|
||||
function ToUpperCaseI18N() {
|
||||
if (!IS_UNDEFINED(new.target)) {
|
||||
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
|
||||
}
|
||||
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toUpperCase");
|
||||
var s = TO_STRING(this);
|
||||
return %StringToUpperCaseI18N(s);
|
||||
}
|
||||
|
||||
/**
 * ICU-backed String.prototype.toLocaleLowerCase.
 * Throws a TypeError when invoked with `new` or on a null/undefined receiver;
 * otherwise delegates to LocaleConvertCase with isToUpper = false.
 */
function ToLocaleLowerCaseI18N(locales) {
  if (!IS_UNDEFINED(new.target)) {
    throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
  }
  CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleLowerCase");
  var receiver = TO_STRING(this);
  return LocaleConvertCase(receiver, locales, false);
}
|
||||
|
||||
%FunctionSetLength(ToLocaleLowerCaseI18N, 0);
|
||||
|
||||
/**
 * ICU-backed String.prototype.toLocaleUpperCase.
 * Throws a TypeError when invoked with `new` or on a null/undefined receiver;
 * otherwise delegates to LocaleConvertCase with isToUpper = true.
 */
function ToLocaleUpperCaseI18N(locales) {
  if (!IS_UNDEFINED(new.target)) {
    throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
  }
  CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleUpperCase");
  var receiver = TO_STRING(this);
  return LocaleConvertCase(receiver, locales, true);
}
|
||||
|
||||
%FunctionSetLength(ToLocaleUpperCaseI18N, 0);
|
||||
|
||||
%FunctionRemovePrototype(ToLowerCaseI18N);
|
||||
%FunctionRemovePrototype(ToUpperCaseI18N);
|
||||
%FunctionRemovePrototype(ToLocaleLowerCaseI18N);
|
||||
%FunctionRemovePrototype(ToLocaleUpperCaseI18N);
|
||||
|
||||
utils.Export(function(to) {
|
||||
to.ToLowerCaseI18N = ToLowerCaseI18N;
|
||||
to.ToUpperCaseI18N = ToUpperCaseI18N;
|
||||
to.ToLocaleLowerCaseI18N = ToLocaleLowerCaseI18N;
|
||||
to.ToLocaleUpperCaseI18N = ToLocaleUpperCaseI18N;
|
||||
});
|
||||
|
||||
|
||||
/**
|
||||
* Formats a Number object (this) using locale and options values.
|
||||
|
24
src/js/icu-case-mapping.js
Normal file
24
src/js/icu-case-mapping.js
Normal file
@ -0,0 +1,24 @@
|
||||
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
(function(global, utils) {
|
||||
"use strict";
|
||||
|
||||
%CheckIsBootstrapping();
|
||||
|
||||
var GlobalString = global.String;
|
||||
var OverrideFunction = utils.OverrideFunction;
|
||||
var ToLowerCaseI18N = utils.ImportNow("ToLowerCaseI18N");
|
||||
var ToUpperCaseI18N = utils.ImportNow("ToUpperCaseI18N");
|
||||
var ToLocaleLowerCaseI18N = utils.ImportNow("ToLocaleLowerCaseI18N");
|
||||
var ToLocaleUpperCaseI18N = utils.ImportNow("ToLocaleUpperCaseI18N");
|
||||
|
||||
OverrideFunction(GlobalString.prototype, 'toLowerCase', ToLowerCaseI18N, true);
|
||||
OverrideFunction(GlobalString.prototype, 'toUpperCase', ToUpperCaseI18N, true);
|
||||
OverrideFunction(GlobalString.prototype, 'toLocaleLowerCase',
|
||||
ToLocaleLowerCaseI18N, true);
|
||||
OverrideFunction(GlobalString.prototype, 'toLocaleUpperCase',
|
||||
ToLocaleUpperCaseI18N, true);
|
||||
|
||||
})
|
@ -208,7 +208,11 @@ function PostNatives(utils) {
|
||||
"SetIteratorNext",
|
||||
"SetValues",
|
||||
"SymbolToString",
|
||||
"ToLocaleLowerCaseI18N",
|
||||
"ToLocaleUpperCaseI18N",
|
||||
"ToLowerCaseI18N",
|
||||
"ToPositiveInteger",
|
||||
"ToUpperCaseI18N",
|
||||
// From runtime:
|
||||
"is_concat_spreadable_symbol",
|
||||
"iterator_symbol",
|
||||
|
@ -8645,26 +8645,26 @@ class String: public Name {
|
||||
class FlatContent {
|
||||
public:
|
||||
// Returns true if the string is flat and this structure contains content.
|
||||
bool IsFlat() { return state_ != NON_FLAT; }
|
||||
bool IsFlat() const { return state_ != NON_FLAT; }
|
||||
// Returns true if the structure contains one-byte content.
|
||||
bool IsOneByte() { return state_ == ONE_BYTE; }
|
||||
bool IsOneByte() const { return state_ == ONE_BYTE; }
|
||||
// Returns true if the structure contains two-byte content.
|
||||
bool IsTwoByte() { return state_ == TWO_BYTE; }
|
||||
bool IsTwoByte() const { return state_ == TWO_BYTE; }
|
||||
|
||||
// Return the one byte content of the string. Only use if IsOneByte()
|
||||
// returns true.
|
||||
Vector<const uint8_t> ToOneByteVector() {
|
||||
Vector<const uint8_t> ToOneByteVector() const {
|
||||
DCHECK_EQ(ONE_BYTE, state_);
|
||||
return Vector<const uint8_t>(onebyte_start, length_);
|
||||
}
|
||||
// Return the two-byte content of the string. Only use if IsTwoByte()
|
||||
// returns true.
|
||||
Vector<const uc16> ToUC16Vector() {
|
||||
Vector<const uc16> ToUC16Vector() const {
|
||||
DCHECK_EQ(TWO_BYTE, state_);
|
||||
return Vector<const uc16>(twobyte_start, length_);
|
||||
}
|
||||
|
||||
uc16 Get(int i) {
|
||||
uc16 Get(int i) const {
|
||||
DCHECK(i < length_);
|
||||
DCHECK(state_ != NON_FLAT);
|
||||
if (state_ == ONE_BYTE) return onebyte_start[i];
|
||||
|
@ -29,10 +29,12 @@
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/smpdtfmt.h"
|
||||
#include "unicode/timezone.h"
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucol.h"
|
||||
#include "unicode/ucurr.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unum.h"
|
||||
#include "unicode/uversion.h"
|
||||
|
||||
@ -749,6 +751,360 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
|
||||
return *isolate->factory()->NewStringFromStaticChars("unknown");
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Runs the ICU transliterator named |transliterator_id| (forward direction)
// over |input| in place. If the transliterator cannot be created, |input| is
// left untouched.
void ConvertCaseWithTransliterator(icu::UnicodeString* input,
                                   const char* transliterator_id) {
  UErrorCode status = U_ZERO_ERROR;
  const icu::UnicodeString id(transliterator_id, -1, US_INV);
  base::SmartPointer<icu::Transliterator> translit(
      icu::Transliterator::createInstance(id, UTRANS_FORWARD, status));
  if (U_SUCCESS(status)) {
    translit->transliterate(*input);
  }
}
|
||||
|
||||
// Returns a UTF-16 view of |flat| covering |length| code units.
// Two-byte content is returned directly. One-byte content is widened into
// |dest|, which is allocated and filled only if it is still empty, so a
// second call with the same |dest| reuses the earlier copy.
const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
                                    base::SmartArrayPointer<uc16>* dest,
                                    int32_t length) {
  DCHECK(flat.IsFlat());
  if (!flat.IsOneByte()) {
    return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
  }
  if (dest->is_empty()) {
    dest->Reset(NewArray<uc16>(length));
    CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
  }
  return reinterpret_cast<const UChar*>(dest->get());
}
|
||||
|
||||
// Converts |s| to upper case (|is_to_upper| == true) or lower case using
// ICU with the case-mapping rules of |lang| ("" selects the root locale).
// |s| is read through GetFlatContent(), so callers flatten it first.
// Returns the converted string, |s| itself when ICU reports a failure (or,
// on the Greek path, when nothing changed), or the exception sentinel when
// allocating the result throws.
MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
                                          bool is_to_upper, const char* lang) {
  int32_t src_length = s->length();

  // Greek uppercasing has to be done via transliteration.
  // TODO(jshin): Drop this special-casing once ICU's regular case conversion
  // API supports Greek uppercasing. See
  // http://bugs.icu-project.org/trac/ticket/10582 .
  // TODO: if |s| contains no Greek character, the regular root-locale path
  // below would be sufficient; that shortcut is not taken here.
  // ICU's C API for transliteration is nasty and we just use C++ API.
  if (V8_UNLIKELY(is_to_upper && lang[0] == 'e' && lang[1] == 'l')) {
    icu::UnicodeString converted;
    base::SmartArrayPointer<uc16> sap;
    {
      DisallowHeapAllocation no_gc;
      String::FlatContent flat = s->GetFlatContent();
      const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
      // Starts with the source string (read-only alias with copy-on-write
      // semantics) and will be modified to contain the converted result.
      // Using read-only alias at first saves one copy operation if
      // transliteration does not change the input, which is rather rare.
      // Moreover, transliteration takes rather long so that saving one copy
      // helps only a little bit.
      converted.setTo(false, src, src_length);
      ConvertCaseWithTransliterator(&converted, "el-Upper");
      // If no change is made, just return |s|.
      if (converted.getBuffer() == src) return *s;
    }
    Handle<String> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result,
        isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
            reinterpret_cast<const uint16_t*>(converted.getBuffer()),
            converted.length())));
    return *result;
  }

  auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;

  int32_t dest_length = src_length;
  UErrorCode status = U_ZERO_ERROR;
  Handle<SeqTwoByteString> result;
  base::SmartArrayPointer<uc16> sap;

  // This is not a real loop. It'll be executed only once (no overflow) or
  // twice (overflow).
  for (int i = 0; i < 2; ++i) {
    // Case conversion can make the string longer, so on the second pass
    // |dest_length| may exceed the maximum string length. Propagate the
    // resulting exception to the caller instead of aborting via
    // ToHandleChecked().
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent();
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
    status = U_ZERO_ERROR;
    dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
                                 dest_length, src, src_length, lang, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) break;
  }

  // In most cases, the output will fill the destination buffer completely
  // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
  // Only in rare cases, it'll be shorter than the destination buffer and
  // |result| has to be truncated.
  DCHECK(U_SUCCESS(status));
  if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
    DCHECK(dest_length == result->length());
    return *result;
  }
  if (U_SUCCESS(status)) {
    DCHECK(dest_length < result->length());
    return *Handle<SeqTwoByteString>::cast(
        SeqString::Truncate(result, dest_length));
  }
  // ICU failed (release builds only; the DCHECK above fires in debug builds):
  // fall back to returning the input unchanged.
  return *s;
}
|
||||
|
||||
// True iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  // A single unsigned range check: values below 'A' wrap around to large
  // numbers and fail the comparison.
  return static_cast<uint16_t>(ch - 'A') <= 'Z' - 'A';
}
|
||||
|
||||
// Lower-case mapping for every Latin-1 code unit, indexed by the code unit.
// Each row covers 16 consecutive code units. 'A'..'Z' and U+00C0..U+00DE
// (except U+00D7, the multiplication sign) map to the value 0x20 higher;
// everything else maps to itself.
const uint8_t kToLower[256] = {
    // 0x00
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    // 0x10
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    // 0x20
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    // 0x30
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x50
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x70
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    // 0x80
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    // 0x90
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    // 0xA0
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    // 0xB0
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xD0
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xF0
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};

// Looks up the lower-case form of |ch| in kToLower. The table has 256
// entries, so |ch| must not exceed 0xFF.
inline uint16_t ToLatin1Lower(uint16_t ch) {
  const uint8_t lowered = kToLower[ch];
  return lowered;
}
|
||||
|
||||
// Maps ASCII 'a'..'z' to 'A'..'Z'; every other value is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_lower = ch >= 'a' && ch <= 'z';
  // Lower-case ASCII letters differ from their capitals only in bit 5.
  return is_lower ? (ch & ~0x20) : ch;
}
|
||||
|
||||
// Upper-cases a single Latin-1 code unit whose upper-case form also lies in
// Latin-1 and has the same length.
// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF: their
// upper-case forms are either two characters or beyond U+00FF, so callers
// must handle them before calling this function.
inline uint16_t ToLatin1Upper(uint16_t ch) {
  DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  // U+00E0..U+00FE are lower-case letters, with the single exception of
  // U+00F7 (division sign), which has no case and must stay as is. (U+00E7,
  // c with cedilla, is an ordinary lower-case letter and must be converted.)
  const bool is_latin1_lower = (ch & 0xE0) == 0xE0 && ch != 0xF7;
  // Lower- and upper-case forms differ only in bit 5.
  return ch & ~((is_ascii_lower || is_latin1_lower) << 5);
}
|
||||
|
||||
template <typename Char>
|
||||
bool ToUpperFastASCII(const Vector<const Char>& src,
|
||||
Handle<SeqOneByteString> result) {
|
||||
// Do a faster loop for the case where all the characters are ASCII.
|
||||
uint16_t ored = 0;
|
||||
int32_t index = 0;
|
||||
for (auto it = src.begin(); it != src.end(); ++it) {
|
||||
uint16_t ch = static_cast<uint16_t>(*it);
|
||||
ored |= ch;
|
||||
result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
|
||||
}
|
||||
return !(ored & ~0x7F);
|
||||
}
|
||||
|
||||
const uint16_t sharp_s = 0xDF;
|
||||
|
||||
template <typename Char>
|
||||
bool ToUpperOneByte(const Vector<const Char>& src,
|
||||
Handle<SeqOneByteString> result, int* sharp_s_count) {
|
||||
// Still pretty-fast path for the input with non-ASCII Latin-1 characters.
|
||||
|
||||
// There are two special cases.
|
||||
// 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
|
||||
// 2. Lower case sharp-S converts to "SS" (two characters)
|
||||
*sharp_s_count = 0;
|
||||
int32_t index = 0;
|
||||
for (auto it = src.begin(); it != src.end(); ++it) {
|
||||
uint16_t ch = static_cast<uint16_t>(*it);
|
||||
if (V8_UNLIKELY(ch == sharp_s)) {
|
||||
++(*sharp_s_count);
|
||||
continue;
|
||||
}
|
||||
if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
|
||||
// Since this upper-cased character does not fit in an 8-bit string, we
|
||||
// need to take the 16-bit path.
|
||||
return false;
|
||||
}
|
||||
result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Char>
|
||||
void ToUpperWithSharpS(const Vector<const Char>& src,
|
||||
Handle<SeqOneByteString> result) {
|
||||
int32_t dest_index = 0;
|
||||
for (auto it = src.begin(); it != src.end(); ++it) {
|
||||
uint16_t ch = static_cast<uint16_t>(*it);
|
||||
if (ch == sharp_s) {
|
||||
result->SeqOneByteStringSet(dest_index++, 'S');
|
||||
result->SeqOneByteStringSet(dest_index++, 'S');
|
||||
} else {
|
||||
result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Runtime entry for root-locale lower-casing. Takes one String argument.
// Strings containing only Latin-1 characters are handled with the kToLower
// table; everything else is forwarded to ICU via LocaleConvertCase with the
// root locale.
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);

  int length = s->length();
  s = String::Flatten(s);

  // Blink had an additional case for ASCII 2-byte strings, but that is
  // covered by the Latin-1 path below (assuming there isn't a false negative
  // for HasOnlyOneByteChars). Anything else takes the slower ICU path.
  if (!s->HasOnlyOneByteChars()) {
    return LocaleConvertCase(s, isolate, false, "");
  }

  // Find the first character that is an ASCII capital or is non-ASCII.
  // Blink specializes this scan for one-byte strings, so it does not need a
  // generic get but can do the equivalent of SeqOneByteStringGet.
  unsigned first_index_to_lower = length;
  for (int i = 0; i < length; ++i) {
    uint16_t ch = s->Get(i);
    if (V8_UNLIKELY(IsASCIIUpper(ch) || (ch & ~0x7F))) {
      first_index_to_lower = i;
      break;
    }
  }

  // All-ASCII input without capitals is already lower case.
  if (first_index_to_lower == length) return *s;

  // In the *root locale*, lower-casing a Latin-1 string neither changes its
  // length nor leaves the Latin-1 range, so a one-byte result of the same
  // length is always sufficient. (This does not hold for upper-casing.)
  Handle<SeqOneByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, isolate->factory()->NewRawOneByteString(length));

  DisallowHeapAllocation no_gc;
  String::FlatContent flat = s->GetFlatContent();
  if (flat.IsOneByte()) {
    const uint8_t* chars = flat.ToOneByteVector().start();
    // The prefix before the first candidate needs no conversion.
    CopyChars(result->GetChars(), chars, first_index_to_lower);
    for (int i = first_index_to_lower; i < length; ++i) {
      result->SeqOneByteStringSet(
          i, ToLatin1Lower(static_cast<uint16_t>(chars[i])));
    }
  } else {
    const uint16_t* chars = flat.ToUC16Vector().start();
    CopyChars(result->GetChars(), chars, first_index_to_lower);
    for (int i = first_index_to_lower; i < length; ++i) {
      result->SeqOneByteStringSet(i, ToLatin1Lower(chars[i]));
    }
  }

  return *result;
}
|
||||
|
||||
// Runtime entry for root-locale upper-casing. Takes one String argument.
// Latin-1 input is tried on progressively slower fast paths (pure ASCII,
// Latin-1 without sharp-s, Latin-1 with sharp-s expansion); everything else
// is forwarded to ICU via LocaleConvertCase with the root locale.
//
// Unlike the lower-case counterpart there is no pre-scan for no-op input:
// in empirical testing few calls to upper() are no-ops, so the scan would
// not pay for itself.
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);

  int32_t length = s->length();
  s = String::Flatten(s);

  if (!s->HasOnlyOneByteChars()) {
    return LocaleConvertCase(s, isolate, true, "");
  }

  Handle<SeqOneByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, isolate->factory()->NewRawOneByteString(length));

  int num_sharp_s;
  bool fits_in_one_byte;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent();
    // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
    // could be removed because ToUpperOneByte is pretty fast now (it does
    // not call ICU API any more).
    if (flat.IsOneByte()) {
      Vector<const uint8_t> chars = flat.ToOneByteVector();
      if (ToUpperFastASCII(chars, result)) return *result;
      fits_in_one_byte = ToUpperOneByte(chars, result, &num_sharp_s);
    } else {
      DCHECK(flat.IsTwoByte());
      Vector<const uint16_t> chars = flat.ToUC16Vector();
      if (ToUpperFastASCII(chars, result)) return *result;
      fits_in_one_byte = ToUpperOneByte(chars, result, &num_sharp_s);
    }
  }

  // Some character upper-cases to something beyond the Latin-1 range, which
  // a one-byte string cannot hold: take the full Unicode path.
  if (V8_UNLIKELY(!fits_in_one_byte)) {
    return LocaleConvertCase(s, isolate, true, "");
  }

  if (num_sharp_s == 0) return *result;

  // The result stays within Latin-1, but each sharp-s becomes "SS", so a
  // longer string is needed and the conversion is redone into it.
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
      isolate->factory()->NewRawOneByteString(length + num_sharp_s));
  DisallowHeapAllocation no_gc;
  String::FlatContent flat = s->GetFlatContent();
  if (flat.IsOneByte()) {
    ToUpperWithSharpS(flat.ToOneByteVector(), result);
  } else {
    ToUpperWithSharpS(flat.ToUC16Vector(), result);
  }

  return *result;
}
|
||||
|
||||
// Runtime entry for locale-sensitive case conversion.
// Arguments: the string to convert, a boolean selecting upper-casing, and a
// two-letter language code given as a sequential one-byte string.
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 3);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
  CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);

  // All the languages requiring special handling ("az", "el", "lt", "tr")
  // have a 2-letter language code.
  DCHECK(lang->length() == 2);

  // Copy the code into a NUL-terminated buffer for LocaleConvertCase.
  uint8_t lang_buf[3];
  memcpy(lang_buf, lang->GetChars(), 2);
  lang_buf[2] = 0;
  const char* lang_cstr = reinterpret_cast<const char*>(lang_buf);

  s = String::Flatten(s);
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment, though.
  return LocaleConvertCase(s, isolate, is_upper, lang_cstr);
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
@ -1077,7 +1077,7 @@ MUST_USE_RESULT static Object* ConvertCase(
|
||||
|
||||
RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
|
||||
HandleScope scope(isolate);
|
||||
DCHECK(args.length() == 1);
|
||||
DCHECK_EQ(args.length(), 1);
|
||||
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
|
||||
return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping());
|
||||
}
|
||||
@ -1085,7 +1085,7 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
|
||||
|
||||
RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
|
||||
HandleScope scope(isolate);
|
||||
DCHECK(args.length() == 1);
|
||||
DCHECK_EQ(args.length(), 1);
|
||||
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
|
||||
return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping());
|
||||
}
|
||||
|
@ -262,7 +262,10 @@ namespace internal {
|
||||
F(BreakIteratorFirst, 1, 1) \
|
||||
F(BreakIteratorNext, 1, 1) \
|
||||
F(BreakIteratorCurrent, 1, 1) \
|
||||
F(BreakIteratorBreakType, 1, 1)
|
||||
F(BreakIteratorBreakType, 1, 1) \
|
||||
F(StringToLowerCaseI18N, 1, 1) \
|
||||
F(StringToUpperCaseI18N, 1, 1) \
|
||||
F(StringLocaleConvertCase, 3, 1)
|
||||
#else
|
||||
#define FOR_EACH_INTRINSIC_I18N(F)
|
||||
#endif
|
||||
|
23
src/v8.gyp
23
src/v8.gyp
@ -1988,17 +1988,6 @@
|
||||
}, {
|
||||
'toolsets': ['target'],
|
||||
}],
|
||||
['v8_enable_i18n_support==1', {
|
||||
'variables': {
|
||||
'i18n_library_files': [
|
||||
'js/i18n.js',
|
||||
],
|
||||
},
|
||||
}, {
|
||||
'variables': {
|
||||
'i18n_library_files': [],
|
||||
},
|
||||
}],
|
||||
],
|
||||
'variables': {
|
||||
'library_files': [
|
||||
@ -2048,6 +2037,12 @@
|
||||
'libraries_experimental_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental.bin',
|
||||
'libraries_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-extras.bin',
|
||||
'libraries_experimental_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental-extras.bin',
|
||||
'conditions': [
|
||||
['v8_enable_i18n_support==1', {
|
||||
'library_files': ['js/i18n.js'],
|
||||
'experimental_library_files': ['js/icu-case-mapping.js'],
|
||||
}],
|
||||
],
|
||||
},
|
||||
'actions': [
|
||||
{
|
||||
@ -2055,7 +2050,6 @@
|
||||
'inputs': [
|
||||
'../tools/js2c.py',
|
||||
'<@(library_files)',
|
||||
'<@(i18n_library_files)'
|
||||
],
|
||||
'outputs': ['<(SHARED_INTERMEDIATE_DIR)/libraries.cc'],
|
||||
'action': [
|
||||
@ -2064,7 +2058,6 @@
|
||||
'<(SHARED_INTERMEDIATE_DIR)/libraries.cc',
|
||||
'CORE',
|
||||
'<@(library_files)',
|
||||
'<@(i18n_library_files)'
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -2072,7 +2065,6 @@
|
||||
'inputs': [
|
||||
'../tools/js2c.py',
|
||||
'<@(library_files)',
|
||||
'<@(i18n_library_files)'
|
||||
],
|
||||
'outputs': ['<@(libraries_bin_file)'],
|
||||
'action': [
|
||||
@ -2081,7 +2073,6 @@
|
||||
'<(SHARED_INTERMEDIATE_DIR)/libraries.cc',
|
||||
'CORE',
|
||||
'<@(library_files)',
|
||||
'<@(i18n_library_files)',
|
||||
'--startup_blob', '<@(libraries_bin_file)',
|
||||
'--nojs',
|
||||
],
|
||||
@ -2098,7 +2089,7 @@
|
||||
'../tools/js2c.py',
|
||||
'<(SHARED_INTERMEDIATE_DIR)/experimental-libraries.cc',
|
||||
'EXPERIMENTAL',
|
||||
'<@(experimental_library_files)'
|
||||
'<@(experimental_library_files)',
|
||||
],
|
||||
},
|
||||
{
|
||||
|
138
test/intl/general/case-mapping.js
Normal file
138
test/intl/general/case-mapping.js
Normal file
@ -0,0 +1,138 @@
|
||||
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --icu_case_mapping
|
||||
|
||||
// Some edge cases that unibrow got wrong
|
||||
|
||||
assertEquals("𐐘", "𐑀".toUpperCase());
|
||||
assertEquals("𐑀", "𐐘".toLowerCase());
|
||||
assertEquals("σ", "Σ".toLowerCase());
|
||||
|
||||
// Some different paths in the ICU case conversion fastpath
|
||||
|
||||
assertEquals("σς", "\u03A3\u03A3".toLowerCase());
|
||||
// Expand sharp s in latin1 fastpath
|
||||
assertEquals("ASSB", "A\u00DFB".toUpperCase());
|
||||
assertEquals("AB", "Ab".toUpperCase());
|
||||
// Find first upper case in fastpath
|
||||
assertEquals("ab", "aB".toLowerCase());
|
||||
assertEquals("AÜ", "aü".toUpperCase());
|
||||
assertEquals("AÜ", "AÜ".toUpperCase());
|
||||
assertEquals("aü", "aü".toLowerCase());
|
||||
assertEquals("aü", "AÜ".toLowerCase());
|
||||
assertEquals("aü", "AÜ".toLowerCase());
|
||||
|
||||
// Starts with fastpath, but switches to full Unicode path
|
||||
// U+00FF is uppercased to U+0178.
|
||||
assertEquals("AŸ", "aÿ".toUpperCase());
|
||||
// U+00B5 (µ) is uppercased to U+039C (Μ)
|
||||
assertEquals("AΜ", "aµ".toUpperCase());
|
||||
|
||||
// Buffer size increase
|
||||
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
|
||||
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
|
||||
// OneByte input with buffer size increase: non-fast path
|
||||
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));
|
||||
|
||||
// More comprehensive tests for "tr", "az" and "lt" are in
|
||||
// test262/intl402/Strings/*
|
||||
|
||||
// Buffer size decrease with a single locale or locale list.
|
||||
// In Turkic (tr, az), U+0307 preceded by Capital Letter I is dropped.
|
||||
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));
|
||||
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));
|
||||
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));
|
||||
|
||||
// Cons string
|
||||
assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));
|
||||
assertEquals("abcijkl",
|
||||
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));
|
||||
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
|
||||
assertEquals("abci\u0307jkl",
|
||||
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
|
||||
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
|
||||
assertEquals("abci\u0307jkl",
|
||||
("aB" + "cI" + "\u0307j" + "kl").toLowerCase());
|
||||
|
||||
// "tr" and "az" should behave identically.
|
||||
assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),
|
||||
"aBcI\u0307".toLocaleLowerCase("az"));
|
||||
// What matters is the first locale in the locale list.
|
||||
assertEquals("aBcI\u0307".toLocaleLowerCase(["tr", "en", "fr"]),
|
||||
"aBcI\u0307".toLocaleLowerCase("tr"));
|
||||
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
|
||||
"aBcI\u0307".toLocaleLowerCase("en"));
|
||||
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
|
||||
"aBcI\u0307".toLowerCase());
|
||||
|
||||
// An empty locale list is the same as the default locale. Try these tests
|
||||
// under Turkish and Greek locale.
|
||||
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
|
||||
"aBcI\u0307".toLocaleLowerCase());
|
||||
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
|
||||
"aBcI\u0307".toLocaleLowerCase(Intl.GetDefaultLocale));
|
||||
assertEquals("άόύώ".toLocaleUpperCase([]), "άόύώ".toLocaleUpperCase());
|
||||
assertEquals("άόύώ".toLocaleUpperCase([]),
|
||||
"άόύώ".toLocaleUpperCase(Intl.GetDefaultLocale));
|
||||
|
||||
|
||||
// English/root locale keeps U+0307 (combining dot above).
|
||||
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
|
||||
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
|
||||
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());
|
||||
|
||||
// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
|
||||
// other diacritic marks are dropped. This rule is based on the current CLDR's
|
||||
// el-Upper transformation, but Greek uppercasing rules are more sophisticated
|
||||
// than this. See http://bugs.icu-project.org/trac/ticket/10582 and
|
||||
// http://unicode.org/cldr/trac/ticket/7905 .
|
||||
assertEquals("Α", "α\u0301".toLocaleUpperCase("el"));
|
||||
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-GR"));
|
||||
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek"));
|
||||
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek-GR"));
|
||||
assertEquals("Α", "ά".toLocaleUpperCase("el"));
|
||||
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
|
||||
assertEquals("ΑΟΥΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
|
||||
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
|
||||
assertEquals("ΟΕ", "Ό\u1f15".toLocaleUpperCase("el"));
|
||||
assertEquals("ΟΕ", "Ο\u0301ε\u0314\u0301".toLocaleUpperCase("el"));
|
||||
|
||||
// Input and output are identical.
|
||||
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
|
||||
assertEquals("ΑΒΓΔΕ", "ΑΒΓΔΕ".toLocaleUpperCase("el"));
|
||||
assertEquals("ΑΒΓΔΕАБ𝐀𝐁", "ΑΒΓΔΕАБ𝐀𝐁".toLocaleUpperCase("el"));
|
||||
assertEquals("ABCDEÂÓḴ123", "ABCDEÂÓḴ123".toLocaleUpperCase("el"));
|
||||
// ASCII-only or Latin-1 only: 1-byte
|
||||
assertEquals("ABCDE123", "ABCDE123".toLocaleUpperCase("el"));
|
||||
assertEquals("ABCDEÂÓ123", "ABCDEÂÓ123".toLocaleUpperCase("el"));
|
||||
|
||||
// To make sure that the input string is not overwritten in place.
|
||||
var strings = ["abCdef", "αβγδε", "άόύώ", "аб"];
|
||||
for (var s of strings) {
|
||||
var backupAsArray = s.split("");
|
||||
var uppered = s.toLocaleUpperCase("el");
|
||||
assertEquals(s, backupAsArray.join(""));
|
||||
}
|
||||
|
||||
// In other locales, U+0301 is preserved.
|
||||
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
|
||||
"α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));
|
||||
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
|
||||
"α\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());
|
||||
|
||||
// Plane 1; Deseret and Warang Citi Script.
|
||||
assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());
|
||||
assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());
|
||||
// Mathematical Bold {Capital, Small} Letter A do not change.
|
||||
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());
|
||||
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());
|
||||
// Plane 1; New characters in Unicode 8.0
|
||||
assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());
|
||||
assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());
|
||||
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());
|
||||
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
|
||||
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
|
||||
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
|
||||
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
|
@ -26,10 +26,12 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from testrunner.local import testsuite
|
||||
from testrunner.objects import testcase
|
||||
|
||||
FLAGS_PATTERN = re.compile(r"//\s+Flags:(.*)")
|
||||
|
||||
class IntlTestSuite(testsuite.TestSuite):
|
||||
|
||||
@ -55,7 +57,11 @@ class IntlTestSuite(testsuite.TestSuite):
|
||||
return tests
|
||||
|
||||
def GetFlagsForTestCase(self, testcase, context):
|
||||
source = self.GetSourceForTest(testcase)
|
||||
flags = ["--allow-natives-syntax"] + context.mode_flags
|
||||
flags_match = re.findall(FLAGS_PATTERN, source)
|
||||
for match in flags_match:
|
||||
flags += match.strip().split()
|
||||
|
||||
files = []
|
||||
files.append(os.path.join(self.root, "assert.js"))
|
||||
@ -71,6 +77,10 @@ class IntlTestSuite(testsuite.TestSuite):
|
||||
|
||||
return testcase.flags + flags
|
||||
|
||||
def GetSourceForTest(self, testcase):
|
||||
filename = os.path.join(self.root, testcase.path + self.suffix())
|
||||
with open(filename) as f:
|
||||
return f.read()
|
||||
|
||||
def GetSuite(name, root):
|
||||
return IntlTestSuite(name, root)
|
||||
|
@ -139,14 +139,16 @@
|
||||
'intl402/NumberFormat/11.1.1_1': [FAIL],
|
||||
|
||||
# https://code.google.com/p/v8/issues/detail?id=4476
|
||||
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
|
||||
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
|
||||
# The bug is fixed but behind a flag, --icu_case_mapping.
|
||||
'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL],
|
||||
'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL],
|
||||
|
||||
# https://code.google.com/p/v8/issues/detail?id=4477
|
||||
# The bug is fixed but behind a flag, --icu_case_mapping.
|
||||
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
|
||||
'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL],
|
||||
'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL],
|
||||
'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL],
|
||||
@ -423,6 +425,22 @@
|
||||
'built-ins/String/prototype/normalize/return-normalized-string': [SKIP],
|
||||
'built-ins/String/prototype/normalize/return-normalized-string-from-coerced-form': [SKIP],
|
||||
'built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter': [SKIP],
|
||||
|
||||
# Case-conversion is not fully compliant with the Unicode spec with i18n off.
|
||||
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
|
||||
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL],
|
||||
'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
|
||||
'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL],
|
||||
|
||||
# Locale-sensitive case-conversion is not available with i18n off.
|
||||
'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL],
|
||||
'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL],
|
||||
'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL],
|
||||
'intl402/String/prototype/toLocaleUpperCase/special_casing_Azeri': [FAIL],
|
||||
'intl402/String/prototype/toLocaleUpperCase/special_casing_Lithuanian': [FAIL],
|
||||
'intl402/String/prototype/toLocaleUpperCase/special_casing_Turkish': [FAIL],
|
||||
}], # no_i18n == True
|
||||
|
||||
['arch == arm or arch == mipsel or arch == mips or arch == arm64 or arch == mips64 or arch == mips64el', {
|
||||
|
Loading…
Reference in New Issue
Block a user