[Intl] Fix Locale Canonicalization bugs
Bug: v8:9613, v8:10447
Change-Id: Iff43b298c6edaa9b258038ae15406d5df209e8b5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2543266
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71267}
This commit is contained in:
parent
300573aca8
commit
b346af5424
@ -744,22 +744,12 @@ bool IsTwoLetterLanguage(const std::string& locale) {
|
||||
IsAsciiLower(locale[1]);
|
||||
}
|
||||
|
||||
bool IsDeprecatedLanguage(const std::string& locale) {
|
||||
bool IsDeprecatedOrLegacyLanguage(const std::string& locale) {
|
||||
// Check if locale is one of the deprecated language tags:
|
||||
return locale == "in" || locale == "iw" || locale == "ji" || locale == "jw" ||
|
||||
locale == "mo";
|
||||
}
|
||||
|
||||
// Reference:
|
||||
// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
|
||||
bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
|
||||
if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
|
||||
if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
|
||||
V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
|
||||
return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
|
||||
locale.substr(2) == "mingo";
|
||||
}
|
||||
return false;
|
||||
locale == "mo" ||
|
||||
// Check if locale is one of the legacy language tags:
|
||||
locale == "sh" || locale == "tl" || locale == "no";
|
||||
}
|
||||
|
||||
bool IsStructurallyValidLanguageTag(const std::string& tag) {
|
||||
@ -788,7 +778,7 @@ Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
|
||||
// (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
|
||||
// codes. Instead, let them be handled by ICU in the slow path. However,
|
||||
// fast-track 'fil' (3-letter canonical code).
|
||||
if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
|
||||
if ((IsTwoLetterLanguage(locale) && !IsDeprecatedOrLegacyLanguage(locale)) ||
|
||||
locale == "fil") {
|
||||
return Just(locale);
|
||||
}
|
||||
@ -797,13 +787,6 @@ Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
|
||||
// the input before any more check.
|
||||
std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);
|
||||
|
||||
// ICU maps a few grandfathered tags to what looks like a regular language
|
||||
// tag even though IANA language tag registry does not have a preferred
|
||||
// entry map for them. Return them as they're with lowercasing.
|
||||
if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
|
||||
return Just(locale);
|
||||
}
|
||||
|
||||
// ECMA 402 6.2.3
|
||||
// TODO(jshin): uloc_{for,to}LanguageTag can fail even for a structurally valid
|
||||
// language tag if it's too long (much longer than 100 chars). Even if we
|
||||
@ -817,6 +800,32 @@ Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
|
||||
// is structurally valid. Due to a couple of bugs, we can't use it
|
||||
// without Chromium patches or ICU 62 or earlier.
|
||||
icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
|
||||
|
||||
if (U_FAILURE(error) || icu_locale.isBogus()) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(
|
||||
MessageTemplate::kInvalidLanguageTag,
|
||||
isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
|
||||
Nothing<std::string>());
|
||||
}
|
||||
|
||||
// Reject an attribute of the wrong length.
|
||||
if (std::strstr(icu_locale.getName(), "attribute=") != nullptr) {
|
||||
std::string attribute =
|
||||
icu_locale.getKeywordValue<std::string>("attribute", error);
|
||||
if (U_SUCCESS(error) &&
|
||||
(attribute.length() < 3 || attribute.length() > 8)) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(
|
||||
MessageTemplate::kInvalidLanguageTag,
|
||||
isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
|
||||
Nothing<std::string>());
|
||||
}
|
||||
}
|
||||
|
||||
icu_locale.canonicalize(error);
|
||||
if (U_FAILURE(error) || icu_locale.isBogus()) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
|
@ -104,6 +104,9 @@ Handle<Object> UnicodeKeywordValue(Isolate* isolate, Handle<JSLocale> locale,
|
||||
if (value == "yes") {
|
||||
value = "true";
|
||||
}
|
||||
if (value == "true" && strcmp(key, "kf") == 0) {
|
||||
return isolate->factory()->NewStringFromStaticChars("");
|
||||
}
|
||||
return isolate->factory()->NewStringFromAsciiChecked(value.c_str());
|
||||
}
|
||||
|
||||
@ -242,10 +245,12 @@ Maybe<bool> ApplyOptionsToTag(Isolate* isolate, Handle<String> tag,
|
||||
return Just(false);
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
builder->build(status);
|
||||
icu::Locale canonicalized = builder->build(status);
|
||||
canonicalized.canonicalize(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return Just(false);
|
||||
}
|
||||
builder->setLocale(canonicalized);
|
||||
|
||||
// 3. Let language be ? GetOption(options, "language", "string", undefined,
|
||||
// undefined).
|
||||
@ -346,6 +351,9 @@ MaybeHandle<JSLocale> JSLocale::New(Isolate* isolate, Handle<Map> map,
|
||||
MAYBE_RETURN(maybe_insert, MaybeHandle<JSLocale>());
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
icu::Locale icu_locale = builder.build(status);
|
||||
|
||||
icu_locale.canonicalize(status);
|
||||
|
||||
if (!maybe_insert.FromJust() || U_FAILURE(status)) {
|
||||
THROW_NEW_ERROR(isolate,
|
||||
NewRangeError(MessageTemplate::kLocaleBadParameters),
|
||||
|
@ -14,3 +14,15 @@ assertEquals("en-u-ca-gregory", Intl.getCanonicalLocales("en-u-ca-gregory-ca-chi
|
||||
|
||||
// Check duplicate subtags (after the first tag) are detected.
|
||||
assertThrows(() => Intl.getCanonicalLocales("en-foobar-foobar"), RangeError);
|
||||
|
||||
// Check some common cases.
|
||||
assertEquals("id", Intl.getCanonicalLocales("in")[0]);
|
||||
assertEquals("he", Intl.getCanonicalLocales("iw")[0]);
|
||||
assertEquals("yi", Intl.getCanonicalLocales("ji")[0]);
|
||||
assertEquals("jv", Intl.getCanonicalLocales("jw")[0]);
|
||||
assertEquals("ro", Intl.getCanonicalLocales("mo")[0]);
|
||||
assertEquals("sr", Intl.getCanonicalLocales("scc")[0]);
|
||||
assertEquals("sr-Latn", Intl.getCanonicalLocales("sh")[0]);
|
||||
assertEquals("sr-ME", Intl.getCanonicalLocales("cnr")[0]);
|
||||
assertEquals("nb", Intl.getCanonicalLocales("no")[0]);
|
||||
assertEquals("fil", Intl.getCanonicalLocales("tl")[0]);
|
||||
|
@ -7,7 +7,7 @@
|
||||
// tag registry. Nonetheless, ICU cooks up a value when canonicalizing.
|
||||
// v8 works around that ICU issue.
|
||||
// See https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
||||
["cel-gaulish", "cel-gaulish"],
|
||||
["cel-gaulish", "xtg-x-cel-gaulish"],
|
||||
|
||||
// Matching should be case-insensitive.
|
||||
].forEach(([inputLocale, expectedLocale]) => {
|
||||
|
19
test/intl/locale/locale.js
Normal file
19
test/intl/locale/locale.js
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
assertEquals("id", (new Intl.Locale("in")).toString());
|
||||
assertEquals("he", (new Intl.Locale("iw")).toString());
|
||||
assertEquals("yi", (new Intl.Locale("ji")).toString());
|
||||
assertEquals("jv", (new Intl.Locale("jw")).toString());
|
||||
assertEquals("ro", (new Intl.Locale("mo")).toString());
|
||||
assertEquals("sr", (new Intl.Locale("scc")).toString());
|
||||
assertEquals("hr", (new Intl.Locale("scr")).toString());
|
||||
|
||||
assertEquals("sr-Latn", (new Intl.Locale("sh")).toString());
|
||||
assertEquals("sr-ME", (new Intl.Locale("cnr")).toString());
|
||||
assertEquals("nb", (new Intl.Locale("no")).toString());
|
||||
assertEquals("fil", (new Intl.Locale("tl")).toString());
|
||||
|
||||
assertEquals("hy-AM", (new Intl.Locale("hy-SU")).toString());
|
||||
assertEquals("lv-LV", (new Intl.Locale("lv-SU")).toString());
|
@ -546,29 +546,22 @@
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=9049
|
||||
'language/comments/hashbang/use-strict': [SKIP],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=9613
|
||||
# https://github.com/tc39/test262/pull/2903
|
||||
'intl402/Intl/getCanonicalLocales/canonicalized-tags': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/grandfathered': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/preferred-grandfathered': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/preferred-variant': [FAIL],
|
||||
'intl402/Locale/constructor-apply-options-canonicalizes-twice': [FAIL],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=9613
|
||||
'intl402/Locale/likely-subtags-grandfathered': [FAIL],
|
||||
|
||||
# http://crbug/v8/11039
|
||||
'intl402/Locale/reject-duplicate-variants-in-tlang': [FAIL],
|
||||
|
||||
# http://crbug/v8/10447
|
||||
'intl402/Intl/getCanonicalLocales/complex-language-subtag-replacement': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/complex-region-subtag-replacement': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/transformed-ext-canonical': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/transformed-ext-invalid': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true': [FAIL],
|
||||
'intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit': [FAIL],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=9742
|
||||
'intl402/Locale/getters': [FAIL],
|
||||
|
||||
# https://github.com/tc39/test262/pull/2349
|
||||
'intl402/Locale/constructor-options-region-valid': [FAIL],
|
||||
|
||||
# http://crbug/v8/11174
|
||||
'intl402/DateTimeFormat/intl-legacy-constructed-symbol': [FAIL],
|
||||
@ -653,9 +646,6 @@
|
||||
'built-ins/TypedArray/prototype/item/returns-undefined-for-holes-in-sparse-arrays': [FAIL],
|
||||
'built-ins/TypedArray/prototype/item/returns-undefined-for-out-of-range-index': [FAIL],
|
||||
|
||||
# http://crbug/v8/11039
|
||||
'intl402/Locale/reject-duplicate-variants-in-tlang': [FAIL],
|
||||
|
||||
######################## NEEDS INVESTIGATION ###########################
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=7833
|
||||
|
Loading…
Reference in New Issue
Block a user