diff --git a/src/objects-debug.cc b/src/objects-debug.cc index a6414b7dd9..92bc10be78 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -2056,6 +2056,7 @@ void JSCollator::JSCollatorVerify(Isolate* isolate) { JSObjectVerify(isolate); VerifyObjectField(isolate, kICUCollatorOffset); VerifyObjectField(isolate, kBoundCompareOffset); + VerifyObjectField(isolate, kLocaleOffset); } void JSDateTimeFormat::JSDateTimeFormatVerify(Isolate* isolate) { diff --git a/src/objects-printer.cc b/src/objects-printer.cc index f1acb30e99..1a1df5e19d 100644 --- a/src/objects-printer.cc +++ b/src/objects-printer.cc @@ -2028,6 +2028,7 @@ void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT JSObjectPrintHeader(os, *this, "JSCollator"); os << "\n - icu collator: " << Brief(icu_collator()); os << "\n - bound compare: " << Brief(bound_compare()); + os << "\n - locale: " << Brief(locale()); JSObjectPrintBody(os, *this); } diff --git a/src/objects/intl-objects.cc b/src/objects/intl-objects.cc index d4251afa1f..8433fb0de1 100644 --- a/src/objects/intl-objects.cc +++ b/src/objects/intl-objects.cc @@ -26,6 +26,7 @@ #include "src/string-case.h" #include "unicode/basictz.h" #include "unicode/brkiter.h" +#include "unicode/calendar.h" #include "unicode/coll.h" #include "unicode/decimfmt.h" #include "unicode/locid.h" @@ -511,24 +512,12 @@ bool RemoveLocaleScriptTag(const std::string& icu_locale, std::set Intl::BuildLocaleSet( const icu::Locale* icu_available_locales, int32_t count) { std::set locales; - UErrorCode error = U_ZERO_ERROR; - char result[ULOC_FULLNAME_CAPACITY]; - for (int32_t i = 0; i < count; ++i) { - const char* icu_name = icu_available_locales[i].getName(); - - error = U_ZERO_ERROR; - // No need to force strict BCP47 rules. - uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error); - if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) { - // This shouldn't happen, but lets not break the user. - continue; - } - std::string locale(result); + std::string locale = Intl::ToLanguageTag(icu_available_locales[i]); locales.insert(locale); std::string shortened_locale; - if (RemoveLocaleScriptTag(icu_name, &shortened_locale)) { + if (RemoveLocaleScriptTag(locale, &shortened_locale)) { std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-'); locales.insert(shortened_locale); } @@ -537,6 +526,27 @@ std::set Intl::BuildLocaleSet( return locales; } +std::string Intl::ToLanguageTag(const icu::Locale& locale) { + UErrorCode status = U_ZERO_ERROR; + std::string res = locale.toLanguageTag(status); + CHECK(U_SUCCESS(status)); + + // Hack to remove -true from unicode extensions + // Address https://crbug.com/v8/8565 + // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag + // by fixing ICU-20310. + const char* kSepTrue = "-true"; + size_t u_ext_start = res.find("-u-"); + if (u_ext_start != std::string::npos) { + for (size_t sep_true = + res.find(kSepTrue, u_ext_start + 5 /* strlen("-u-xx") == 5 */); + sep_true != std::string::npos; sep_true = res.find(kSepTrue)) { + res.erase(sep_true, 5 /* strlen(kSepTrue) == 5 */); + } + } + return res; +} + namespace { std::string DefaultLocale(Isolate* isolate) { if (isolate->default_locale().empty()) { @@ -546,13 +556,9 @@ std::string DefaultLocale(Isolate* isolate) { isolate->set_default_locale("en-US"); } else { // Set the locale - char result[ULOC_FULLNAME_CAPACITY]; - UErrorCode status = U_ZERO_ERROR; - int32_t length = - uloc_toLanguageTag(default_locale.getName(), result, - ULOC_FULLNAME_CAPACITY, FALSE, &status); - isolate->set_default_locale( - U_SUCCESS(status) ? std::string(result, length) : "und"); + isolate->set_default_locale(default_locale.isBogus() + ? "und" + : Intl::ToLanguageTag(default_locale)); } DCHECK(!isolate->default_locale().empty()); } @@ -758,29 +764,12 @@ Maybe Intl::CanonicalizeLanguageTag(Isolate* isolate, // propose to Ecma 402 to put a limit on the locale length or change ICU to // handle long locale names better. See // https://unicode-org.atlassian.net/browse/ICU-13417 - UErrorCode error = U_ZERO_ERROR; - char icu_result[ULOC_FULLNAME_CAPACITY]; - // uloc_forLanguageTag checks the structrual validity. If the input BCP47 + // forLanguageTag checks the structrual validity. If the input BCP47 // language tag is parsed all the way to the end, it indicates that the input // is structurally valid. Due to a couple of bugs, we can't use it // without Chromium patches or ICU 62 or earlier. - int parsed_length; - uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY, - &parsed_length, &error); - if (U_FAILURE(error) || - static_cast(parsed_length) < locale.length() || - error == U_STRING_NOT_TERMINATED_WARNING) { - THROW_NEW_ERROR_RETURN_VALUE( - isolate, - NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str), - Nothing()); - } - - // Force strict BCP47 rules. - char result[ULOC_FULLNAME_CAPACITY]; - int32_t result_len = uloc_toLanguageTag(icu_result, result, - ULOC_FULLNAME_CAPACITY, TRUE, &error); - + UErrorCode error = U_ZERO_ERROR; + icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error); if (U_FAILURE(error)) { THROW_NEW_ERROR_RETURN_VALUE( isolate, @@ -788,7 +777,7 @@ Maybe Intl::CanonicalizeLanguageTag(Isolate* isolate, Nothing()); } - return Just(std::string(result, result_len)); + return Just(Intl::ToLanguageTag(icu_locale)); } Maybe> Intl::CanonicalizeLocaleList( @@ -1454,6 +1443,44 @@ MaybeHandle Intl::SupportedLocalesOf( } namespace { +template +bool IsValidExtension(const icu::Locale& locale, const char* key, + const std::string& value) { + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr enumeration( + T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()), + false, status)); + if (U_SUCCESS(status)) { + int32_t length; + std::string legacy_type(uloc_toLegacyType(key, value.c_str())); + for (const char* item = enumeration->next(&length, status); item != nullptr; + item = enumeration->next(&length, status)) { + if (U_SUCCESS(status) && legacy_type == item) { + return true; + } + } + } + return false; +} + +bool IsValidCalendar(const icu::Locale& locale, const std::string& value) { + return IsValidExtension(locale, "calendar", value); +} + +bool IsValidCollation(const icu::Locale& locale, const std::string& value) { + std::set invalid_values = {"standard", "search"}; + if (invalid_values.find(value) != invalid_values.end()) return false; + return IsValidExtension(locale, "collation", value); +} + +bool IsValidNumberingSystem(const std::string& value) { + std::set invalid_values = {"native", "traditio", "finance"}; + if (invalid_values.find(value) != invalid_values.end()) return false; + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr numbering_system( + icu::NumberingSystem::createInstanceByName(value.c_str(), status)); + return U_SUCCESS(status) && numbering_system.get() != nullptr; +} std::map LookupAndValidateUnicodeExtensions( icu::Locale* icu_locale, const std::set& relevant_keys) { @@ -1493,13 +1520,40 @@ std::map LookupAndValidateUnicodeExtensions( if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) { const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value); - extensions.insert( - std::pair(bcp47_key, bcp47_value)); - } else { - status = U_ZERO_ERROR; - icu_locale->setKeywordValue(keyword, nullptr, status); - CHECK(U_SUCCESS(status)); + bool is_valid_value = false; + // 8.h.ii.1.a If keyLocaleData contains requestedValue, then + if (strcmp("ca", bcp47_key) == 0) { + is_valid_value = IsValidCalendar(*icu_locale, bcp47_value); + } else if (strcmp("co", bcp47_key) == 0) { + is_valid_value = IsValidCollation(*icu_locale, bcp47_value); + } else if (strcmp("hc", bcp47_key) == 0) { + // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml + std::set valid_values = {"h11", "h12", "h23", "h24"}; + is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); + } else if (strcmp("lb", bcp47_key) == 0) { + // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml + std::set valid_values = {"strict", "normal", "loose"}; + is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); + } else if (strcmp("kn", bcp47_key) == 0) { + // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml + std::set valid_values = {"true", "false"}; + is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); + } else if (strcmp("kf", bcp47_key) == 0) { + // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml + std::set valid_values = {"upper", "lower", "false"}; + is_valid_value = valid_values.find(bcp47_value) != valid_values.end(); + } else if (strcmp("nu", bcp47_key) == 0) { + is_valid_value = IsValidNumberingSystem(bcp47_value); + } + if (is_valid_value) { + extensions.insert( + std::pair(bcp47_key, bcp47_value)); + continue; + } } + status = U_ZERO_ERROR; + icu_locale->setKeywordValue(keyword, nullptr, status); + CHECK(U_SUCCESS(status)); } return extensions; @@ -1581,11 +1635,7 @@ Intl::ResolvedLocale Intl::ResolveLocale( std::map extensions = LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys); - char canonicalized_locale[ULOC_FULLNAME_CAPACITY]; - UErrorCode status = U_ZERO_ERROR; - uloc_toLanguageTag(icu_locale.getName(), canonicalized_locale, - ULOC_FULLNAME_CAPACITY, true, &status); - CHECK(U_SUCCESS(status)); + std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale); // TODO(gsathya): Remove privateuse subtags from extensions. diff --git a/src/objects/intl-objects.h b/src/objects/intl-objects.h index 804f676eb5..647a7e529d 100644 --- a/src/objects/intl-objects.h +++ b/src/objects/intl-objects.h @@ -51,6 +51,8 @@ class Intl { static std::set BuildLocaleSet( const icu::Locale* icu_available_locales, int32_t count); + static std::string ToLanguageTag(const icu::Locale& locale); + // Get the name of the numbering system from locale. // ICU doesn't expose numbering system in any way, so we have to assume that // for given locale NumberingSystem constructor produces the same digits as diff --git a/src/objects/js-collator-inl.h b/src/objects/js-collator-inl.h index be02d9a92a..8862c77c0f 100644 --- a/src/objects/js-collator-inl.h +++ b/src/objects/js-collator-inl.h @@ -22,6 +22,7 @@ OBJECT_CONSTRUCTORS_IMPL(JSCollator, JSObject) ACCESSORS(JSCollator, icu_collator, Managed, kICUCollatorOffset) ACCESSORS(JSCollator, bound_compare, Object, kBoundCompareOffset); +ACCESSORS2(JSCollator, locale, String, kLocaleOffset) CAST_ACCESSOR2(JSCollator); diff --git a/src/objects/js-collator.cc b/src/objects/js-collator.cc index dff91b19a5..595f7f1d31 100644 --- a/src/objects/js-collator.cc +++ b/src/objects/js-collator.cc @@ -60,13 +60,6 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle options, .FromJust()); } -void toLanguageTag(const icu::Locale& locale, char* tag) { - UErrorCode status = U_ZERO_ERROR; - uloc_toLanguageTag(locale.getName(), tag, ULOC_FULLNAME_CAPACITY, FALSE, - &status); - CHECK(U_SUCCESS(status)); -} - } // anonymous namespace // static @@ -135,6 +128,8 @@ Handle JSCollator::ResolvedOptions(Isolate* isolate, status = U_ZERO_ERROR; + Handle locale = Handle(collator->locale(), isolate); + icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status)); CHECK(U_SUCCESS(status)); @@ -144,7 +139,6 @@ Handle JSCollator::ResolvedOptions(Isolate* isolate, const char* legacy_collation_key = uloc_toLegacyKey(collation_key); DCHECK_NOT_NULL(legacy_collation_key); - char bcp47_locale_tag[ULOC_FULLNAME_CAPACITY]; char legacy_collation_value[ULOC_FULLNAME_CAPACITY]; status = U_ZERO_ERROR; int32_t length = @@ -152,37 +146,13 @@ Handle JSCollator::ResolvedOptions(Isolate* isolate, ULOC_FULLNAME_CAPACITY, status); if (length > 0 && U_SUCCESS(status)) { - const char* collation_value = - uloc_toUnicodeLocaleType(collation_key, legacy_collation_value); - CHECK_NOT_NULL(collation_value); + collation = uloc_toUnicodeLocaleType(collation_key, legacy_collation_value); + CHECK_NOT_NULL(collation); - if (strcmp(collation_value, "search") == 0) { + if (strcmp(collation, "search") == 0) { usage = "search"; - - // Search is disallowed as a collation value per spec. Let's - // use `default`, instead. - // - // https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances collation = "default"; - - // We clone the icu::Locale because we don't want the - // icu_collator to be affected when we remove the collation key - // below. - icu::Locale new_icu_locale = icu_locale; - - // The spec forbids the search as a collation value in the - // locale tag, so let's filter it out. - status = U_ZERO_ERROR; - new_icu_locale.setKeywordValue(legacy_collation_key, nullptr, status); - CHECK(U_SUCCESS(status)); - - toLanguageTag(new_icu_locale, bcp47_locale_tag); - } else { - collation = collation_value; - toLanguageTag(icu_locale, bcp47_locale_tag); } - } else { - toLanguageTag(icu_locale, bcp47_locale_tag); } // 5. For each row of Table 2, except the header row, in table order, do @@ -196,8 +166,11 @@ Handle JSCollator::ResolvedOptions(Isolate* isolate, // [[Collation]] "collation" // [[Numeric]] "numeric" kn // [[CaseFirst]] "caseFirst" kf - CreateDataPropertyForOptions( - isolate, options, isolate->factory()->locale_string(), bcp47_locale_tag); + CHECK(JSReceiver::CreateDataProperty(isolate, options, + isolate->factory()->locale_string(), + locale, kDontThrow) + .FromJust()); + CreateDataPropertyForOptions(isolate, options, isolate->factory()->usage_string(), usage); CreateDataPropertyForOptions( @@ -235,6 +208,14 @@ UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) { } } +void SetNumericOption(icu::Collator* icu_collator, bool numeric) { + CHECK_NOT_NULL(icu_collator); + UErrorCode status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, + numeric ? UCOL_ON : UCOL_OFF, status); + CHECK(U_SUCCESS(status)); +} + void SetCaseFirstOption(icu::Collator* icu_collator, Intl::CaseFirst case_first) { CHECK_NOT_NULL(icu_collator); @@ -325,32 +306,14 @@ MaybeHandle JSCollator::Initialize(Isolate* isolate, requested_locales, matcher, relevant_extension_keys); // 18. Set collator.[[Locale]] to r.[[locale]]. + Handle locale_str = + isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); + collator->set_locale(*locale_str); + icu::Locale icu_locale = r.icu_locale; DCHECK(!icu_locale.isBogus()); - std::map extensions = r.extensions; - // 19. Let collation be r.[[co]]. - // - // r.[[co]] is already set as part of the icu::Locale creation as - // icu parses unicode extensions and sets the keywords. - // - // We need to sanitize the keywords based on certain ECMAScript rules. - // - // As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots: - // The values "standard" and "search" must not be used as elements - // in any [[SortLocaleData]][locale].co and - // [[SearchLocaleData]][locale].co list. - auto co_extension_it = extensions.find("co"); - if (co_extension_it != extensions.end()) { - const std::string& value = co_extension_it->second; - if ((value == "search") || (value == "standard")) { - UErrorCode status = U_ZERO_ERROR; - const char* key = uloc_toLegacyKey("co"); - icu_locale.setKeywordValue(key, nullptr, status); - CHECK(U_SUCCESS(status)); - } - } // 5. Set collator.[[Usage]] to usage. // @@ -410,19 +373,11 @@ MaybeHandle JSCollator::Initialize(Isolate* isolate, // passed in through the unicode extensions. status = U_ZERO_ERROR; if (found_numeric.FromJust()) { - icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, - numeric ? UCOL_ON : UCOL_OFF, status); - CHECK(U_SUCCESS(status)); + SetNumericOption(icu_collator.get(), numeric); } else { - auto kn_extension_it = extensions.find("kn"); - if (kn_extension_it != extensions.end()) { - const std::string& value = kn_extension_it->second; - - numeric = (value == "true"); - - icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, - numeric ? UCOL_ON : UCOL_OFF, status); - CHECK(U_SUCCESS(status)); + auto kn_extension_it = r.extensions.find("kn"); + if (kn_extension_it != r.extensions.end()) { + SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true")); } } @@ -435,10 +390,10 @@ MaybeHandle JSCollator::Initialize(Isolate* isolate, if (case_first != Intl::CaseFirst::kUndefined) { SetCaseFirstOption(icu_collator.get(), case_first); } else { - auto kf_extension_it = extensions.find("kf"); - if (kf_extension_it != extensions.end()) { - const std::string& value = kf_extension_it->second; - SetCaseFirstOption(icu_collator.get(), ToCaseFirst(value.c_str())); + auto kf_extension_it = r.extensions.find("kf"); + if (kf_extension_it != r.extensions.end()) { + SetCaseFirstOption(icu_collator.get(), + ToCaseFirst(kf_extension_it->second.c_str())); } } diff --git a/src/objects/js-collator.h b/src/objects/js-collator.h index 1031012953..884d422cca 100644 --- a/src/objects/js-collator.h +++ b/src/objects/js-collator.h @@ -50,6 +50,7 @@ class JSCollator : public JSObject { #define JS_COLLATOR_FIELDS(V) \ V(kICUCollatorOffset, kTaggedSize) \ V(kBoundCompareOffset, kTaggedSize) \ + V(kLocaleOffset, kTaggedSize) \ /* Total size. */ \ V(kSize, 0) @@ -58,6 +59,7 @@ class JSCollator : public JSObject { DECL_ACCESSORS(icu_collator, Managed) DECL_ACCESSORS(bound_compare, Object); + DECL_ACCESSORS2(locale, String) OBJECT_CONSTRUCTORS(JSCollator, JSObject); }; diff --git a/src/objects/js-date-time-format.cc b/src/objects/js-date-time-format.cc index c044147fb0..1d50d6a88d 100644 --- a/src/objects/js-date-time-format.cc +++ b/src/objects/js-date-time-format.cc @@ -846,10 +846,7 @@ MaybeHandle JSDateTimeFormat::Initialize( // ecma402/#sec-intl.datetimeformat-internal-slots // The value of the [[RelevantExtensionKeys]] internal slot is // « "ca", "nu", "hc" ». - // - // TODO(ftang): Add "hc" to this list of keys: - // https://bugs.chromium.org/p/v8/issues/detail?id=7482 - std::set relevant_extension_keys = {"nu", "ca"}; + std::set relevant_extension_keys = {"nu", "ca", "hc"}; // 10. Let localeData be %DateTimeFormat%.[[LocaleData]]. // 11. Let r be ResolveLocale( %DateTimeFormat%.[[AvailableLocales]], @@ -860,8 +857,6 @@ MaybeHandle JSDateTimeFormat::Initialize( isolate, JSDateTimeFormat::GetAvailableLocales(), requested_locales, locale_matcher, relevant_extension_keys); - // TODO(ftang): Make sure that "nu" key doesn't have "native", - // "traditio" or "finance" values. icu::Locale icu_locale = r.icu_locale; DCHECK(!icu_locale.isBogus()); diff --git a/src/objects/js-number-format.cc b/src/objects/js-number-format.cc index 47d5b71e8d..646cbed8e7 100644 --- a/src/objects/js-number-format.cc +++ b/src/objects/js-number-format.cc @@ -263,26 +263,6 @@ MaybeHandle JSNumberFormat::Initialize( isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); number_format->set_locale(*locale_str); - icu::Locale icu_locale = r.icu_locale; - DCHECK(!icu_locale.isBogus()); - - std::map extensions = r.extensions; - - // The list that is the value of the "nu" field of any locale field of - // [[LocaleData]] must not include the values "native", "traditio", or - // "finance". - // - // See https://tc39.github.io/ecma402/#sec-intl.numberformat-internal-slots - if (extensions.find("nu") != extensions.end()) { - const std::string value = extensions.at("nu"); - if (value == "native" || value == "traditio" || value == "finance") { - // 10. Set numberFormat.[[NumberingSystem]] to r.[[nu]]. - UErrorCode status = U_ZERO_ERROR; - icu_locale.setKeywordValue("nu", nullptr, status); - CHECK(U_SUCCESS(status)); - } - } - // 11. Let dataLocale be r.[[dataLocale]]. // // 12. Let style be ? GetOption(options, "style", "string", « "decimal", @@ -356,20 +336,20 @@ MaybeHandle JSNumberFormat::Initialize( std::unique_ptr icu_number_format; if (style == Style::DECIMAL) { icu_number_format.reset( - icu::NumberFormat::createInstance(icu_locale, status)); + icu::NumberFormat::createInstance(r.icu_locale, status)); } else if (style == Style::PERCENT) { icu_number_format.reset( - icu::NumberFormat::createPercentInstance(icu_locale, status)); + icu::NumberFormat::createPercentInstance(r.icu_locale, status)); } else { DCHECK_EQ(style, Style::CURRENCY); icu_number_format.reset( - icu::NumberFormat::createInstance(icu_locale, format_style, status)); + icu::NumberFormat::createInstance(r.icu_locale, format_style, status)); } if (U_FAILURE(status) || icu_number_format.get() == nullptr) { status = U_ZERO_ERROR; // Remove extensions and try again. - icu::Locale no_extension_locale(icu_locale.getBaseName()); + icu::Locale no_extension_locale(r.icu_locale.getBaseName()); icu_number_format.reset( icu::NumberFormat::createInstance(no_extension_locale, status)); diff --git a/src/objects/js-plural-rules.cc b/src/objects/js-plural-rules.cc index 49f1129d25..59b52424ef 100644 --- a/src/objects/js-plural-rules.cc +++ b/src/objects/js-plural-rules.cc @@ -151,12 +151,6 @@ MaybeHandle JSPluralRules::Initialize( Intl::ResolveLocale(isolate, JSPluralRules::GetAvailableLocales(), requested_locales, matcher, {}); - // 18. Set collator.[[Locale]] to r.[[locale]]. - icu::Locale icu_locale = r.icu_locale; - DCHECK(!icu_locale.isBogus()); - - std::map extensions = r.extensions; - // 12. Set pluralRules.[[Locale]] to the value of r.[[locale]]. Handle locale_str = isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); @@ -164,7 +158,7 @@ MaybeHandle JSPluralRules::Initialize( std::unique_ptr icu_plural_rules; std::unique_ptr icu_decimal_format; - InitializeICUPluralRules(isolate, icu_locale, type, &icu_plural_rules, + InitializeICUPluralRules(isolate, r.icu_locale, type, &icu_plural_rules, &icu_decimal_format); CHECK_NOT_NULL(icu_plural_rules.get()); CHECK_NOT_NULL(icu_decimal_format.get()); diff --git a/src/objects/js-segmenter.cc b/src/objects/js-segmenter.cc index 22083f098f..ca1b85ee1b 100644 --- a/src/objects/js-segmenter.cc +++ b/src/objects/js-segmenter.cc @@ -78,7 +78,7 @@ MaybeHandle JSSegmenter::Initialize( // requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]). Intl::ResolvedLocale r = Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(), - requested_locales, matcher, {}); + requested_locales, matcher, {"lb"}); // 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", « // "strict", "normal", "loose" », "normal"). diff --git a/test/intl/collator/check-co-option.js b/test/intl/collator/check-co-option.js new file mode 100644 index 0000000000..477d00a045 --- /dev/null +++ b/test/intl/collator/check-co-option.js @@ -0,0 +1,33 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +let invalid_co = [ + "invalid", + "search", + "standard", + "abce", +]; + +let valid_locales = [ + "zh-u-co-zhuyin", + "zh-u-co-stroke", + "ar-u-co-compat", + "en-u-co-emoji", + "en-u-co-eor", + "zh-Hant-u-co-pinyin", + "ko-u-co-searchjl", + "ja-u-co-unihan", +]; + +invalid_co.forEach(function(co) { + let col = new Intl.Collator(["en-u-co-" + co]); + assertEquals("en", col.resolvedOptions().locale); +} +); + +valid_locales.forEach(function(l) { + let col = new Intl.Collator([l + "-fo-obar"]); + assertEquals(l, col.resolvedOptions().locale); +} +); diff --git a/test/intl/collator/check-kf-option.js b/test/intl/collator/check-kf-option.js new file mode 100644 index 0000000000..45085c667e --- /dev/null +++ b/test/intl/collator/check-kf-option.js @@ -0,0 +1,36 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +let invalid_kf = [ + "invalid", + "abce", + "none", + "true", +]; + +let valid_kf= [ + "false", + "upper", + "lower", +]; + +let locales = [ + "en", + "fr", +]; + +invalid_kf.forEach(function(kf) { + let col = new Intl.Collator(["en-u-kf-" + kf + "-fo-obar"]); + assertEquals("en", col.resolvedOptions().locale); +} +); + +valid_kf.forEach(function(kf) { + locales.forEach(function(base) { + let l = base + "-u-kf-" + kf; + let col = new Intl.Collator([l + "-fo-obar"]); + assertEquals(l, col.resolvedOptions().locale); + }); +} +); diff --git a/test/intl/collator/check-kn-option.js b/test/intl/collator/check-kn-option.js new file mode 100644 index 0000000000..0e3a82fe26 --- /dev/null +++ b/test/intl/collator/check-kn-option.js @@ -0,0 +1,29 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +let invalid_kn = [ + "invalid", + "search", + "standard", + "abce", +]; + +let valid_kn = [ + ["en-u-kn", true, "en-u-kn"], + ["en-u-kn-true", true, "en-u-kn"], + ["en-u-kn-false",false, "en-u-kn-false"], +]; + +invalid_kn.forEach(function(kn) { + let col = new Intl.Collator(["en-u-kn-" + kn]); + assertEquals("en", col.resolvedOptions().locale); +} +); + +valid_kn.forEach(function(l) { + let col = new Intl.Collator([l[0] + "-fo-obar"]); + assertEquals(l[1], col.resolvedOptions().numeric); + assertEquals(l[2], col.resolvedOptions().locale); +} +); diff --git a/test/intl/date-format/check-ca-option.js b/test/intl/date-format/check-ca-option.js new file mode 100644 index 0000000000..d27ae44b48 --- /dev/null +++ b/test/intl/date-format/check-ca-option.js @@ -0,0 +1,51 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + + +let invalid_ca = [ + "invalid", + "abce", +]; + +// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml +let valid_ca= [ + "buddhist", + "chinese", + "coptic", + "dangi", + "ethioaa", + "ethiopic", + "gregory", + "hebrew", + "indian", + "islamic", + "islamic-umalqura", + "islamic-tbla", + "islamic-civil", + "islamic-rgsa", + "iso8601", + "japanese", + "persian", + "roc", +]; + +let locales = [ + "en", + "ar", +]; + +invalid_ca.forEach(function(ca) { + let df = new Intl.DateTimeFormat(["en-u-ca-" + ca + "-fo-obar"]); + assertEquals("en", df.resolvedOptions().locale); +} +); + +valid_ca.forEach(function(ca) { + locales.forEach(function(base) { + let l = base + "-u-ca-" + ca; + let df = new Intl.DateTimeFormat([l + "-fo-obar"]); + assertEquals(l, df.resolvedOptions().locale); + }); +} +); diff --git a/test/intl/date-format/check-nu-option.js b/test/intl/date-format/check-nu-option.js new file mode 100644 index 0000000000..7d4b4dc927 --- /dev/null +++ b/test/intl/date-format/check-nu-option.js @@ -0,0 +1,59 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + + +let invalid_nu = [ + "invalid", + "abce", + "finance", + "native", + "traditio", +]; + +// https://tc39.github.io/ecma402/#table-numbering-system-digits +let valid_nu= [ + "arab", + "arabext", + "bali", + "beng", + "deva", + "fullwide", + "gujr", + "guru", + "hanidec", + "khmr", + "knda", + "laoo", + "latn", + "limb", + "mlym", + "mong", + "mymr", + "orya", + "tamldec", + "telu", + "thai", + "tibt", +]; + +let locales = [ + "en", + "ar", +]; + + +invalid_nu.forEach(function(nu) { + let df = new Intl.DateTimeFormat(["en-u-nu-" + nu + "-fo-obar"]); + assertEquals("en", df.resolvedOptions().locale); +} +); + +valid_nu.forEach(function(nu) { + locales.forEach(function(base) { + let l = base + "-u-nu-" + nu; + let df = new Intl.DateTimeFormat([l + "-fo-obar"]); + assertEquals(l, df.resolvedOptions().locale); + }); +} +); diff --git a/test/intl/number-format/check-nu-option.js b/test/intl/number-format/check-nu-option.js new file mode 100644 index 0000000000..39c4cbb8cf --- /dev/null +++ b/test/intl/number-format/check-nu-option.js @@ -0,0 +1,59 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + + +let invalid_nu = [ + "invalid", + "abce", + "finance", + "native", + "traditio", +]; + +// https://tc39.github.io/ecma402/#table-numbering-system-digits +let valid_nu= [ + "arab", + "arabext", + "bali", + "beng", + "deva", + "fullwide", + "gujr", + "guru", + "hanidec", + "khmr", + "knda", + "laoo", + "latn", + "limb", + "mlym", + "mong", + "mymr", + "orya", + "tamldec", + "telu", + "thai", + "tibt", +]; + +let locales = [ + "en", + "ar", +]; + + +invalid_nu.forEach(function(nu) { + let nf = new Intl.NumberFormat(["en-u-nu-" + nu + "-fo-obar"]); + assertEquals("en", nf.resolvedOptions().locale); +} +); + +valid_nu.forEach(function(nu) { + locales.forEach(function(base) { + let l = base + "-u-nu-" + nu; + let nf = new Intl.NumberFormat([l + "-fo-obar"]); + assertEquals(l, nf.resolvedOptions().locale); + }); +} +); diff --git a/test/intl/regress-7481.js b/test/intl/regress-7481.js new file mode 100644 index 0000000000..c3441e35cb --- /dev/null +++ b/test/intl/regress-7481.js @@ -0,0 +1,39 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +assertEquals( + "en-u-hc-h11-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h11-nu-arab"]).resolvedOptions().locale +); +assertEquals( + "en-u-hc-h12-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h12-nu-arab"]).resolvedOptions().locale +); +assertEquals( + "en-u-hc-h23-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h23-nu-arab"]).resolvedOptions().locale +); +assertEquals( + "en-u-hc-h24-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h24-nu-arab"]).resolvedOptions().locale +); + +// https://tc39.github.io/ecma402/#sec-intl.datetimeformat-internal-slots +// invalid hc should be removed +// [[LocaleData]][locale].hc must be « null, "h11", "h12", "h23", "h24" » for all locale values. +assertEquals( + "en-u-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h10-nu-arab"]).resolvedOptions().locale +); +assertEquals( + "en-u-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h13-nu-arab"]).resolvedOptions().locale +); +assertEquals( + "en-u-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h22-nu-arab"]).resolvedOptions().locale +); +assertEquals( + "en-u-nu-arab", + new Intl.DateTimeFormat(["en-u-hc-h25-nu-arab"]).resolvedOptions().locale +); diff --git a/test/intl/segmenter/check-lb-option.js b/test/intl/segmenter/check-lb-option.js new file mode 100644 index 0000000000..b56b76fc95 --- /dev/null +++ b/test/intl/segmenter/check-lb-option.js @@ -0,0 +1,41 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --harmony-intl-segmenter + +let invalid_lb = [ + "invalid", + "abce", + "breakall", + "keepall", + "none", + "standard", +]; + +let valid_lb= [ + "strict", + "normal", + "loose", +]; + +let locales = [ + "en", + "ja", + "zh", +]; + +invalid_lb.forEach(function(lb) { + let df = new Intl.Segmenter(["en-u-lb-" + lb + "-fo-obar"]); + assertEquals("en", df.resolvedOptions().locale); +} +); + +valid_lb.forEach(function(lb) { + locales.forEach(function(base) { + let l = base + "-u-lb-" + lb; + let df = new Intl.Segmenter([l + "-fo-obar"]); + assertEquals(l, df.resolvedOptions().locale); + }); +} +); diff --git a/test/mjsunit/regress/regress-6288.js b/test/mjsunit/regress/regress-6288.js index 5f550c31c8..96499d9378 100644 --- a/test/mjsunit/regress/regress-6288.js +++ b/test/mjsunit/regress/regress-6288.js @@ -4,10 +4,7 @@ // Environment Variables: LC_ALL=pt-BR.UTF8 -// The data files packaged with d8 currently have Brazillian Portuguese -// DateTimeFormat but not Collation - if (this.Intl) { - assertEquals('pt', Intl.Collator().resolvedOptions().locale); + assertEquals('pt-BR', Intl.Collator().resolvedOptions().locale); assertEquals('pt-BR', Intl.DateTimeFormat().resolvedOptions().locale); } diff --git a/test/test262/test262.status b/test/test262/test262.status index 4f515c7ca4..fde85c828a 100644 --- a/test/test262/test262.status +++ b/test/test262/test262.status @@ -586,8 +586,7 @@ 'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL], - # https://bugs.chromium.org/p/v8/issues/detail?id=7481 - 'intl402/NumberFormat/ignore-invalid-unicode-ext-values': [FAIL], + # https://bugs.chromium.org/p/v8/issues/detail?id=8469 'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL], # https://bugs.chromium.org/p/v8/issues/detail?id=7482 @@ -604,7 +603,9 @@ # https://crbug.com/v8/7808 'intl402/String/prototype/localeCompare/returns-same-results-as-Collator': [SKIP], 'intl402/Collator/prototype/compare/bound-to-collator-instance': [SKIP], - 'intl402/Collator/ignore-invalid-unicode-ext-values': [SKIP], + + # https://github.com/tc39/ecma402/issues/223 + 'intl402/Collator/missing-unicode-ext-value-defaults-to-true': [FAIL], # https://bugs.chromium.org/p/v8/issues/detail?id=8260 'intl402/Locale/constructor-non-iana-canon': [FAIL],