Implement the localeMatcher: "best fit"
Implement the ECMA402 localeMatcher: "best fit" option by using ICU LocaleMatcher API. Bug: v8:7051 Change-Id: I3d7c1ee39a5c649a5f500429f2b41329346a1a78 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1943050 Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Commit-Queue: Frank Tang <ftang@chromium.org> Cr-Commit-Position: refs/heads/master@{#65507}
This commit is contained in:
parent
652108e860
commit
565c4fee1d
@ -34,6 +34,7 @@
|
||||
#include "unicode/decimfmt.h"
|
||||
#include "unicode/formattedvalue.h"
|
||||
#include "unicode/localebuilder.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/numberformatter.h"
|
||||
@ -1440,12 +1441,114 @@ std::vector<std::string> LookupSupportedLocales(
|
||||
return subset;
|
||||
}
|
||||
|
||||
// Builds an ICU LocaleMatcher over |available_locales|, using the isolate's
// default locale as the matcher's default.
//
// |status| is an in/out ICU error code shared by every ICU call made here.
// Failing to parse the default locale is fatal (CHECK); the status left by
// parsing the available locales and by build() is handed back to the caller.
icu::LocaleMatcher BuildLocaleMatcher(
    Isolate* isolate, const std::set<std::string>& available_locales,
    UErrorCode* status) {
  icu::Locale fallback =
      icu::Locale::forLanguageTag(DefaultLocale(isolate), *status);
  CHECK(U_SUCCESS(*status));

  icu::LocaleMatcher::Builder matcher_builder;
  matcher_builder.setDefaultLocale(&fallback);
  // Register every available locale as a supported locale of the matcher.
  for (const std::string& tag : available_locales) {
    matcher_builder.addSupportedLocale(
        icu::Locale::forLanguageTag(tag.c_str(), *status));
  }

  return matcher_builder.build(*status);
}
|
||||
|
||||
class Iterator : public icu::Locale::Iterator {
|
||||
public:
|
||||
Iterator(std::vector<std::string>::const_iterator begin,
|
||||
std::vector<std::string>::const_iterator end)
|
||||
: iter_(begin), end_(end) {}
|
||||
virtual ~Iterator() {}
|
||||
|
||||
UBool hasNext() const override { return iter_ != end_; }
|
||||
|
||||
const icu::Locale& next() override {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
|
||||
CHECK(U_SUCCESS(status));
|
||||
++iter_;
|
||||
return locale_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::string>::const_iterator iter_;
|
||||
std::vector<std::string>::const_iterator end_;
|
||||
icu::Locale locale_;
|
||||
};
|
||||
|
||||
// ecma402/#sec-bestfitmatcher
|
||||
// The BestFitMatcher abstract operation compares requestedLocales, which must
|
||||
// be a List as returned by CanonicalizeLocaleList, against the locales in
|
||||
// availableLocales and determines the best available language to meet the
|
||||
// request. The algorithm is implementation dependent, but should produce
|
||||
// results that a typical user of the requested locales would perceive
|
||||
// as at least as good as those produced by the LookupMatcher abstract
|
||||
// operation. Options specified through Unicode locale extension sequences must
|
||||
// be ignored by the algorithm. Information about such subsequences is returned
|
||||
// separately. The abstract operation returns a record with a [[locale]] field,
|
||||
// whose value is the language tag of the selected locale, which must be an
|
||||
// element of availableLocales. If the language tag of the request locale that
|
||||
// led to the selected locale contained a Unicode locale extension sequence,
|
||||
// then the returned record also contains an [[extension]] field whose value is
|
||||
// the first Unicode locale extension sequence within the request locale
|
||||
// language tag.
|
||||
std::string BestFitMatcher(Isolate* isolate,
|
||||
const std::set<std::string>& available_locales,
|
||||
const std::vector<std::string>& requested_locales) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
icu::LocaleMatcher matcher =
|
||||
BuildLocaleMatcher(isolate, available_locales, &status);
|
||||
CHECK(U_SUCCESS(status));
|
||||
|
||||
Iterator iter(requested_locales.cbegin(), requested_locales.cend());
|
||||
std::string bestfit =
|
||||
matcher.getBestMatch(iter, status)->toLanguageTag<std::string>(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return DefaultLocale(isolate);
|
||||
}
|
||||
// We need to return the extensions with it.
|
||||
for (auto it = requested_locales.begin(); it != requested_locales.end();
|
||||
++it) {
|
||||
if (it->find(bestfit) == 0) {
|
||||
return *it;
|
||||
}
|
||||
}
|
||||
return bestfit;
|
||||
}
|
||||
|
||||
// ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
|
||||
// https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
|
||||
// Returns, in request order, the requested locales that the ICU best-fit
// matcher can serve from |available_locales|.
//
// A requested tag is kept when it equals DefaultLocale(isolate), or when the
// tag of its best match is a prefix of the requested tag (which keeps any
// trailing extension). Requested tags for which ICU reports an error or
// yields no match are left out.
std::vector<std::string> BestFitSupportedLocales(
    Isolate* isolate, const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales) {
  UErrorCode status = U_ZERO_ERROR;
  icu::LocaleMatcher matcher =
      BuildLocaleMatcher(isolate, available_locales, &status);
  CHECK(U_SUCCESS(status));

  std::string default_locale = DefaultLocale(isolate);
  std::vector<std::string> result;
  for (const std::string& requested : requested_locales) {
    if (requested == default_locale) {
      result.push_back(requested);
      continue;
    }
    // Every requested locale is matched with a fresh error code so that one
    // failing tag does not affect the ones after it.
    status = U_ZERO_ERROR;
    icu::Locale desired =
        icu::Locale::forLanguageTag(requested.c_str(), status);
    // getBestMatch() may return nullptr (in particular when |status| already
    // holds a failure), so check before dereferencing.
    const icu::Locale* best = matcher.getBestMatch(desired, status);
    if (U_FAILURE(status) || best == nullptr) {
      continue;
    }
    std::string bestfit = best->toLanguageTag<std::string>(status);
    // We need to return the extensions with it.
    if (U_SUCCESS(status) &&
        requested.compare(0, bestfit.size(), bestfit) == 0) {
      result.push_back(requested);
    }
  }
  return result;
}
|
||||
|
||||
// ecma262 #sec-createarrayfromlist
|
||||
@ -1470,6 +1573,10 @@ Handle<JSArray> CreateArrayFromList(Isolate* isolate,
|
||||
return array;
|
||||
}
|
||||
|
||||
// To mitigate the risk of bestfit locale matcher, we first check in without
|
||||
// turning it on.
|
||||
static bool implement_bestfit = false;
|
||||
|
||||
// ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
|
||||
// https://tc39.github.io/ecma402/#sec-supportedlocales
|
||||
MaybeHandle<JSObject> SupportedLocales(
|
||||
@ -1499,14 +1606,13 @@ MaybeHandle<JSObject> SupportedLocales(
|
||||
// 3. If matcher is "best fit", then
|
||||
// a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
|
||||
// requestedLocales).
|
||||
if (matcher == Intl::MatcherOption::kBestFit) {
|
||||
if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
|
||||
supported_locales =
|
||||
BestFitSupportedLocales(available_locales, requested_locales);
|
||||
BestFitSupportedLocales(isolate, available_locales, requested_locales);
|
||||
} else {
|
||||
// 4. Else,
|
||||
// a. Let supportedLocales be LookupSupportedLocales(availableLocales,
|
||||
// requestedLocales).
|
||||
DCHECK_EQ(matcher, Intl::MatcherOption::kLookup);
|
||||
supported_locales =
|
||||
LookupSupportedLocales(available_locales, requested_locales);
|
||||
}
|
||||
@ -1756,10 +1862,9 @@ Intl::ResolvedLocale Intl::ResolveLocale(
|
||||
const std::vector<std::string>& requested_locales, MatcherOption matcher,
|
||||
const std::set<std::string>& relevant_extension_keys) {
|
||||
std::string locale;
|
||||
if (matcher == Intl::MatcherOption::kLookup) {
|
||||
locale = LookupMatcher(isolate, available_locales, requested_locales);
|
||||
} else if (matcher == Intl::MatcherOption::kBestFit) {
|
||||
// TODO(intl): Implement better lookup algorithm.
|
||||
if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
|
||||
locale = BestFitMatcher(isolate, available_locales, requested_locales);
|
||||
} else {
|
||||
locale = LookupMatcher(isolate, available_locales, requested_locales);
|
||||
}
|
||||
|
||||
@ -1982,8 +2087,8 @@ Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
|
||||
const char* method) {
|
||||
return Intl::GetStringOption<Intl::MatcherOption>(
|
||||
isolate, options, "localeMatcher", method, {"best fit", "lookup"},
|
||||
{Intl::MatcherOption::kLookup, Intl::MatcherOption::kBestFit},
|
||||
Intl::MatcherOption::kLookup);
|
||||
{Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
|
||||
Intl::MatcherOption::kBestFit);
|
||||
}
|
||||
|
||||
Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
|
||||
|
@ -17,4 +17,6 @@ var strLocale = Intl.DisplayNames.supportedLocalesOf('sr');
|
||||
assertEquals('sr', strLocale[0]);
|
||||
|
||||
var multiLocale = ['sr-Thai-RS', 'de', 'zh-CN'];
|
||||
assertEquals(multiLocale, Intl.DisplayNames.supportedLocalesOf(multiLocale));
|
||||
assertEquals(multiLocale,
|
||||
Intl.DisplayNames.supportedLocalesOf(multiLocale,
|
||||
{localeMatcher: "lookup"}));
|
||||
|
@ -51,7 +51,8 @@ for (const service of services) {
|
||||
assertEquals("sr", strLocale[0]);
|
||||
|
||||
var locales = ["sr-Thai-RS", "de", "zh-CN"];
|
||||
let multiLocale = service.supportedLocalesOf(locales);
|
||||
let multiLocale = service.supportedLocalesOf(
|
||||
locales, {localeMatcher: "lookup"});
|
||||
assertEquals("sr-Thai-RS", multiLocale[0]);
|
||||
assertEquals("de", multiLocale[1]);
|
||||
assertEquals("zh-CN", multiLocale[2]);
|
||||
|
@ -15,4 +15,5 @@ var strLocale = Intl.ListFormat.supportedLocalesOf('sr');
|
||||
assertEquals('sr', strLocale[0]);
|
||||
|
||||
var multiLocale = ['sr-Thai-RS', 'de', 'zh-CN'];
|
||||
assertEquals(multiLocale, Intl.ListFormat.supportedLocalesOf(multiLocale));
|
||||
assertEquals(multiLocale,
|
||||
Intl.ListFormat.supportedLocalesOf(multiLocale, {localeMatcher: "lookup"}));
|
||||
|
@ -15,4 +15,6 @@ var strLocale = Intl.RelativeTimeFormat.supportedLocalesOf('sr');
|
||||
assertEquals('sr', strLocale[0]);
|
||||
|
||||
var multiLocale = ['sr-Thai-RS', 'de', 'zh-CN'];
|
||||
assertEquals(multiLocale, Intl.RelativeTimeFormat.supportedLocalesOf(multiLocale));
|
||||
assertEquals(multiLocale,
|
||||
Intl.RelativeTimeFormat.supportedLocalesOf(multiLocale,
|
||||
{localeMatcher: "lookup"}));
|
||||
|
@ -19,4 +19,5 @@ const strLocale = Intl.Segmenter.supportedLocalesOf("sr");
|
||||
assertEquals("sr", strLocale[0]);
|
||||
|
||||
const multiLocale = ["sr-Thai-RS", "de", "zh-CN"];
|
||||
assertEquals(multiLocale, Intl.Segmenter.supportedLocalesOf(multiLocale));
|
||||
assertEquals(multiLocale,
|
||||
Intl.Segmenter.supportedLocalesOf(multiLocale, {localeMatcher: "lookup"}));
|
||||
|
Loading…
Reference in New Issue
Block a user