[Intl] Correctly pass usage option to Collator

The spec specifies that search and standard can not be valid values
for the collation extension keyword. Instead users are expected to use
the options bag to set the correct usage options.

But, ICU expects the usage option to be set through the collation
extension value.

In this patch, we set the usage option using the collation extension
value in ICU. For resolvedOptions, we filter out this extension value
using ICU to be spec compatible.

Previously, we stored the usage option on the JSCollator instance. But
this patch changes the logic to just look it up from the icu::Collator
when required. This saves one word of memory.

This fails a test262 that was incorrectly fixed. A follow on patch
will fix the test262 test.

Bug: v8:5751
Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng
Change-Id: I8c66c6286287e686f4cd152fa1301f9d51c38654
Reviewed-on: https://chromium-review.googlesource.com/1180488
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Sathya Gunasekaran <gsathya@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55437}
This commit is contained in:
Sathya Gunasekaran 2018-08-27 14:32:49 -07:00 committed by Commit Bot
parent 812c0acd9a
commit 59c03fef01
9 changed files with 234 additions and 102 deletions

View File

@ -1240,7 +1240,6 @@ BUILTIN(CollatorConstructor) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, collator_obj,
JSObject::New(target, new_target));
Handle<JSCollator> collator = Handle<JSCollator>::cast(collator_obj);
collator->set_flags(0);
// 6. Return ? InitializeCollator(collator, locales, options).
RETURN_RESULT_OR_FAILURE(isolate, JSCollator::InitializeCollator(

View File

@ -1875,7 +1875,6 @@ void JSCollator::JSCollatorVerify(Isolate* isolate) {
CHECK(IsJSCollator());
JSObjectVerify(isolate);
VerifyObjectField(isolate, kICUCollatorOffset);
VerifyObjectField(isolate, kFlagsOffset);
VerifyObjectField(isolate, kBoundCompareOffset);
}

View File

@ -1955,7 +1955,6 @@ void Script::ScriptPrint(std::ostream& os) { // NOLINT
#ifdef V8_INTL_SUPPORT
void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, this, "JSCollator");
os << "\n - usage: " << JSCollator::UsageToString(usage());
os << "\n - icu collator: " << Brief(icu_collator());
os << "\n - bound compare: " << Brief(bound_compare());
os << "\n";

View File

@ -20,18 +20,6 @@ namespace internal {
ACCESSORS(JSCollator, icu_collator, Managed<icu::Collator>, kICUCollatorOffset)
ACCESSORS(JSCollator, bound_compare, Object, kBoundCompareOffset);
SMI_ACCESSORS(JSCollator, flags, kFlagsOffset)
inline void JSCollator::set_usage(Usage usage) {
DCHECK_LT(usage, Usage::COUNT);
int hints = flags();
hints = UsageBits::update(hints, usage);
set_flags(hints);
}
inline JSCollator::Usage JSCollator::usage() const {
return UsageBits::decode(flags());
}
CAST_ACCESSOR(JSCollator);

View File

@ -22,6 +22,11 @@ namespace internal {
namespace {
enum class Usage {
SORT,
SEARCH,
};
// TODO(gsathya): Consider internalizing the value strings.
void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
Handle<String> key, const char* value) {
@ -47,6 +52,13 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
.FromJust());
}
void toLanguageTag(const icu::Locale& locale, char* tag) {
UErrorCode status = U_ZERO_ERROR;
uloc_toLanguageTag(locale.getName(), tag, ULOC_FULLNAME_CAPACITY, FALSE,
&status);
CHECK(U_SUCCESS(status));
}
} // anonymous namespace
// static
@ -55,11 +67,6 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
Handle<JSObject> options =
isolate->factory()->NewJSObject(isolate->object_function());
JSCollator::Usage usage = collator->usage();
CreateDataPropertyForOptions(isolate, options,
isolate->factory()->usage_string(),
JSCollator::UsageToString(usage));
icu::Collator* icu_collator = collator->icu_collator()->raw();
CHECK_NOT_NULL(icu_collator);
@ -128,44 +135,65 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
ignore_punctuation);
status = U_ZERO_ERROR;
const char* collation;
std::unique_ptr<icu::StringEnumeration> collation_values(
icu_collator->getKeywordValues("co", status));
// Collation wasn't provided as a keyword to icu, use default.
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
CreateDataPropertyForOptions(
isolate, options, isolate->factory()->collation_string(), "default");
icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status));
CHECK(U_SUCCESS(status));
const char* collation = "default";
const char* usage = "sort";
const char* collation_key = "co";
const char* legacy_collation_key = uloc_toLegacyKey(collation_key);
DCHECK_NOT_NULL(legacy_collation_key);
char bcp47_locale_tag[ULOC_FULLNAME_CAPACITY];
char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
status = U_ZERO_ERROR;
int32_t length =
icu_locale.getKeywordValue(legacy_collation_key, legacy_collation_value,
ULOC_FULLNAME_CAPACITY, status);
if (length > 0 && U_SUCCESS(status)) {
const char* collation_value =
uloc_toUnicodeLocaleType(collation_key, legacy_collation_value);
CHECK_NOT_NULL(collation_value);
if (strcmp(collation_value, "search") == 0) {
usage = "search";
// Search is disallowed as a collation value per spec. Let's
// use `default`, instead.
//
// https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances
collation = "default";
// We clone the icu::Locale because we don't want the
// icu_collator to be affected when we remove the collation key
// below.
icu::Locale new_icu_locale = icu_locale;
// The spec forbids the search as a collation value in the
// locale tag, so let's filter it out.
status = U_ZERO_ERROR;
new_icu_locale.setKeywordValue(legacy_collation_key, NULL, status);
CHECK(U_SUCCESS(status));
toLanguageTag(new_icu_locale, bcp47_locale_tag);
} else {
collation = collation_value;
toLanguageTag(icu_locale, bcp47_locale_tag);
}
} else {
CHECK(U_SUCCESS(status));
CHECK_NOT_NULL(collation_values.get());
int32_t length;
status = U_ZERO_ERROR;
collation = collation_values->next(&length, status);
CHECK(U_SUCCESS(status));
// There has to be at least one value.
CHECK_NOT_NULL(collation);
CreateDataPropertyForOptions(
isolate, options, isolate->factory()->collation_string(), collation);
status = U_ZERO_ERROR;
collation_values->reset(status);
CHECK(U_SUCCESS(status));
toLanguageTag(icu_locale, bcp47_locale_tag);
}
status = U_ZERO_ERROR;
icu::Locale icu_locale = icu_collator->getLocale(ULOC_VALID_LOCALE, status);
CHECK(U_SUCCESS(status));
char result[ULOC_FULLNAME_CAPACITY];
status = U_ZERO_ERROR;
uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
FALSE, &status);
CHECK(U_SUCCESS(status));
CreateDataPropertyForOptions(
isolate, options, isolate->factory()->collation_string(), collation);
CreateDataPropertyForOptions(isolate, options,
isolate->factory()->locale_string(), result);
isolate->factory()->usage_string(), usage);
CreateDataPropertyForOptions(
isolate, options, isolate->factory()->locale_string(), bcp47_locale_tag);
return options;
}
@ -217,7 +245,7 @@ MaybeHandle<JSCollator> JSCollator::InitializeCollator(
// "search" », "sort").
std::vector<const char*> values = {"sort", "search"};
std::unique_ptr<char[]> usage_str = nullptr;
JSCollator::Usage usage = JSCollator::Usage::SORT;
Usage usage = Usage::SORT;
Maybe<bool> found_usage = Intl::GetStringOption(
isolate, options, "usage", values, "Intl.Collator", &usage_str);
MAYBE_RETURN(found_usage, MaybeHandle<JSCollator>());
@ -225,21 +253,10 @@ MaybeHandle<JSCollator> JSCollator::InitializeCollator(
if (found_usage.FromJust()) {
DCHECK_NOT_NULL(usage_str.get());
if (strcmp(usage_str.get(), "search") == 0) {
usage = JSCollator::Usage::SEARCH;
usage = Usage::SEARCH;
}
}
// 5. Set collator.[[Usage]] to usage.
collator->set_usage(usage);
// 6. If usage is "sort", then
// a. Let localeData be %Collator%.[[SortLocaleData]].
// 7. Else,
// a. Let localeData be %Collator%.[[SearchLocaleData]].
//
// The above two spec operations aren't required, the Intl spec is
// crazy. See https://github.com/tc39/ecma402/issues/256
// TODO(gsathya): This is currently done as part of the
// Intl::ResolveLocale call below. Fix this once resolveLocale is
// changed to not do the lookup.
@ -339,11 +356,38 @@ MaybeHandle<JSCollator> JSCollator::InitializeCollator(
const std::string& value = co_extension_it->second;
if ((value == "search") || (value == "standard")) {
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue("co", NULL, status);
const char* key = uloc_toLegacyKey("co");
icu_locale.setKeywordValue(key, NULL, status);
CHECK(U_SUCCESS(status));
}
}
// 5. Set collator.[[Usage]] to usage.
//
// 6. If usage is "sort", then
// a. Let localeData be %Collator%.[[SortLocaleData]].
// 7. Else,
// a. Let localeData be %Collator%.[[SearchLocaleData]].
//
// The Intl spec doesn't allow us to use "search" as an extension
// value for collation as per:
// https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
//
// But the only way to pass the value "search" for collation from
// the options object to ICU is to use the 'co' extension keyword.
//
// This will need to be filtered out when creating the
// resolvedOptions object.
if (usage == Usage::SEARCH) {
const char* key = uloc_toLegacyKey("co");
CHECK_NOT_NULL(key);
const char* value = uloc_toLegacyType(key, "search");
CHECK_NOT_NULL(value);
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue(key, value, status);
CHECK(U_SUCCESS(status));
}
// 20. If collation is null, let collation be "default".
// 21. Set collator.[[Collation]] to collation.
//
@ -478,17 +522,5 @@ MaybeHandle<JSCollator> JSCollator::InitializeCollator(
return collator;
}
// static
const char* JSCollator::UsageToString(Usage usage) {
switch (usage) {
case Usage::SORT:
return "sort";
case Usage::SEARCH:
return "search";
case Usage::COUNT:
UNREACHABLE();
}
}
} // namespace internal
} // namespace v8

View File

@ -36,22 +36,9 @@ class JSCollator : public JSObject {
DECL_PRINTER(JSCollator)
DECL_VERIFIER(JSCollator)
// [[Usage]] is one of the values "sort" or "search", identifying
// the collator usage.
enum class Usage {
SORT,
SEARCH,
COUNT
};
inline void set_usage(Usage usage);
inline Usage usage() const;
static const char* UsageToString(Usage usage);
// Layout description.
#define JS_COLLATOR_FIELDS(V) \
V(kICUCollatorOffset, kPointerSize) \
V(kFlagsOffset, kPointerSize) \
V(kBoundCompareOffset, kPointerSize) \
/* Total size. */ \
V(kSize, 0)
@ -67,18 +54,8 @@ class JSCollator : public JSObject {
kLength
};
// Bit positions in |flags|.
#define FLAGS_BIT_FIELDS(V, _) V(UsageBits, Usage, 1, _)
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
#undef FLAGS_BIT_FIELDS
STATIC_ASSERT(Usage::SORT <= UsageBits::kMax);
STATIC_ASSERT(Usage::SEARCH <= UsageBits::kMax);
DECL_ACCESSORS(icu_collator, Managed<icu::Collator>)
DECL_ACCESSORS(bound_compare, Object);
DECL_INT_ACCESSORS(flags)
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JSCollator);

View File

@ -42,3 +42,17 @@ assertEquals('flüße', result[5]);
assertEquals('FUSSE', result[6]);
assertEquals('Fuße', result[7]);
assertEquals('März', result[8]);
result = ["AE", "Ä"].sort(new Intl.Collator("de", {usage: "sort"}).compare)
assertEquals("Ä", result[0]);
assertEquals("AE", result[1]);
result = ["AE", "Ä"].sort(new Intl.Collator("de", {usage: "search"}).compare)
assertEquals("AE", result[0]);
assertEquals("Ä", result[1]);
var collator = new Intl.Collator("de", {usage: "search"});
collator.resolvedOptions() // This triggers the code that removes the u-co-search keyword
result = ["AE", "Ä"].sort(collator.compare)
assertEquals("AE", result[0]);
assertEquals("Ä", result[1]);

View File

@ -0,0 +1,121 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// No locale
var collatorWithOptions = new Intl.Collator(undefined);
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag(%GetDefaultICULocale(), locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator(undefined, {usage: 'sort'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag(%GetDefaultICULocale(), locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator(undefined, {usage: 'search'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertEquals('search', usage);
assertEquals('default', collation);
assertLanguageTag(%GetDefaultICULocale(), locale);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator(locale);
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag(%GetDefaultICULocale(), locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
// With Locale
collatorWithOptions = new Intl.Collator('en-US');
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US', {usage: 'sort'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US', {usage: 'search'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertEquals('search', usage);
assertEquals('default', collation);
assertLanguageTag('en-US', locale);
assertEquals(locale.indexOf('-co-search'), -1);
// With invalid collation value = 'search'
collatorWithOptions = new Intl.Collator('en-US-u-co-search');
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US-u-co-search', {usage: 'sort'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US-u-co-search', {usage: 'search'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('search', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
// With invalid collation value = 'standard'
collatorWithOptions = new Intl.Collator('en-US-u-co-standard');
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US-u-co-standard', {usage: 'sort'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US-u-co-standard', {usage: 'search'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('search', usage);
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);
// With valid collation value = 'emoji'
collatorWithOptions = new Intl.Collator('en-US-u-co-emoji');
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('emoji', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US-u-co-emoji', {usage: 'sort'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('sort', usage);
assertEquals('emoji', collation);
assertEquals(locale.indexOf('-co-search'), -1);
collatorWithOptions = new Intl.Collator('en-US-u-co-emoji', {usage: 'search'});
var { locale, usage, collation } = collatorWithOptions.resolvedOptions();
assertLanguageTag('en-US', locale);
assertEquals('search', usage);
// usage = search overwrites emoji as a collation value.
assertEquals('default', collation);
assertEquals(locale.indexOf('-co-search'), -1);

View File

@ -432,6 +432,9 @@
# https://bugs.chromium.org/p/v8/issues/detail?id=7669
'intl402/Intl/getCanonicalLocales/canonicalized-tags': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=8051
'intl402/Collator/unicode-ext-seq-in-private-tag': [FAIL],
# Tests assume that the sort order of "same elements" (comparator returns 0)
# is deterministic.
# https://crbug.com/v8/7808