[Intl] Optimize Intl.Collator

This patch ports most of the Intl.Collator from JS to C++.

The Intl.Collator object no longer stores all the resolved
values. Instead these are looked up on demand as part of
Intl.Collator.prototype.resolvedOptions(), saving several words. In
the future, we can cache the result of the resolvedOptions as well.

In this patch, we use ICU to do parsing of the unicode extension in
the bcp47 language tag instead of using a custom extension parser.

This patch also fixes several spec compliance bugs.

Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng
Change-Id: Iaaa7be4a628404da1bd83d882e04a2c6de70ebd9
Bug: v8:5751, v8:7480
Reviewed-on: https://chromium-review.googlesource.com/1165084
Commit-Queue: Sathya Gunasekaran <gsathya@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54965}
This commit is contained in:
Sathya Gunasekaran 2018-08-08 10:23:07 +01:00 committed by Commit Bot
parent 18a0a9746f
commit 363fe1eb66
24 changed files with 860 additions and 466 deletions

View File

@ -2158,6 +2158,9 @@ v8_source_set("v8_base") {
"src/objects/intl-objects.h",
"src/objects/js-array-inl.h",
"src/objects/js-array.h",
"src/objects/js-collator-inl.h",
"src/objects/js-collator.cc",
"src/objects/js-collator.h",
"src/objects/js-collection-inl.h",
"src/objects/js-collection.h",
"src/objects/js-generator-inl.h",
@ -2862,6 +2865,9 @@ v8_source_set("v8_base") {
"src/objects/intl-objects-inl.h",
"src/objects/intl-objects.cc",
"src/objects/intl-objects.h",
"src/objects/js-collator-inl.h",
"src/objects/js-collator.cc",
"src/objects/js-collator.h",
"src/objects/js-list-format-inl.h",
"src/objects/js-list-format.cc",
"src/objects/js-list-format.h",

View File

@ -24,6 +24,7 @@
#include "src/objects/hash-table-inl.h"
#ifdef V8_INTL_SUPPORT
#include "src/objects/intl-objects.h"
#include "src/objects/js-collator.h"
#include "src/objects/js-list-format.h"
#include "src/objects/js-locale.h"
#endif // V8_INTL_SUPPORT
@ -2935,9 +2936,11 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
{
Handle<JSFunction> collator_constructor = InstallFunction(
isolate_, intl, "Collator", JS_OBJECT_TYPE, Collator::kSize, 0,
factory->the_hole_value(), Builtins::kIllegal);
native_context()->set_intl_collator_function(*collator_constructor);
isolate_, intl, "Collator", JS_INTL_COLLATOR_TYPE, JSCollator::kSize,
0, factory->the_hole_value(), Builtins::kCollatorConstructor);
collator_constructor->shared()->DontAdaptArguments();
InstallWithIntrinsicDefaultProto(isolate_, collator_constructor,
Context::INTL_COLLATOR_FUNCTION_INDEX);
Handle<JSObject> prototype(
JSObject::cast(collator_constructor->prototype()), isolate_);

View File

@ -1324,6 +1324,8 @@ namespace internal {
BUILTIN_LIST_BASE(CPP, API, TFJ, TFC, TFS, TFH, ASM) \
BUILTIN_LIST_FROM_DSL(CPP, API, TFJ, TFC, TFS, TFH, ASM) \
\
/* ecma402 #sec-intl.collator */ \
CPP(CollatorConstructor) \
TFS(StringToLowerCaseIntl, kString) \
/* ES #sec-string.prototype.tolowercase */ \
TFJ(StringPrototypeToLowerCaseIntl, 0, kReceiver) \

View File

@ -15,6 +15,7 @@
#include "src/intl.h"
#include "src/objects-inl.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-list-format-inl.h"
#include "src/objects/js-locale-inl.h"
#include "src/objects/js-plural-rules-inl.h"
@ -1131,5 +1132,35 @@ BUILTIN(PluralRulesConstructor) {
locales, options));
}
// ecma402 #sec-intl.collator
// Builtin behind the Intl.Collator constructor. Allocates a JSCollator for the
// effective new.target, zeroes its flags and forwards to
// JSCollator::InitializeCollator.
BUILTIN(CollatorConstructor) {
  HandleScope scope(isolate);

  // [[Construct]]
  Handle<JSFunction> target = args.target();

  // 1. If NewTarget is undefined, let newTarget be the active
  // function object, else let newTarget be NewTarget.
  Handle<JSReceiver> new_target = target;
  if (!args.new_target()->IsUndefined(isolate)) {
    new_target = Handle<JSReceiver>::cast(args.new_target());
  }

  Handle<Object> locales = args.atOrUndefined(isolate, 1);
  Handle<Object> options = args.atOrUndefined(isolate, 2);

  // 5. Let collator be ? OrdinaryCreateFromConstructor(newTarget,
  // "%CollatorPrototype%", internalSlotsList).
  Handle<JSObject> new_object;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_object,
                                     JSObject::New(target, new_target));
  Handle<JSCollator> collator = Handle<JSCollator>::cast(new_object);
  // Start from an all-zero flags field before initialization fills it in.
  collator->set_flags(0);

  // 6. Return ? InitializeCollator(collator, locales, options).
  RETURN_RESULT_OR_FAILURE(isolate, JSCollator::InitializeCollator(
                                        isolate, collator, locales, options));
}
} // namespace internal
} // namespace v8

View File

@ -207,6 +207,7 @@ Type::bitset BitsetType::Lub(HeapObjectType const& type) {
case JS_MESSAGE_OBJECT_TYPE:
case JS_DATE_TYPE:
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
case JS_INTL_LIST_FORMAT_TYPE:
case JS_INTL_LOCALE_TYPE:
case JS_INTL_PLURAL_RULES_TYPE:

View File

@ -37,9 +37,11 @@
V(call_string, "call") \
V(callee_string, "callee") \
V(caller_string, "caller") \
V(caseFirst_string, "caseFirst") \
V(cell_value_string, "%cell_value") \
V(char_at_string, "CharAt") \
V(closure_string, "(closure)") \
V(collation_string, "collation") \
V(column_string, "column") \
V(CompileError_string, "CompileError") \
V(configurable_string, "configurable") \
@ -98,6 +100,7 @@
V(has_string, "has") \
V(hour_string, "hour") \
V(ignoreCase_string, "ignoreCase") \
V(ignorePunctuation_string, "ignorePunctuation") \
V(illegal_access_string, "illegal access") \
V(illegal_argument_string, "illegal argument") \
V(index_string, "index") \
@ -182,8 +185,9 @@
V(script_string, "script") \
V(short_string, "short") \
V(second_string, "second") \
V(set_space_string, "set ") \
V(Set_string, "Set") \
V(sensitivity_string, "sensitivity") \
V(set_space_string, "set ") \
V(set_string, "set") \
V(SetIterator_string, "Set Iterator") \
V(setPrototypeOf_string, "setPrototypeOf") \
@ -222,6 +226,7 @@
V(unicode_string, "unicode") \
V(unit_string, "unit") \
V(URIError_string, "URIError") \
V(usage_string, "usage") \
V(use_asm_string, "use asm") \
V(use_strict_string, "use strict") \
V(value_string, "value") \

View File

@ -426,7 +426,12 @@ function attemptSingleLookup(availableLocales, requestedLocale) {
var extensionMatch = %regexp_internal_match(
GetUnicodeExtensionRE(), requestedLocale);
var extension = IS_NULL(extensionMatch) ? '' : extensionMatch[0];
return {__proto__: null, locale: availableLocale, extension: extension};
return {
__proto__: null,
locale: availableLocale,
extension: extension,
localeWithExtension: availableLocale + extension,
};
}
return UNDEFINED;
}
@ -463,7 +468,8 @@ function lookupMatcher(service, requestedLocales) {
return {
__proto__: null,
locale: 'und',
extension: ''
extension: '',
localeWithExtension: 'und',
};
}
@ -780,141 +786,13 @@ DEFINE_METHOD(
}
);
/**
 * Creates and returns a new Collator instance for the given locales and
 * options. CollatorConstructor passes this function to IntlConstruct.
 *
 * The function reads usage, sensitivity and ignorePunctuation from options,
 * resolves the locale, merges the 'kn'/'kf' Unicode extension keys, derives
 * the collation and the locale string requested from ICU, and finally calls
 * %CreateCollator.
 */
function CreateCollator(locales, options) {
// A missing options argument is replaced by an empty, prototype-less object.
if (IS_UNDEFINED(options)) {
options = {__proto__: null};
}
var getOption = getGetOption(options, 'collator');
// Options object that is handed to %CreateCollator below.
var internalOptions = {__proto__: null};
// usage is restricted to 'sort' or 'search'; the trailing 'sort' argument is
// presumably the fallback value (getOption is defined elsewhere).
%DefineWEProperty(internalOptions, 'usage', getOption(
'usage', 'string', ['sort', 'search'], 'sort'));
// No fallback is passed for sensitivity; an undefined result is replaced by
// 'variant' only when usage is 'sort'.
var sensitivity = getOption('sensitivity', 'string',
['base', 'accent', 'case', 'variant']);
if (IS_UNDEFINED(sensitivity) && internalOptions.usage === 'sort') {
sensitivity = 'variant';
}
%DefineWEProperty(internalOptions, 'sensitivity', sensitivity);
%DefineWEProperty(internalOptions, 'ignorePunctuation', getOption(
'ignorePunctuation', 'boolean', UNDEFINED, false));
var locale = resolveLocale('collator', locales, options);
// TODO(jshin): ICU now can take kb, kc, etc. Switch over to using ICU
// directly. See Collator::InitializeCollator and
// Collator::CreateICUCollator in src/objects/intl-objects.cc
// Until that switch happens, the kn/kf settings are still passed to C++ as
// options (via internalOptions) rather than through the locale ID.
// One exception is -co- which has to be part of the extension, but only for
// usage: sort, and its value can't be 'standard' or 'search'.
var extensionMap = parseExtension(locale.extension);
/**
 * Map of Unicode extensions to option properties, and their values and types,
 * for a collator.
 */
var COLLATOR_KEY_MAP = {
__proto__: null,
'kn': { __proto__: null, 'property': 'numeric', 'type': 'boolean'},
'kf': { __proto__: null, 'property': 'caseFirst', 'type': 'string',
'values': ['false', 'lower', 'upper']}
};
setOptions(
options, extensionMap, COLLATOR_KEY_MAP, getOption, internalOptions);
// collation stays 'default' unless an allowed -u-co- value is accepted below.
var collation = 'default';
// Extension suffix appended to the resolved locale before calling into C++.
var extension = '';
if (HAS_OWN_PROPERTY(extensionMap, 'co') && internalOptions.usage === 'sort') {
/**
 * Allowed -u-co- values. List taken from:
 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
 */
var ALLOWED_CO_VALUES = [
'big5han', 'dict', 'direct', 'ducet', 'gb2312', 'phonebk', 'phonetic',
'pinyin', 'reformed', 'searchjl', 'stroke', 'trad', 'unihan', 'zhuyin'
];
if (%ArrayIndexOf(ALLOWED_CO_VALUES, extensionMap.co, 0) !== -1) {
extension = '-u-co-' + extensionMap.co;
// ICU can't tell us what the collation is, so save user's input.
collation = extensionMap.co;
}
} else if (internalOptions.usage === 'search') {
// usage 'search' always requests the 'search' collation from ICU.
extension = '-u-co-search';
}
%DefineWEProperty(internalOptions, 'collation', collation);
var requestedLocale = locale.locale + extension;
// We define all properties C++ code may produce, to prevent security
// problems. If malicious user decides to redefine Object.prototype.locale
// we can't just use plain x.locale = 'us' or in C++ Set("locale", "us").
// %object_define_properties will either succeed defining or throw an error.
var resolved = %object_define_properties({__proto__: null}, {
caseFirst: {writable: true},
collation: {value: internalOptions.collation, writable: true},
ignorePunctuation: {writable: true},
locale: {writable: true},
numeric: {writable: true},
requestedLocale: {value: requestedLocale, writable: true},
sensitivity: {writable: true},
strength: {writable: true},
usage: {value: internalOptions.usage, writable: true}
});
var collator = %CreateCollator(requestedLocale, internalOptions, resolved);
%MarkAsInitializedIntlObjectOfType(collator, COLLATOR_TYPE);
// Keep the resolved settings on the collator under resolvedSymbol.
collator[resolvedSymbol] = resolved;
return collator;
}
/**
 * Constructs Intl.Collator object given optional locales and options
 * parameters. Delegates to IntlConstruct, passing the receiver,
 * GlobalIntlCollator, CreateCollator, new.target and the call's arguments.
 *
 * @constructor
 */
function CollatorConstructor() {
return IntlConstruct(this, GlobalIntlCollator, CreateCollator, new.target,
arguments);
}
%SetCode(GlobalIntlCollator, CollatorConstructor);
/**
* Collator resolvedOptions method.
*/
DEFINE_METHOD(
GlobalIntlCollator.prototype,
resolvedOptions() {
var methodName = 'resolvedOptions';
if(!IS_RECEIVER(this)) {
throw %make_type_error(kIncompatibleMethodReceiver, methodName, this);
}
var coll = %IntlUnwrapReceiver(this, COLLATOR_TYPE, GlobalIntlCollator,
methodName, false);
return {
locale: coll[resolvedSymbol].locale,
usage: coll[resolvedSymbol].usage,
sensitivity: coll[resolvedSymbol].sensitivity,
ignorePunctuation: coll[resolvedSymbol].ignorePunctuation,
numeric: coll[resolvedSymbol].numeric,
caseFirst: coll[resolvedSymbol].caseFirst,
collation: coll[resolvedSymbol].collation
};
return %CollatorResolvedOptions(this);
}
);

View File

@ -726,6 +726,7 @@ ReturnType BodyDescriptorApply(InstanceType type, T1 p1, T2 p2, T3 p3, T4 p4) {
case JS_MESSAGE_OBJECT_TYPE:
case JS_BOUND_FUNCTION_TYPE:
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
case JS_INTL_LIST_FORMAT_TYPE:
case JS_INTL_LOCALE_TYPE:
case JS_INTL_PLURAL_RULES_TYPE:

View File

@ -15,6 +15,9 @@
#include "src/objects-inl.h"
#include "src/objects/arguments-inl.h"
#include "src/objects/bigint.h"
#ifdef V8_INTL_SUPPORT
#include "src/objects/js-collator-inl.h"
#endif // V8_INTL_SUPPORT
#include "src/objects/data-handler-inl.h"
#include "src/objects/debug-objects-inl.h"
#include "src/objects/hash-table-inl.h"
@ -353,6 +356,9 @@ void HeapObject::HeapObjectVerify(Isolate* isolate) {
CodeDataContainer::cast(this)->CodeDataContainerVerify(isolate);
break;
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
JSCollator::cast(this)->JSCollatorVerify(isolate);
break;
case JS_INTL_LIST_FORMAT_TYPE:
JSListFormat::cast(this)->JSListFormatVerify(isolate);
break;
@ -1868,6 +1874,13 @@ void InterpreterData::InterpreterDataVerify(Isolate* isolate) {
}
#ifdef V8_INTL_SUPPORT
// Heap verification for JSCollator: checks the instance type, runs the
// JSObject verification, then verifies the ICU collator and flags fields.
void JSCollator::JSCollatorVerify(Isolate* isolate) {
CHECK(IsJSCollator());
JSObjectVerify(isolate);
VerifyObjectField(isolate, kICUCollatorOffset);
VerifyObjectField(isolate, kFlagsOffset);
}
void JSListFormat::JSListFormatVerify(Isolate* isolate) {
JSObjectVerify(isolate);
VerifyObjectField(isolate, kLocaleOffset);

View File

@ -217,6 +217,7 @@ namespace internal {
#ifdef V8_INTL_SUPPORT
#define INSTANCE_TYPE_LIST(V) \
INSTANCE_TYPE_LIST_BEFORE_INTL(V) \
V(JS_INTL_COLLATOR_TYPE) \
V(JS_INTL_LIST_FORMAT_TYPE) \
V(JS_INTL_LOCALE_TYPE) \
V(JS_INTL_PLURAL_RULES_TYPE) \

View File

@ -14,6 +14,9 @@
#include "src/interpreter/bytecodes.h"
#include "src/objects-inl.h"
#include "src/objects/arguments-inl.h"
#ifdef V8_INTL_SUPPORT
#include "src/objects/js-collator-inl.h"
#endif // V8_INTL_SUPPORT
#include "src/objects/data-handler-inl.h"
#include "src/objects/debug-objects-inl.h"
#include "src/objects/hash-table-inl.h"
@ -305,6 +308,9 @@ void HeapObject::HeapObjectPrint(std::ostream& os) { // NOLINT
JSDataView::cast(this)->JSDataViewPrint(os);
break;
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
JSCollator::cast(this)->JSCollatorPrint(os);
break;
case JS_INTL_LIST_FORMAT_TYPE:
JSListFormat::cast(this)->JSListFormatPrint(os);
break;
@ -1955,6 +1961,13 @@ void Script::ScriptPrint(std::ostream& os) { // NOLINT
}
#ifdef V8_INTL_SUPPORT
// Debug printer: writes the JSObject header, then the usage (as a string) and
// a brief description of the ICU collator field.
void JSCollator::JSCollatorPrint(std::ostream& os) {  // NOLINT
  JSObjectPrintHeader(os, this, "JSCollator");
  os << "\n - usage: " << JSCollator::UsageToString(usage())
     << "\n - icu collator: " << Brief(icu_collator()) << "\n";
}
void JSListFormat::JSListFormatPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, this, "JSListFormat");
os << "\n - locale: " << Brief(locale());

View File

@ -59,6 +59,9 @@
#include "src/objects/debug-objects-inl.h"
#include "src/objects/frame-array-inl.h"
#include "src/objects/hash-table-inl.h"
#ifdef V8_INTL_SUPPORT
#include "src/objects/js-collator.h"
#endif // V8_INTL_SUPPORT
#include "src/objects/js-collection-inl.h"
#include "src/objects/js-generator-inl.h"
#ifdef V8_INTL_SUPPORT
@ -1419,6 +1422,8 @@ int JSObject::GetHeaderSize(InstanceType type,
case JS_MODULE_NAMESPACE_TYPE:
return JSModuleNamespace::kHeaderSize;
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
return JSCollator::kSize;
case JS_INTL_LIST_FORMAT_TYPE:
return JSListFormat::kSize;
case JS_INTL_LOCALE_TYPE:
@ -3171,6 +3176,7 @@ VisitorId Map::GetVisitorId(Map* map) {
case JS_REGEXP_TYPE:
case JS_REGEXP_STRING_ITERATOR_TYPE:
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
case JS_INTL_LIST_FORMAT_TYPE:
case JS_INTL_LOCALE_TYPE:
case JS_INTL_PLURAL_RULES_TYPE:
@ -13099,6 +13105,7 @@ bool CanSubclassHaveInobjectProperties(InstanceType instance_type) {
case JS_FUNCTION_TYPE:
case JS_GENERATOR_OBJECT_TYPE:
#ifdef V8_INTL_SUPPORT
case JS_INTL_COLLATOR_TYPE:
case JS_INTL_PLURAL_RULES_TYPE:
#endif
case JS_ASYNC_GENERATOR_OBJECT_TYPE:

View File

@ -75,9 +75,10 @@
// - JSDate
// - JSMessageObject
// - JSModuleNamespace
// - JSListFormat // If V8_INTL_SUPPORT enabled.
// - JSLocale // If V8_INTL_SUPPORT enabled.
// - JSPluralRules // If V8_INTL_SUPPORT enabled.
// - JSCollator // If V8_INTL_SUPPORT enabled.
// - JSListFormat // If V8_INTL_SUPPORT enabled.
// - JSLocale // If V8_INTL_SUPPORT enabled.
// - JSPluralRules // If V8_INTL_SUPPORT enabled.
// - JSRelativeTimeFormat // If V8_INTL_SUPPORT enabled.
// - WasmGlobalObject
// - WasmInstanceObject
@ -583,6 +584,7 @@ enum InstanceType : uint16_t {
JS_DATA_VIEW_TYPE,
#ifdef V8_INTL_SUPPORT
JS_INTL_COLLATOR_TYPE,
JS_INTL_LIST_FORMAT_TYPE,
JS_INTL_LOCALE_TYPE,
JS_INTL_PLURAL_RULES_TYPE,
@ -700,6 +702,7 @@ class JSAsyncGeneratorObject;
class JSGlobalObject;
class JSGlobalProxy;
#ifdef V8_INTL_SUPPORT
class JSCollator;
class JSListFormat;
class JSLocale;
class JSPluralRules;
@ -911,6 +914,7 @@ class ZoneForwardList;
#ifdef V8_INTL_SUPPORT
#define HEAP_OBJECT_ORDINARY_TYPE_LIST(V) \
HEAP_OBJECT_ORDINARY_TYPE_LIST_BASE(V) \
V(JSCollator) \
V(JSListFormat) \
V(JSLocale) \
V(JSPluralRules) \
@ -1031,6 +1035,7 @@ class ZoneForwardList;
#define INSTANCE_TYPE_CHECKERS_SINGLE(V) \
INSTANCE_TYPE_CHECKERS_SINGLE_BASE(V) \
V(JSCollator, JS_INTL_COLLATOR_TYPE) \
V(JSListFormat, JS_INTL_LIST_FORMAT_TYPE) \
V(JSLocale, JS_INTL_LOCALE_TYPE) \
V(JSPluralRules, JS_INTL_PLURAL_RULES_TYPE) \

View File

@ -19,6 +19,7 @@
#include "src/intl.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/managed.h"
#include "src/objects/string.h"
#include "src/property-descriptor.h"
@ -494,193 +495,6 @@ void SetResolvedNumberSettings(Isolate* isolate, const icu::Locale& icu_locale,
SetResolvedNumericSettings(isolate, icu_locale, number_format, resolved);
}
// Creates an ICU collator for |icu_locale| and configures it from the
// "numeric", "caseFirst", "sensitivity" and "ignorePunctuation" entries of
// |options|. Returns nullptr when ICU fails to create the collator; otherwise
// returns the new collator.
icu::Collator* CreateICUCollator(Isolate* isolate,
                                 const icu::Locale& icu_locale,
                                 Handle<JSObject> options) {
  UErrorCode status = U_ZERO_ERROR;
  icu::Collator* collator = icu::Collator::createInstance(icu_locale, status);
  if (U_FAILURE(status)) {
    delete collator;
    return nullptr;
  }

  // Flags are applied first; sensitivity may override them further down.
  bool numeric;
  if (ExtractBooleanSetting(isolate, options, "numeric", &numeric)) {
    const UColAttributeValue numeric_mode = numeric ? UCOL_ON : UCOL_OFF;
    collator->setAttribute(UCOL_NUMERIC_COLLATION, numeric_mode, status);
  }

  // The spec requires normalization to be on at all times. Skipping it for
  // already-normalized strings would be a valid optimization, but there is
  // currently no way to detect that.
  collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

  icu::UnicodeString case_first;
  if (ExtractStringSetting(isolate, options, "caseFirst", &case_first)) {
    // Anything other than "upper"/"lower" means the default (false/off).
    UColAttributeValue case_first_mode = UCOL_OFF;
    if (case_first == UNICODE_STRING_SIMPLE("upper")) {
      case_first_mode = UCOL_UPPER_FIRST;
    } else if (case_first == UNICODE_STRING_SIMPLE("lower")) {
      case_first_mode = UCOL_LOWER_FIRST;
    }
    collator->setAttribute(UCOL_CASE_FIRST, case_first_mode, status);
  }

  icu::UnicodeString sensitivity;
  if (ExtractStringSetting(isolate, options, "sensitivity", &sensitivity)) {
    const bool wants_case = sensitivity == UNICODE_STRING_SIMPLE("case");
    if (wants_case || sensitivity == UNICODE_STRING_SIMPLE("base")) {
      // "base" and "case" both use primary strength; "case" additionally
      // switches the case level on.
      collator->setStrength(icu::Collator::PRIMARY);
      if (wants_case) {
        collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
      }
    } else if (sensitivity == UNICODE_STRING_SIMPLE("accent")) {
      collator->setStrength(icu::Collator::SECONDARY);
    } else {
      // variant (default)
      collator->setStrength(icu::Collator::TERTIARY);
    }
  }

  bool ignore;
  if (ExtractBooleanSetting(isolate, options, "ignorePunctuation", &ignore) &&
      ignore) {
    collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
  }

  return collator;
}
// Copies the effective settings of |collator| onto |resolved|, in this order:
// numeric, caseFirst, strength, sensitivity, ignorePunctuation, locale.
// The locale is the language tag derived from |icu_locale|, or "und" if the
// conversion fails.
void SetResolvedCollatorSettings(Isolate* isolate,
                                 const icu::Locale& icu_locale,
                                 icu::Collator* collator,
                                 Handle<JSObject> resolved) {
  Factory* factory = isolate->factory();
  UErrorCode status = U_ZERO_ERROR;

  // Sloppy-mode property store on |resolved| that is asserted to succeed.
  auto store = [isolate, resolved](Handle<String> key, Handle<Object> value) {
    JSObject::SetProperty(isolate, resolved, key, value, LanguageMode::kSloppy)
        .Assert();
  };

  const bool numeric =
      collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON;
  store(factory->NewStringFromStaticChars("numeric"),
        factory->ToBoolean(numeric));

  Handle<String> case_first;
  switch (collator->getAttribute(UCOL_CASE_FIRST, status)) {
    case UCOL_LOWER_FIRST:
      case_first = factory->NewStringFromStaticChars("lower");
      break;
    case UCOL_UPPER_FIRST:
      case_first = factory->NewStringFromStaticChars("upper");
      break;
    default:
      case_first = factory->NewStringFromStaticChars("false");
  }
  store(factory->NewStringFromStaticChars("caseFirst"), case_first);

  Handle<String> strength;
  Handle<String> sensitivity;
  switch (collator->getAttribute(UCOL_STRENGTH, status)) {
    case UCOL_PRIMARY:
      strength = factory->NewStringFromStaticChars("primary");
      // case level: true + s1 -> case, s1 -> base.
      if (UCOL_ON == collator->getAttribute(UCOL_CASE_LEVEL, status)) {
        sensitivity = factory->NewStringFromStaticChars("case");
      } else {
        sensitivity = factory->NewStringFromStaticChars("base");
      }
      break;
    case UCOL_SECONDARY:
      strength = factory->NewStringFromStaticChars("secondary");
      sensitivity = factory->NewStringFromStaticChars("accent");
      break;
    case UCOL_TERTIARY:
      strength = factory->NewStringFromStaticChars("tertiary");
      sensitivity = factory->NewStringFromStaticChars("variant");
      break;
    case UCOL_QUATERNARY:
      // We shouldn't get quaternary and identical from ICU, but if we do
      // put them into variant.
      strength = factory->NewStringFromStaticChars("quaternary");
      sensitivity = factory->NewStringFromStaticChars("variant");
      break;
    default:
      strength = factory->NewStringFromStaticChars("identical");
      sensitivity = factory->NewStringFromStaticChars("variant");
  }
  store(factory->NewStringFromStaticChars("strength"), strength);
  store(factory->NewStringFromStaticChars("sensitivity"), sensitivity);

  const bool ignore_punctuation =
      collator->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
  store(factory->NewStringFromStaticChars("ignorePunctuation"),
        factory->ToBoolean(ignore_punctuation));

  // Set the locale
  char result[ULOC_FULLNAME_CAPACITY];
  status = U_ZERO_ERROR;
  uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     FALSE, &status);
  // The "und" fallback would never happen, since we got the locale from ICU.
  Handle<String> locale_tag = U_SUCCESS(status)
                                  ? factory->NewStringFromAsciiChecked(result)
                                  : factory->NewStringFromStaticChars("und");
  store(factory->NewStringFromStaticChars("locale"), locale_tag);
}
icu::BreakIterator* CreateICUBreakIterator(Isolate* isolate,
const icu::Locale& icu_locale,
Handle<JSObject> options) {
@ -845,38 +659,6 @@ void NumberFormat::DeleteNumberFormat(const v8::WeakCallbackInfo<void>& data) {
GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
}
// Builds an ICU collator for |locale| and |options| and records the resolved
// settings on |resolved|. If creation fails for the full locale, it retries
// with the base name only (extensions removed); if that fails too, the process
// is terminated with a fatal error. The returned pointer is never null.
icu::Collator* Collator::InitializeCollator(Isolate* isolate,
                                            Handle<String> locale,
                                            Handle<JSObject> options,
                                            Handle<JSObject> resolved) {
  icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
  DCHECK(!icu_locale.isBogus());

  icu::Collator* collator = CreateICUCollator(isolate, icu_locale, options);
  if (collator != nullptr) {
    SetResolvedCollatorSettings(isolate, icu_locale, collator, resolved);
  } else {
    // Remove extensions and try again.
    icu::Locale no_extension_locale(icu_locale.getBaseName());
    collator = CreateICUCollator(isolate, no_extension_locale, options);
    if (collator == nullptr) {
      FATAL("Failed to create ICU collator, are ICU data files missing?");
    }
    // Report the settings for the locale that actually produced the collator.
    SetResolvedCollatorSettings(isolate, no_extension_locale, collator,
                                resolved);
  }

  CHECK_NOT_NULL(collator);
  return collator;
}
// Returns the raw icu::Collator held by the Managed<icu::Collator> stored in
// embedder field 0 of |obj|.
icu::Collator* Collator::UnpackCollator(Handle<JSObject> obj) {
return Managed<icu::Collator>::cast(obj->GetEmbedderField(0))->raw();
}
icu::BreakIterator* V8BreakIterator::InitializeBreakIterator(
Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
Handle<JSObject> resolved) {
@ -1158,6 +940,9 @@ MaybeHandle<JSObject> Intl::UnwrapReceiver(Isolate* isolate,
Intl::Type type,
Handle<String> method_name,
bool check_legacy_constructor) {
DCHECK(type == Intl::Type::kCollator || type == Intl::Type::kNumberFormat ||
type == Intl::Type::kDateTimeFormat ||
type == Intl::Type::kBreakIterator);
Handle<Object> new_receiver = receiver;
if (check_legacy_constructor) {
ASSIGN_RETURN_ON_EXCEPTION(
@ -1165,6 +950,20 @@ MaybeHandle<JSObject> Intl::UnwrapReceiver(Isolate* isolate,
LegacyUnwrapReceiver(isolate, receiver, constructor, type), JSObject);
}
// Collator has been ported to use regular instance types. We
// shouldn't be using Intl::IsObjectOfType anymore.
if (type == Intl::Type::kCollator) {
if (!receiver->IsJSCollator()) {
// 3. a. Throw a TypeError exception.
THROW_NEW_ERROR(isolate,
NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
method_name, receiver),
JSObject);
}
return Handle<JSCollator>::cast(receiver);
}
DCHECK_NE(type, Intl::Type::kCollator);
// 3. If Type(new_receiver) is not Object or nf does not have an
// [[Initialized...]] internal slot, then
if (!Intl::IsObjectOfType(isolate, new_receiver, type)) {
@ -1886,23 +1685,24 @@ MaybeHandle<Object> Intl::StringLocaleCompare(Isolate* isolate,
Handle<Object> locales,
Handle<Object> options) {
Factory* factory = isolate->factory();
Handle<JSObject> collator_holder;
Handle<JSObject> collator;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, collator_holder,
isolate, collator,
CachedOrNewService(isolate, factory->NewStringFromStaticChars("collator"),
locales, options),
Object);
DCHECK(Intl::IsObjectOfType(isolate, collator_holder, Intl::kCollator));
return Intl::InternalCompare(isolate, collator_holder, string1, string2);
CHECK(collator->IsJSCollator());
return Intl::InternalCompare(isolate, Handle<JSCollator>::cast(collator),
string1, string2);
}
Handle<Object> Intl::InternalCompare(Isolate* isolate,
Handle<JSObject> collator_holder,
Handle<JSCollator> collator,
Handle<String> string1,
Handle<String> string2) {
Factory* factory = isolate->factory();
icu::Collator* collator = Collator::UnpackCollator(collator_holder);
CHECK_NOT_NULL(collator);
icu::Collator* icu_collator = collator->icu_collator()->raw();
CHECK_NOT_NULL(icu_collator);
string1 = String::Flatten(isolate, string1);
string2 = String::Flatten(isolate, string2);
@ -1921,7 +1721,7 @@ Handle<Object> Intl::InternalCompare(Isolate* isolate,
FALSE, GetUCharBufferFromFlat(flat1, &sap1, length1), length1);
icu::UnicodeString string_val2(
FALSE, GetUCharBufferFromFlat(flat2, &sap2, length2), length2);
result = collator->compare(string_val1, string_val2, status);
result = icu_collator->compare(string_val1, string_val2, status);
}
DCHECK(U_SUCCESS(status));

View File

@ -117,26 +117,6 @@ class NumberFormat {
NumberFormat();
};
// Static helpers and layout constants for the collator holder object.
class Collator {
public:
// Creates a collator for the specified locale and options and returns it.
// The resolved settings are written to |resolved|.
static icu::Collator* InitializeCollator(Isolate* isolate,
Handle<String> locale,
Handle<JSObject> options,
Handle<JSObject> resolved);
// Unpacks collator object from corresponding JavaScript object.
static icu::Collator* UnpackCollator(Handle<JSObject> obj);
// Layout description: one pointer-sized slot (kCollator) placed directly
// after the JSObject header.
static const int kCollator = JSObject::kHeaderSize;
static const int kSize = kCollator + kPointerSize;
private:
// The constructor is private.
Collator();
};
class V8BreakIterator {
public:
// Create a BreakIterator for the specified locale and options. Returns the
@ -313,7 +293,7 @@ class Intl {
Handle<Object> locales, Handle<Object> options);
V8_WARN_UNUSED_RESULT static Handle<Object> InternalCompare(
Isolate* isolate, Handle<JSObject> collator, Handle<String> s1,
Isolate* isolate, Handle<JSCollator> collator, Handle<String> s1,
Handle<String> s2);
// ecma402/#sup-properties-of-the-number-prototype-object

View File

@ -0,0 +1,42 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#ifndef V8_OBJECTS_JS_COLLATOR_INL_H_
#define V8_OBJECTS_JS_COLLATOR_INL_H_
#include "src/objects-inl.h"
#include "src/objects/js-collator.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
namespace v8 {
namespace internal {
ACCESSORS(JSCollator, icu_collator, Managed<icu::Collator>, kICUCollatorOffset)
SMI_ACCESSORS(JSCollator, flags, kFlagsOffset)
// Encodes |usage| into the flags field via UsageBits. |usage| must be a real
// enumerator, i.e. less than Usage::COUNT.
inline void JSCollator::set_usage(Usage usage) {
  DCHECK_LT(usage, Usage::COUNT);
  set_flags(UsageBits::update(flags(), usage));
}
// Decodes the usage stored in the flags field via UsageBits.
inline JSCollator::Usage JSCollator::usage() const {
  const int hints = flags();
  return UsageBits::decode(hints);
}
CAST_ACCESSOR(JSCollator);
} // namespace internal
} // namespace v8
#include "src/objects/object-macros-undef.h"
#endif // V8_OBJECTS_JS_COLLATOR_INL_H_

535
src/objects/js-collator.cc Normal file
View File

@ -0,0 +1,535 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-collator.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
#include "src/objects/js-collator-inl.h"
#include "unicode/coll.h"
#include "unicode/locid.h"
#include "unicode/strenum.h"
#include "unicode/ucol.h"
#include "unicode/uloc.h"
namespace v8 {
namespace internal {
namespace {
// TODO(gsathya): Consider internalizing the value strings.
// Defines |key| on |options| as a data property whose value is a new string
// created from the C string |value|. |value| must not be null, and the
// property definition is required to succeed.
void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
                                  Handle<String> key, const char* value) {
  CHECK_NOT_NULL(value);
  Handle<String> string_value =
      isolate->factory()->NewStringFromAsciiChecked(value);

  // |options| is a brand new JSObject that shouldn't already have |key|, so
  // defining the property shouldn't fail.
  const bool created = JSReceiver::CreateDataProperty(isolate, options, key,
                                                      string_value, kDontThrow)
                           .FromJust();
  CHECK(created);
}
// Defines |key| on |options| as a data property holding the boolean |value|.
// The property definition is required to succeed.
void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
                                  Handle<String> key, bool value) {
  Handle<Object> boolean_value = isolate->factory()->ToBoolean(value);

  // |options| is a brand new JSObject that shouldn't already have |key|, so
  // defining the property shouldn't fail.
  const bool created = JSReceiver::CreateDataProperty(isolate, options, key,
                                                      boolean_value, kDontThrow)
                           .FromJust();
  CHECK(created);
}
} // anonymous namespace
// static
// Builds the object returned by Intl.Collator.prototype.resolvedOptions().
//
// Apart from [[Usage]], which is stored on |collator|, every value is read
// back on demand from the underlying icu::Collator. The properties are
// created in this order: usage, numeric, caseFirst, sensitivity,
// ignorePunctuation, collation, locale.
Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
                                             Handle<JSCollator> collator) {
  Handle<JSObject> options =
      isolate->factory()->NewJSObject(isolate->object_function());

  JSCollator::Usage usage = collator->usage();
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->usage_string(),
                               JSCollator::UsageToString(usage));

  icu::Collator* icu_collator = collator->icu_collator()->raw();
  CHECK_NOT_NULL(icu_collator);

  // numeric <- UCOL_NUMERIC_COLLATION.
  UErrorCode status = U_ZERO_ERROR;
  bool numeric =
      icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON;
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->numeric_string(), numeric);

  // caseFirst <- UCOL_CASE_FIRST; anything but lower/upper maps to "false".
  const char* case_first = nullptr;
  status = U_ZERO_ERROR;
  switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) {
    case UCOL_LOWER_FIRST:
      case_first = "lower";
      break;
    case UCOL_UPPER_FIRST:
      case_first = "upper";
      break;
    default:
      case_first = "false";
  }
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->caseFirst_string(), case_first);

  // sensitivity <- UCOL_STRENGTH (plus UCOL_CASE_LEVEL for primary strength).
  const char* sensitivity = nullptr;
  status = U_ZERO_ERROR;
  switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) {
    case UCOL_PRIMARY: {
      CHECK(U_SUCCESS(status));
      status = U_ZERO_ERROR;
      // case level: true + s1 -> case, s1 -> base.
      if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) {
        sensitivity = "case";
      } else {
        sensitivity = "base";
      }
      CHECK(U_SUCCESS(status));
      break;
    }
    case UCOL_SECONDARY:
      sensitivity = "accent";
      break;
    case UCOL_TERTIARY:
      sensitivity = "variant";
      break;
    case UCOL_QUATERNARY:
      // We shouldn't get quaternary and identical from ICU, but if we do
      // put them into variant.
      sensitivity = "variant";
      break;
    default:
      sensitivity = "variant";
  }
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->sensitivity_string(), sensitivity);

  // ignorePunctuation <- UCOL_ALTERNATE_HANDLING == shifted.
  status = U_ZERO_ERROR;
  bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING,
                                                       status) == UCOL_SHIFTED;
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->ignorePunctuation_string(),
                               ignore_punctuation);

  // Both collation and locale are derived from the collator's valid locale.
  status = U_ZERO_ERROR;
  icu::Locale icu_locale = icu_collator->getLocale(ULOC_VALID_LOCALE, status);
  CHECK(U_SUCCESS(status));

  // collation <- the "collation" keyword of the collator's locale.
  //
  // icu::Collator::getKeywordValues() cannot be used for this: it only
  // accepts the legacy keyword name "collation" (so "co" always fails with
  // U_ILLEGAL_ARGUMENT_ERROR) and it enumerates every collation type ICU
  // knows about rather than the type used by this collator.
  const char* collation = "default";
  const char* legacy_collation_key = uloc_toLegacyKey("co");
  char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
  if (legacy_collation_key != nullptr) {
    status = U_ZERO_ERROR;
    int32_t length = icu_locale.getKeywordValue(
        legacy_collation_key, legacy_collation_value, ULOC_FULLNAME_CAPACITY,
        status);
    // Require a non-empty, NUL-terminated value.
    if (U_SUCCESS(status) && length > 0 && length < ULOC_FULLNAME_CAPACITY) {
      // May return |legacy_collation_value| itself, which outlives its use
      // below.
      const char* bcp47_value =
          uloc_toUnicodeLocaleType("co", legacy_collation_value);
      // "search" and "standard" must never be reported as a collation
      // (ecma402/#sec-intl-collator-internal-slots).
      if (bcp47_value != nullptr && strcmp(bcp47_value, "search") != 0 &&
          strcmp(bcp47_value, "standard") != 0) {
        collation = bcp47_value;
      }
    }
  }
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->collation_string(), collation);

  // locale <- BCP 47 language tag of the valid locale.
  char result[ULOC_FULLNAME_CAPACITY];
  status = U_ZERO_ERROR;
  uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     FALSE, &status);
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->locale_string(), result);

  return options;
}
namespace {
std::map<const char*, const char*> LookupUnicodeExtensions(
icu::Locale& icu_locale, std::set<const char*>& relevant_keys) {
std::map<const char*, const char*> extensions;
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::StringEnumeration> keywords(
icu_locale.createKeywords(status));
if (U_FAILURE(status)) return extensions;
if (!keywords) return extensions;
char value[ULOC_FULLNAME_CAPACITY];
int32_t length;
status = U_ZERO_ERROR;
for (const char* keyword = keywords->next(&length, status);
keyword != nullptr; keyword = keywords->next(&length, status)) {
// Ignore failures in ICU and skip to the next keyword.
//
// This is fine.™
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
continue;
}
icu_locale.getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
// Ignore failures in ICU and skip to the next keyword.
//
// This is fine.™
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
continue;
}
const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
// Ignore keywords that we don't recognize - spec allows that.
if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
extensions.insert(
std::pair<const char*, const char*>(bcp47_key, bcp47_value));
}
}
return extensions;
}
void SetCaseFirstOption(icu::Collator* icu_collator, const char* value) {
CHECK_NOT_NULL(icu_collator);
CHECK_NOT_NULL(value);
UErrorCode status = U_ZERO_ERROR;
if (strncmp(value, "upper", 5) == 0) {
icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
} else if (strncmp(value, "lower", 5) == 0) {
icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
} else {
icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
}
CHECK(U_SUCCESS(status));
}
} // anonymous namespace
// static
MaybeHandle<JSCollator> JSCollator::InitializeCollator(
Isolate* isolate, Handle<JSCollator> collator, Handle<Object> locales,
Handle<Object> options_obj) {
// 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
Handle<JSObject> requested_locales;
ASSIGN_RETURN_ON_EXCEPTION(isolate, requested_locales,
Intl::CanonicalizeLocaleListJS(isolate, locales),
JSCollator);
// 2. If options is undefined, then
if (options_obj->IsUndefined(isolate)) {
// 2. a. Let options be ObjectCreate(null).
options_obj = isolate->factory()->NewJSObjectWithNullProto();
} else {
// 3. Else
// 3. a. Let options be ? ToObject(options).
ASSIGN_RETURN_ON_EXCEPTION(
isolate, options_obj,
Object::ToObject(isolate, options_obj, "Intl.Collator"), JSCollator);
}
// At this point, options_obj can either be a JSObject or a JSProxy only.
Handle<JSReceiver> options = Handle<JSReceiver>::cast(options_obj);
// 4. Let usage be ? GetOption(options, "usage", "string", « "sort",
// "search" », "sort").
std::vector<const char*> values = {"sort", "search"};
std::unique_ptr<char[]> usage_str = nullptr;
JSCollator::Usage usage = JSCollator::Usage::SORT;
Maybe<bool> found_usage = Intl::GetStringOption(
isolate, options, "usage", values, "Intl.Collator", &usage_str);
MAYBE_RETURN(found_usage, MaybeHandle<JSCollator>());
if (found_usage.FromJust()) {
DCHECK_NOT_NULL(usage_str.get());
if (strncmp(usage_str.get(), "search", 6) == 0) {
usage = JSCollator::Usage::SEARCH;
}
}
// 5. Set collator.[[Usage]] to usage.
collator->set_usage(usage);
// 6. If usage is "sort", then
// a. Let localeData be %Collator%.[[SortLocaleData]].
// 7. Else,
// a. Let localeData be %Collator%.[[SearchLocaleData]].
//
// The above two spec operations aren't required, the Intl spec is
// crazy. See https://github.com/tc39/ecma402/issues/256
// TODO(gsathya): This is currently done as part of the
// Intl::ResolveLocale call below. Fix this once resolveLocale is
// changed to not do the lookup.
//
// 9. Let matcher be ? GetOption(options, "localeMatcher", "string",
// « "lookup", "best fit" », "best fit").
// 10. Set opt.[[localeMatcher]] to matcher.
// 11. Let numeric be ? GetOption(options, "numeric", "boolean",
// undefined, undefined).
// 12. If numeric is not undefined, then
// a. Let numeric be ! ToString(numeric).
//
// Note: We omit the ToString(numeric) operation as it's not
// observable. Intl::GetBoolOption returns a Boolean and
// ToString(Boolean) is not side-effecting.
//
// 13. Set opt.[[kn]] to numeric.
bool numeric;
Maybe<bool> found_numeric = Intl::GetBoolOption(isolate, options, "numeric",
"Intl.Collator", &numeric);
MAYBE_RETURN(found_numeric, MaybeHandle<JSCollator>());
// 14. Let caseFirst be ? GetOption(options, "caseFirst", "string",
// « "upper", "lower", "false" », undefined).
// 15. Set opt.[[kf]] to caseFirst.
values = {"upper", "lower", "false"};
std::unique_ptr<char[]> case_first_str = nullptr;
Maybe<bool> found_case_first = Intl::GetStringOption(
isolate, options, "caseFirst", values, "Intl.Collator", &case_first_str);
MAYBE_RETURN(found_case_first, MaybeHandle<JSCollator>());
// The relevant unicode extensions accepted by Collator as specified here:
// https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
//
// 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
std::set<const char*> relevant_extension_keys{"co", "kn", "kf"};
// We don't pass the relevant_extension_keys to ResolveLocale here
// as per the spec.
//
// In ResolveLocale, the spec makes sure we only pick and use the
// relevant extension keys and ignore any other keys. Also, in
// ResolveLocale, the spec makes sure that if a given key has both a
// value in the options object and an unicode extension value, then
// we pick the value provided in the options object.
// For example: in the case of `new Intl.Collator('en-u-kn-true', {
// numeric: false })` the value `false` is used for the `numeric`
// key.
//
// Instead of performing all this validation in ResolveLocale, we
// just perform it inline below. In the future when we port
// ResolveLocale to C++, we can make all these validations generic
// and move it ResolveLocale.
//
// 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]],
// requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]],
// localeData).
// 18. Set collator.[[Locale]] to r.[[locale]].
Handle<JSObject> r;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, r,
Intl::ResolveLocale(isolate, "collator", requested_locales, options),
JSCollator);
Handle<String> locale_with_extension_str =
isolate->factory()->NewStringFromStaticChars("localeWithExtension");
Handle<Object> locale_with_extension_obj =
JSObject::GetDataProperty(r, locale_with_extension_str);
// The locale_with_extension has to be a string. Either a user
// provided canonicalized string or the default locale.
CHECK(locale_with_extension_obj->IsString());
Handle<String> locale_with_extension =
Handle<String>::cast(locale_with_extension_obj);
icu::Locale icu_locale =
Intl::CreateICULocale(isolate, locale_with_extension);
DCHECK(!icu_locale.isBogus());
std::map<const char*, const char*> extensions =
LookupUnicodeExtensions(icu_locale, relevant_extension_keys);
// 19. Let collation be r.[[co]].
//
// r.[[co]] is already set as part of the icu::Locale creation as
// icu parses unicode extensions and sets the keywords.
//
// We need to sanitize the keywords based on certain ECMAScript rules.
//
// As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots:
// The values "standard" and "search" must not be used as elements
// in any [[SortLocaleData]][locale].co and
// [[SearchLocaleData]][locale].co list.
if (extensions.find("co") != extensions.end()) {
const char* value = extensions.at("co");
if (strncmp(value, "search", 6) == 0 ||
strncmp(value, "standard", 8) == 0) {
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue("co", NULL, status);
CHECK(U_SUCCESS(status));
}
}
// 20. If collation is null, let collation be "default".
// 21. Set collator.[[Collation]] to collation.
//
// We don't store the collation value as per the above two steps
// here. The collation value can be looked up from icu::Collator on
// demand, as part of Intl.Collator.prototype.resolvedOptions.
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::Collator> icu_collator(
icu::Collator::createInstance(icu_locale, status));
if (U_FAILURE(status) || icu_collator.get() == nullptr) {
status = U_ZERO_ERROR;
// Remove extensions and try again.
icu::Locale no_extension_locale(icu_locale.getBaseName());
icu_collator.reset(
icu::Collator::createInstance(no_extension_locale, status));
if (U_FAILURE(status) || icu_collator.get() == nullptr) {
FATAL("Failed to create ICU collator, are ICU data files missing?");
}
}
DCHECK(U_SUCCESS(status));
CHECK_NOT_NULL(icu_collator.get());
// 22. If relevantExtensionKeys contains "kn", then
// a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true").
//
// If the numeric value is passed in through the options object,
// then we use it. Otherwise, we check if the numeric value is
// passed in through the unicode extensions.
status = U_ZERO_ERROR;
if (found_numeric.FromJust()) {
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
} else if (extensions.find("kn") != extensions.end()) {
const char* value = extensions.at("kn");
numeric = (strncmp(value, "true", 4) == 0);
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
}
// 23. If relevantExtensionKeys contains "kf", then
// a. Set collator.[[CaseFirst]] to r.[[kf]].
//
// If the caseFirst value is passed in through the options object,
// then we use it. Otherwise, we check if the caseFirst value is
// passed in through the unicode extensions.
if (found_case_first.FromJust()) {
const char* case_first_cstr = case_first_str.get();
SetCaseFirstOption(icu_collator.get(), case_first_cstr);
} else if (extensions.find("kf") != extensions.end()) {
const char* value = extensions.at("kf");
SetCaseFirstOption(icu_collator.get(), value);
}
// Normalization is always on, by the spec. We are free to optimize
// if the strings are already normalized (but we don't have a way to tell
// that right now).
status = U_ZERO_ERROR;
icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
CHECK(U_SUCCESS(status));
// 24. Let sensitivity be ? GetOption(options, "sensitivity",
// "string", « "base", "accent", "case", "variant" », undefined).
values = {"base", "accent", "case", "variant"};
std::unique_ptr<char[]> sensitivity_str = nullptr;
Maybe<bool> found_sensitivity =
Intl::GetStringOption(isolate, options, "sensitivity", values,
"Intl.Collator", &sensitivity_str);
MAYBE_RETURN(found_sensitivity, MaybeHandle<JSCollator>());
// 25. If sensitivity is undefined, then
if (!found_sensitivity.FromJust()) {
// 25. a. If usage is "sort", then
if (usage == Usage::SORT) {
// 25. a. i. Let sensitivity be "variant".
// 26. Set collator.[[Sensitivity]] to sensitivity.
icu_collator->setStrength(icu::Collator::TERTIARY);
}
} else {
DCHECK(found_sensitivity.FromJust());
const char* sensitivity_cstr = sensitivity_str.get();
DCHECK_NOT_NULL(sensitivity_cstr);
// 26. Set collator.[[Sensitivity]] to sensitivity.
if (strncmp(sensitivity_cstr, "base", 4) == 0) {
icu_collator->setStrength(icu::Collator::PRIMARY);
} else if (strncmp(sensitivity_cstr, "accent", 6) == 0) {
icu_collator->setStrength(icu::Collator::SECONDARY);
} else if (strncmp(sensitivity_cstr, "case", 4) == 0) {
icu_collator->setStrength(icu::Collator::PRIMARY);
status = U_ZERO_ERROR;
icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
CHECK(U_SUCCESS(status));
} else {
DCHECK_EQ(0, strncmp(sensitivity_cstr, "variant", 7));
icu_collator->setStrength(icu::Collator::TERTIARY);
}
}
// 27.Let ignorePunctuation be ? GetOption(options,
// "ignorePunctuation", "boolean", undefined, false).
bool ignore_punctuation;
Maybe<bool> found_ignore_punctuation =
Intl::GetBoolOption(isolate, options, "ignorePunctuation",
"Intl.Collator", &ignore_punctuation);
MAYBE_RETURN(found_ignore_punctuation, MaybeHandle<JSCollator>());
// 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
if (found_ignore_punctuation.FromJust() && ignore_punctuation) {
status = U_ZERO_ERROR;
icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
CHECK(U_SUCCESS(status));
}
Handle<Managed<icu::Collator>> managed_collator =
Managed<icu::Collator>::FromUniquePtr(isolate, 0,
std::move(icu_collator));
collator->set_icu_collator(*managed_collator);
// 29. Return collator.
return collator;
}
// static
// Maps a Usage value to the string exposed as the "usage" resolved option.
// Usage::COUNT is not a real usage and must never be passed in.
const char* JSCollator::UsageToString(Usage usage) {
  if (usage == Usage::SORT) return "sort";
  if (usage == Usage::SEARCH) return "search";
  // Only Usage::COUNT is left, and it has no string form.
  UNREACHABLE();
}
} // namespace internal
} // namespace v8

82
src/objects/js-collator.h Normal file
View File

@ -0,0 +1,82 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#ifndef V8_OBJECTS_JS_COLLATOR_H_
#define V8_OBJECTS_JS_COLLATOR_H_
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects.h"
#include "src/objects/intl-objects.h"
#include "src/objects/managed.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
namespace v8 {
namespace internal {
// Heap object backing Intl.Collator instances. Besides the JSObject header
// it has two fields: a Managed<icu::Collator> and an integer |flags| word
// whose UsageBits bit field holds a Usage value.
class JSCollator : public JSObject {
public:
// ecma402/#sec-initializecollator
// Configures |collator| from |locales| and |options|. The result is a
// MaybeHandle and is marked V8_WARN_UNUSED_RESULT, so callers must check it.
V8_WARN_UNUSED_RESULT static MaybeHandle<JSCollator> InitializeCollator(
Isolate* isolate, Handle<JSCollator> collator, Handle<Object> locales,
Handle<Object> options);
// ecma402/#sec-intl.collator.prototype.resolvedoptions
// Returns a new JSObject describing the resolved options of |collator|.
static Handle<JSObject> ResolvedOptions(Isolate* isolate,
Handle<JSCollator> collator);
DECL_CAST(JSCollator)
DECL_PRINTER(JSCollator)
DECL_VERIFIER(JSCollator)
// [[Usage]] is one of the values "sort" or "search", identifying
// the collator usage.
// COUNT is not a usage value; UsageToString() treats it as unreachable.
enum class Usage {
SORT,
SEARCH,
COUNT
};
// Accessors for the usage value.
inline void set_usage(Usage usage);
inline Usage usage() const;
// Returns "sort" or "search" for the corresponding Usage value.
static const char* UsageToString(Usage usage);
// Layout description.
// Two pointer-sized fields follow JSObject::kHeaderSize: the ICU collator
// and the flags word.
#define JS_COLLATOR_FIELDS(V) \
V(kICUCollatorOffset, kPointerSize) \
V(kFlagsOffset, kPointerSize) \
/* Total size. */ \
V(kSize, 0)
DEFINE_FIELD_OFFSET_CONSTANTS(JSObject::kHeaderSize, JS_COLLATOR_FIELDS)
#undef JS_COLLATOR_FIELDS
// Bit positions in |flags|.
// NOTE(review): the literal 1 is presumably the width in bits of UsageBits
// -- confirm against DEFINE_BIT_FIELDS.
#define FLAGS_BIT_FIELDS(V, _) V(UsageBits, Usage, 1, _)
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
#undef FLAGS_BIT_FIELDS
// Both real usage values must be representable in UsageBits.
STATIC_ASSERT(Usage::SORT <= UsageBits::kMax);
STATIC_ASSERT(Usage::SEARCH <= UsageBits::kMax);
// The wrapped ICU collator and the raw flags word.
DECL_ACCESSORS(icu_collator, Managed<icu::Collator>)
DECL_INT_ACCESSORS(flags)
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JSCollator);
};
} // namespace internal
} // namespace v8
#include "src/objects/object-macros-undef.h"
#endif // V8_OBJECTS_JS_COLLATOR_H_

View File

@ -20,6 +20,7 @@
#include "src/messages.h"
#include "src/objects/intl-objects-inl.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-plural-rules-inl.h"
#include "src/objects/managed.h"
#include "src/runtime/runtime-utils.h"
@ -250,42 +251,37 @@ RUNTIME_FUNCTION(Runtime_CurrencyDigits) {
return *Intl::CurrencyDigits(isolate, currency);
}
// Runtime entry taking three arguments: a locale string, an options object
// and a resolved-options object. Instantiates an object from the native
// context's intl_collator_function, builds an icu::Collator through
// Collator::InitializeCollator, wraps it in a Managed<> and stores that in
// embedder field 0 of the new object, which is returned.
RUNTIME_FUNCTION(Runtime_CreateCollator) {
HandleScope scope(isolate);
DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
Handle<JSFunction> constructor(
isolate->native_context()->intl_collator_function(), isolate);
Handle<JSObject> collator_holder;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, collator_holder,
JSObject::New(constructor, constructor));
icu::Collator* collator =
Collator::InitializeCollator(isolate, locale, options, resolved);
// Collator creation is expected to always succeed here.
CHECK_NOT_NULL(collator);
// The third argument of FromRawPtr is the raw ICU object being wrapped.
Handle<Managed<icu::Collator>> managed =
Managed<icu::Collator>::FromRawPtr(isolate, 0, collator);
collator_holder->SetEmbedderField(0, *managed);
return *collator_holder;
}
// Runtime entry taking three arguments (a collator and two strings) and
// returning the result of Intl::InternalCompare on them.
// NOTE(review): argument 0 is converted twice and there are two return
// statements; this span appears to interleave the before and after lines of
// a diff (JSObject |collator_holder| vs. JSCollator |collator|) -- confirm
// against the actual file before relying on it.
RUNTIME_FUNCTION(Runtime_InternalCompare) {
HandleScope scope(isolate);
DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
CONVERT_ARG_HANDLE_CHECKED(JSCollator, collator, 0);
CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);
return *Intl::InternalCompare(isolate, collator_holder, string1, string2);
return *Intl::InternalCompare(isolate, collator, string1, string2);
}
// Runtime entry behind Intl.Collator.prototype.resolvedOptions. Takes one
// argument, the receiver. Throws a TypeError (kIncompatibleMethodReceiver)
// if it is not a JSCollator; otherwise returns the object produced by
// JSCollator::ResolvedOptions.
RUNTIME_FUNCTION(Runtime_CollatorResolvedOptions) {
HandleScope scope(isolate);
DCHECK_EQ(1, args.length());
// Accept any object here so the type check below can throw instead of
// failing the argument conversion.
CONVERT_ARG_HANDLE_CHECKED(Object, collator_obj, 0);
// 3. If pr does not have an [[InitializedCollator]] internal
// slot, throw a TypeError exception.
if (!collator_obj->IsJSCollator()) {
Handle<String> method_str = isolate->factory()->NewStringFromStaticChars(
"Intl.Collator.prototype.resolvedOptions");
THROW_NEW_ERROR_RETURN_FAILURE(
isolate, NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
method_str, collator_obj));
}
// The cast is safe after the IsJSCollator() check above.
Handle<JSCollator> collator = Handle<JSCollator>::cast(collator_obj);
return *JSCollator::ResolvedOptions(isolate, collator);
}
RUNTIME_FUNCTION(Runtime_PluralRulesResolvedOptions) {

View File

@ -209,8 +209,8 @@ namespace internal {
F(BreakIteratorFirst, 1, 1) \
F(BreakIteratorNext, 1, 1) \
F(CanonicalizeLanguageTag, 1, 1) \
F(CollatorResolvedOptions, 1, 1) \
F(CreateBreakIterator, 3, 1) \
F(CreateCollator, 3, 1) \
F(CreateDateTimeFormat, 3, 1) \
F(CreateNumberFormat, 3, 1) \
F(DefineWEProperty, 3, 1) \
@ -231,7 +231,7 @@ namespace internal {
F(StringToUpperCaseIntl, 1, 1)
#else
#define FOR_EACH_INTRINSIC_INTL(F)
#endif
#endif // V8_INTL_SUPPORT
#define FOR_EACH_INTRINSIC_INTERNAL(F) \
F(AllocateInNewSpace, 1, 1) \

View File

@ -48,8 +48,6 @@ var collatorBraket = new Intl.Collator({});
assertEquals(options.locale, collatorBraket.resolvedOptions().locale);
var collatorWithOptions = new Intl.Collator(undefined, {usage: 'search'});
assertLanguageTag(%GetDefaultICULocale(),
collatorWithOptions.resolvedOptions().locale);
assertNotNull(
%regexp_internal_match(/-u(-[a-zA-Z]+-[a-zA-Z]+)*-co-search/,
collatorWithOptions.resolvedOptions().locale));
var locale = collatorWithOptions.resolvedOptions().locale;
assertLanguageTag(%GetDefaultICULocale(), locale);
assertEquals(locale.indexOf('-co-search'), -1);

View File

@ -61,5 +61,3 @@ properties.forEach(function(prop) {
});
taintProperties(properties);
var locale = Intl.Collator().resolvedOptions().locale;

View File

@ -419,10 +419,6 @@
'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL],
'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7480
'intl402/Collator/unicode-ext-seq-in-private-tag': [FAIL],
'intl402/Collator/unicode-ext-seq-with-attribute': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7481
'intl402/NumberFormat/ignore-invalid-unicode-ext-values': [FAIL],
'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL],

View File

@ -159,17 +159,18 @@ INSTANCE_TYPES = {
1081: "JS_WEAK_SET_TYPE",
1082: "JS_TYPED_ARRAY_TYPE",
1083: "JS_DATA_VIEW_TYPE",
1084: "JS_INTL_LIST_FORMAT_TYPE",
1085: "JS_INTL_LOCALE_TYPE",
1086: "JS_INTL_PLURAL_RULES_TYPE",
1087: "JS_INTL_RELATIVE_TIME_FORMAT_TYPE",
1088: "WASM_GLOBAL_TYPE",
1089: "WASM_INSTANCE_TYPE",
1090: "WASM_MEMORY_TYPE",
1091: "WASM_MODULE_TYPE",
1092: "WASM_TABLE_TYPE",
1093: "JS_BOUND_FUNCTION_TYPE",
1094: "JS_FUNCTION_TYPE",
1084: "JS_INTL_COLLATOR_TYPE",
1085: "JS_INTL_LIST_FORMAT_TYPE",
1086: "JS_INTL_LOCALE_TYPE",
1087: "JS_INTL_PLURAL_RULES_TYPE",
1088: "JS_INTL_RELATIVE_TIME_FORMAT_TYPE",
1089: "WASM_GLOBAL_TYPE",
1090: "WASM_INSTANCE_TYPE",
1091: "WASM_MEMORY_TYPE",
1092: "WASM_MODULE_TYPE",
1093: "WASM_TABLE_TYPE",
1094: "JS_BOUND_FUNCTION_TYPE",
1095: "JS_FUNCTION_TYPE",
}
# List of known V8 maps.
@ -284,33 +285,33 @@ KNOWN_MAPS = {
("RO_SPACE", 0x047b1): (171, "Tuple2Map"),
("RO_SPACE", 0x04ae9): (161, "InterceptorInfoMap"),
("RO_SPACE", 0x04bf1): (169, "ScriptMap"),
("RO_SPACE", 0x09a19): (154, "AccessorInfoMap"),
("RO_SPACE", 0x09a69): (153, "AccessCheckInfoMap"),
("RO_SPACE", 0x09ab9): (155, "AccessorPairMap"),
("RO_SPACE", 0x09b09): (156, "AliasedArgumentsEntryMap"),
("RO_SPACE", 0x09b59): (157, "AllocationMementoMap"),
("RO_SPACE", 0x09ba9): (158, "AsyncGeneratorRequestMap"),
("RO_SPACE", 0x09bf9): (159, "DebugInfoMap"),
("RO_SPACE", 0x09c49): (160, "FunctionTemplateInfoMap"),
("RO_SPACE", 0x09c99): (162, "InterpreterDataMap"),
("RO_SPACE", 0x09ce9): (163, "ModuleInfoEntryMap"),
("RO_SPACE", 0x09d39): (164, "ModuleMap"),
("RO_SPACE", 0x09d89): (165, "ObjectTemplateInfoMap"),
("RO_SPACE", 0x09dd9): (166, "PromiseCapabilityMap"),
("RO_SPACE", 0x09e29): (167, "PromiseReactionMap"),
("RO_SPACE", 0x09e79): (168, "PrototypeInfoMap"),
("RO_SPACE", 0x09ec9): (170, "StackFrameInfoMap"),
("RO_SPACE", 0x09f19): (172, "Tuple3Map"),
("RO_SPACE", 0x09f69): (173, "ArrayBoilerplateDescriptionMap"),
("RO_SPACE", 0x09fb9): (174, "WasmDebugInfoMap"),
("RO_SPACE", 0x0a009): (175, "WasmExportedFunctionDataMap"),
("RO_SPACE", 0x0a059): (176, "CallableTaskMap"),
("RO_SPACE", 0x0a0a9): (177, "CallbackTaskMap"),
("RO_SPACE", 0x0a0f9): (178, "PromiseFulfillReactionJobTaskMap"),
("RO_SPACE", 0x0a149): (179, "PromiseRejectReactionJobTaskMap"),
("RO_SPACE", 0x0a199): (180, "PromiseResolveThenableJobTaskMap"),
("RO_SPACE", 0x0a1e9): (181, "AllocationSiteMap"),
("RO_SPACE", 0x0a239): (181, "AllocationSiteMap"),
("RO_SPACE", 0x09ae1): (154, "AccessorInfoMap"),
("RO_SPACE", 0x09b31): (153, "AccessCheckInfoMap"),
("RO_SPACE", 0x09b81): (155, "AccessorPairMap"),
("RO_SPACE", 0x09bd1): (156, "AliasedArgumentsEntryMap"),
("RO_SPACE", 0x09c21): (157, "AllocationMementoMap"),
("RO_SPACE", 0x09c71): (158, "AsyncGeneratorRequestMap"),
("RO_SPACE", 0x09cc1): (159, "DebugInfoMap"),
("RO_SPACE", 0x09d11): (160, "FunctionTemplateInfoMap"),
("RO_SPACE", 0x09d61): (162, "InterpreterDataMap"),
("RO_SPACE", 0x09db1): (163, "ModuleInfoEntryMap"),
("RO_SPACE", 0x09e01): (164, "ModuleMap"),
("RO_SPACE", 0x09e51): (165, "ObjectTemplateInfoMap"),
("RO_SPACE", 0x09ea1): (166, "PromiseCapabilityMap"),
("RO_SPACE", 0x09ef1): (167, "PromiseReactionMap"),
("RO_SPACE", 0x09f41): (168, "PrototypeInfoMap"),
("RO_SPACE", 0x09f91): (170, "StackFrameInfoMap"),
("RO_SPACE", 0x09fe1): (172, "Tuple3Map"),
("RO_SPACE", 0x0a031): (173, "ArrayBoilerplateDescriptionMap"),
("RO_SPACE", 0x0a081): (174, "WasmDebugInfoMap"),
("RO_SPACE", 0x0a0d1): (175, "WasmExportedFunctionDataMap"),
("RO_SPACE", 0x0a121): (176, "CallableTaskMap"),
("RO_SPACE", 0x0a171): (177, "CallbackTaskMap"),
("RO_SPACE", 0x0a1c1): (178, "PromiseFulfillReactionJobTaskMap"),
("RO_SPACE", 0x0a211): (179, "PromiseRejectReactionJobTaskMap"),
("RO_SPACE", 0x0a261): (180, "PromiseResolveThenableJobTaskMap"),
("RO_SPACE", 0x0a2b1): (181, "AllocationSiteMap"),
("RO_SPACE", 0x0a301): (181, "AllocationSiteMap"),
("MAP_SPACE", 0x02201): (1057, "ExternalMap"),
("MAP_SPACE", 0x02251): (1072, "JSMessageObjectMap"),
}