diff --git a/BUILD.gn b/BUILD.gn index 27a7e5fc67..87ba7d1e6f 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -2158,6 +2158,9 @@ v8_source_set("v8_base") { "src/objects/intl-objects.h", "src/objects/js-array-inl.h", "src/objects/js-array.h", + "src/objects/js-collator-inl.h", + "src/objects/js-collator.cc", + "src/objects/js-collator.h", "src/objects/js-collection-inl.h", "src/objects/js-collection.h", "src/objects/js-generator-inl.h", @@ -2862,6 +2865,9 @@ v8_source_set("v8_base") { "src/objects/intl-objects-inl.h", "src/objects/intl-objects.cc", "src/objects/intl-objects.h", + "src/objects/js-collator-inl.h", + "src/objects/js-collator.cc", + "src/objects/js-collator.h", "src/objects/js-list-format-inl.h", "src/objects/js-list-format.cc", "src/objects/js-list-format.h", diff --git a/src/bootstrapper.cc b/src/bootstrapper.cc index 21c77fdd15..0d0c4c024a 100644 --- a/src/bootstrapper.cc +++ b/src/bootstrapper.cc @@ -24,6 +24,7 @@ #include "src/objects/hash-table-inl.h" #ifdef V8_INTL_SUPPORT #include "src/objects/intl-objects.h" +#include "src/objects/js-collator.h" #include "src/objects/js-list-format.h" #include "src/objects/js-locale.h" #endif // V8_INTL_SUPPORT @@ -2935,9 +2936,11 @@ void Genesis::InitializeGlobal(Handle global_object, { Handle collator_constructor = InstallFunction( - isolate_, intl, "Collator", JS_OBJECT_TYPE, Collator::kSize, 0, - factory->the_hole_value(), Builtins::kIllegal); - native_context()->set_intl_collator_function(*collator_constructor); + isolate_, intl, "Collator", JS_INTL_COLLATOR_TYPE, JSCollator::kSize, + 0, factory->the_hole_value(), Builtins::kCollatorConstructor); + collator_constructor->shared()->DontAdaptArguments(); + InstallWithIntrinsicDefaultProto(isolate_, collator_constructor, + Context::INTL_COLLATOR_FUNCTION_INDEX); Handle prototype( JSObject::cast(collator_constructor->prototype()), isolate_); diff --git a/src/builtins/builtins-definitions.h b/src/builtins/builtins-definitions.h index c0dc7f9dad..1f9ee70aa8 100644 --- 
a/src/builtins/builtins-definitions.h +++ b/src/builtins/builtins-definitions.h @@ -1324,6 +1324,8 @@ namespace internal { BUILTIN_LIST_BASE(CPP, API, TFJ, TFC, TFS, TFH, ASM) \ BUILTIN_LIST_FROM_DSL(CPP, API, TFJ, TFC, TFS, TFH, ASM) \ \ + /* ecma402 #sec-intl.collator */ \ + CPP(CollatorConstructor) \ TFS(StringToLowerCaseIntl, kString) \ /* ES #sec-string.prototype.tolowercase */ \ TFJ(StringPrototypeToLowerCaseIntl, 0, kReceiver) \ diff --git a/src/builtins/builtins-intl.cc b/src/builtins/builtins-intl.cc index d59e4978e7..564fec15dc 100644 --- a/src/builtins/builtins-intl.cc +++ b/src/builtins/builtins-intl.cc @@ -15,6 +15,7 @@ #include "src/intl.h" #include "src/objects-inl.h" #include "src/objects/intl-objects.h" +#include "src/objects/js-collator-inl.h" #include "src/objects/js-list-format-inl.h" #include "src/objects/js-locale-inl.h" #include "src/objects/js-plural-rules-inl.h" @@ -1131,5 +1132,35 @@ BUILTIN(PluralRulesConstructor) { locales, options)); } +BUILTIN(CollatorConstructor) { + HandleScope scope(isolate); + Handle new_target; + // 1. If NewTarget is undefined, let newTarget be the active + // function object, else let newTarget be NewTarget. + if (args.new_target()->IsUndefined(isolate)) { + new_target = args.target(); + } else { + new_target = Handle::cast(args.new_target()); + } + + // [[Construct]] + Handle target = args.target(); + + Handle locales = args.atOrUndefined(isolate, 1); + Handle options = args.atOrUndefined(isolate, 2); + + // 5. Let collator be ? OrdinaryCreateFromConstructor(newTarget, + // "%CollatorPrototype%", internalSlotsList). + Handle collator_obj; + ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, collator_obj, + JSObject::New(target, new_target)); + Handle collator = Handle::cast(collator_obj); + collator->set_flags(0); + + // 6. Return ? InitializeCollator(collator, locales, options). 
+ RETURN_RESULT_OR_FAILURE(isolate, JSCollator::InitializeCollator( + isolate, collator, locales, options)); +} + } // namespace internal } // namespace v8 diff --git a/src/compiler/types.cc b/src/compiler/types.cc index 610d090d21..408e733d7c 100644 --- a/src/compiler/types.cc +++ b/src/compiler/types.cc @@ -207,6 +207,7 @@ Type::bitset BitsetType::Lub(HeapObjectType const& type) { case JS_MESSAGE_OBJECT_TYPE: case JS_DATE_TYPE: #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: case JS_INTL_LIST_FORMAT_TYPE: case JS_INTL_LOCALE_TYPE: case JS_INTL_PLURAL_RULES_TYPE: diff --git a/src/heap-symbols.h b/src/heap-symbols.h index 5e046c8a50..51a217c09d 100644 --- a/src/heap-symbols.h +++ b/src/heap-symbols.h @@ -37,9 +37,11 @@ V(call_string, "call") \ V(callee_string, "callee") \ V(caller_string, "caller") \ + V(caseFirst_string, "caseFirst") \ V(cell_value_string, "%cell_value") \ V(char_at_string, "CharAt") \ V(closure_string, "(closure)") \ + V(collation_string, "collation") \ V(column_string, "column") \ V(CompileError_string, "CompileError") \ V(configurable_string, "configurable") \ @@ -98,6 +100,7 @@ V(has_string, "has") \ V(hour_string, "hour") \ V(ignoreCase_string, "ignoreCase") \ + V(ignorePunctuation_string, "ignorePunctuation") \ V(illegal_access_string, "illegal access") \ V(illegal_argument_string, "illegal argument") \ V(index_string, "index") \ @@ -182,8 +185,9 @@ V(script_string, "script") \ V(short_string, "short") \ V(second_string, "second") \ - V(set_space_string, "set ") \ V(Set_string, "Set") \ + V(sensitivity_string, "sensitivity") \ + V(set_space_string, "set ") \ V(set_string, "set") \ V(SetIterator_string, "Set Iterator") \ V(setPrototypeOf_string, "setPrototypeOf") \ @@ -222,6 +226,7 @@ V(unicode_string, "unicode") \ V(unit_string, "unit") \ V(URIError_string, "URIError") \ + V(usage_string, "usage") \ V(use_asm_string, "use asm") \ V(use_strict_string, "use strict") \ V(value_string, "value") \ diff --git a/src/js/intl.js b/src/js/intl.js 
index 77a52ec80e..9fd9fc62c8 100644 --- a/src/js/intl.js +++ b/src/js/intl.js @@ -426,7 +426,12 @@ function attemptSingleLookup(availableLocales, requestedLocale) { var extensionMatch = %regexp_internal_match( GetUnicodeExtensionRE(), requestedLocale); var extension = IS_NULL(extensionMatch) ? '' : extensionMatch[0]; - return {__proto__: null, locale: availableLocale, extension: extension}; + return { + __proto__: null, + locale: availableLocale, + extension: extension, + localeWithExtension: availableLocale + extension, + }; } return UNDEFINED; } @@ -463,7 +468,8 @@ function lookupMatcher(service, requestedLocales) { return { __proto__: null, locale: 'und', - extension: '' + extension: '', + localeWithExtension: 'und', }; } @@ -780,141 +786,13 @@ DEFINE_METHOD( } ); -/** - * Initializes the given object so it's a valid Collator instance. - * Useful for subclassing. - */ -function CreateCollator(locales, options) { - if (IS_UNDEFINED(options)) { - options = {__proto__: null}; - } - - var getOption = getGetOption(options, 'collator'); - - var internalOptions = {__proto__: null}; - - %DefineWEProperty(internalOptions, 'usage', getOption( - 'usage', 'string', ['sort', 'search'], 'sort')); - - var sensitivity = getOption('sensitivity', 'string', - ['base', 'accent', 'case', 'variant']); - if (IS_UNDEFINED(sensitivity) && internalOptions.usage === 'sort') { - sensitivity = 'variant'; - } - %DefineWEProperty(internalOptions, 'sensitivity', sensitivity); - - %DefineWEProperty(internalOptions, 'ignorePunctuation', getOption( - 'ignorePunctuation', 'boolean', UNDEFINED, false)); - - var locale = resolveLocale('collator', locales, options); - - // TODO(jshin): ICU now can take kb, kc, etc. Switch over to using ICU - // directly. See Collator::InitializeCollator and - // Collator::CreateICUCollator in src/objects/intl-objects.cc - // ICU can't take kb, kc... parameters through localeID, so we need to pass - // them as options. 
- // One exception is -co- which has to be part of the extension, but only for - // usage: sort, and its value can't be 'standard' or 'search'. - var extensionMap = parseExtension(locale.extension); - - /** - * Map of Unicode extensions to option properties, and their values and types, - * for a collator. - */ - var COLLATOR_KEY_MAP = { - __proto__: null, - 'kn': { __proto__: null, 'property': 'numeric', 'type': 'boolean'}, - 'kf': { __proto__: null, 'property': 'caseFirst', 'type': 'string', - 'values': ['false', 'lower', 'upper']} - }; - - setOptions( - options, extensionMap, COLLATOR_KEY_MAP, getOption, internalOptions); - - var collation = 'default'; - var extension = ''; - if (HAS_OWN_PROPERTY(extensionMap, 'co') && internalOptions.usage === 'sort') { - - /** - * Allowed -u-co- values. List taken from: - * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml - */ - var ALLOWED_CO_VALUES = [ - 'big5han', 'dict', 'direct', 'ducet', 'gb2312', 'phonebk', 'phonetic', - 'pinyin', 'reformed', 'searchjl', 'stroke', 'trad', 'unihan', 'zhuyin' - ]; - - if (%ArrayIndexOf(ALLOWED_CO_VALUES, extensionMap.co, 0) !== -1) { - extension = '-u-co-' + extensionMap.co; - // ICU can't tell us what the collation is, so save user's input. - collation = extensionMap.co; - } - } else if (internalOptions.usage === 'search') { - extension = '-u-co-search'; - } - %DefineWEProperty(internalOptions, 'collation', collation); - - var requestedLocale = locale.locale + extension; - - // We define all properties C++ code may produce, to prevent security - // problems. If malicious user decides to redefine Object.prototype.locale - // we can't just use plain x.locale = 'us' or in C++ Set("locale", "us"). - // %object_define_properties will either succeed defining or throw an error. 
- var resolved = %object_define_properties({__proto__: null}, { - caseFirst: {writable: true}, - collation: {value: internalOptions.collation, writable: true}, - ignorePunctuation: {writable: true}, - locale: {writable: true}, - numeric: {writable: true}, - requestedLocale: {value: requestedLocale, writable: true}, - sensitivity: {writable: true}, - strength: {writable: true}, - usage: {value: internalOptions.usage, writable: true} - }); - - var collator = %CreateCollator(requestedLocale, internalOptions, resolved); - - %MarkAsInitializedIntlObjectOfType(collator, COLLATOR_TYPE); - collator[resolvedSymbol] = resolved; - - return collator; -} - - -/** - * Constructs Intl.Collator object given optional locales and options - * parameters. - * - * @constructor - */ -function CollatorConstructor() { - return IntlConstruct(this, GlobalIntlCollator, CreateCollator, new.target, - arguments); -} -%SetCode(GlobalIntlCollator, CollatorConstructor); - - /** * Collator resolvedOptions method. */ DEFINE_METHOD( GlobalIntlCollator.prototype, resolvedOptions() { - var methodName = 'resolvedOptions'; - if(!IS_RECEIVER(this)) { - throw %make_type_error(kIncompatibleMethodReceiver, methodName, this); - } - var coll = %IntlUnwrapReceiver(this, COLLATOR_TYPE, GlobalIntlCollator, - methodName, false); - return { - locale: coll[resolvedSymbol].locale, - usage: coll[resolvedSymbol].usage, - sensitivity: coll[resolvedSymbol].sensitivity, - ignorePunctuation: coll[resolvedSymbol].ignorePunctuation, - numeric: coll[resolvedSymbol].numeric, - caseFirst: coll[resolvedSymbol].caseFirst, - collation: coll[resolvedSymbol].collation - }; + return %CollatorResolvedOptions(this); } ); diff --git a/src/objects-body-descriptors-inl.h b/src/objects-body-descriptors-inl.h index fde2c4a1d0..d8351bc1c1 100644 --- a/src/objects-body-descriptors-inl.h +++ b/src/objects-body-descriptors-inl.h @@ -726,6 +726,7 @@ ReturnType BodyDescriptorApply(InstanceType type, T1 p1, T2 p2, T3 p3, T4 p4) { case 
JS_MESSAGE_OBJECT_TYPE: case JS_BOUND_FUNCTION_TYPE: #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: case JS_INTL_LIST_FORMAT_TYPE: case JS_INTL_LOCALE_TYPE: case JS_INTL_PLURAL_RULES_TYPE: diff --git a/src/objects-debug.cc b/src/objects-debug.cc index b6118be1ca..fbebcd5350 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -15,6 +15,9 @@ #include "src/objects-inl.h" #include "src/objects/arguments-inl.h" #include "src/objects/bigint.h" +#ifdef V8_INTL_SUPPORT +#include "src/objects/js-collator-inl.h" +#endif // V8_INTL_SUPPORT #include "src/objects/data-handler-inl.h" #include "src/objects/debug-objects-inl.h" #include "src/objects/hash-table-inl.h" @@ -353,6 +356,9 @@ void HeapObject::HeapObjectVerify(Isolate* isolate) { CodeDataContainer::cast(this)->CodeDataContainerVerify(isolate); break; #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: + JSCollator::cast(this)->JSCollatorVerify(isolate); + break; case JS_INTL_LIST_FORMAT_TYPE: JSListFormat::cast(this)->JSListFormatVerify(isolate); break; @@ -1868,6 +1874,13 @@ void InterpreterData::InterpreterDataVerify(Isolate* isolate) { } #ifdef V8_INTL_SUPPORT +void JSCollator::JSCollatorVerify(Isolate* isolate) { + CHECK(IsJSCollator()); + JSObjectVerify(isolate); + VerifyObjectField(isolate, kICUCollatorOffset); + VerifyObjectField(isolate, kFlagsOffset); +} + void JSListFormat::JSListFormatVerify(Isolate* isolate) { JSObjectVerify(isolate); VerifyObjectField(isolate, kLocaleOffset); diff --git a/src/objects-definitions.h b/src/objects-definitions.h index 2a51a34eae..501c02a1f5 100644 --- a/src/objects-definitions.h +++ b/src/objects-definitions.h @@ -217,6 +217,7 @@ namespace internal { #ifdef V8_INTL_SUPPORT #define INSTANCE_TYPE_LIST(V) \ INSTANCE_TYPE_LIST_BEFORE_INTL(V) \ + V(JS_INTL_COLLATOR_TYPE) \ V(JS_INTL_LIST_FORMAT_TYPE) \ V(JS_INTL_LOCALE_TYPE) \ V(JS_INTL_PLURAL_RULES_TYPE) \ diff --git a/src/objects-printer.cc b/src/objects-printer.cc index a58e760d07..38be36eef5 100644 --- 
a/src/objects-printer.cc +++ b/src/objects-printer.cc @@ -14,6 +14,9 @@ #include "src/interpreter/bytecodes.h" #include "src/objects-inl.h" #include "src/objects/arguments-inl.h" +#ifdef V8_INTL_SUPPORT +#include "src/objects/js-collator-inl.h" +#endif // V8_INTL_SUPPORT #include "src/objects/data-handler-inl.h" #include "src/objects/debug-objects-inl.h" #include "src/objects/hash-table-inl.h" @@ -305,6 +308,9 @@ void HeapObject::HeapObjectPrint(std::ostream& os) { // NOLINT JSDataView::cast(this)->JSDataViewPrint(os); break; #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: + JSCollator::cast(this)->JSCollatorPrint(os); + break; case JS_INTL_LIST_FORMAT_TYPE: JSListFormat::cast(this)->JSListFormatPrint(os); break; @@ -1955,6 +1961,13 @@ void Script::ScriptPrint(std::ostream& os) { // NOLINT } #ifdef V8_INTL_SUPPORT +void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT + JSObjectPrintHeader(os, this, "JSCollator"); + os << "\n - usage: " << JSCollator::UsageToString(usage()); + os << "\n - icu collator: " << Brief(icu_collator()); + os << "\n"; +} + void JSListFormat::JSListFormatPrint(std::ostream& os) { // NOLINT JSObjectPrintHeader(os, this, "JSListFormat"); os << "\n - locale: " << Brief(locale()); diff --git a/src/objects.cc b/src/objects.cc index 88fc927b95..f84389291c 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -59,6 +59,9 @@ #include "src/objects/debug-objects-inl.h" #include "src/objects/frame-array-inl.h" #include "src/objects/hash-table-inl.h" +#ifdef V8_INTL_SUPPORT +#include "src/objects/js-collator.h" +#endif // V8_INTL_SUPPORT #include "src/objects/js-collection-inl.h" #include "src/objects/js-generator-inl.h" #ifdef V8_INTL_SUPPORT @@ -1419,6 +1422,8 @@ int JSObject::GetHeaderSize(InstanceType type, case JS_MODULE_NAMESPACE_TYPE: return JSModuleNamespace::kHeaderSize; #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: + return JSCollator::kSize; case JS_INTL_LIST_FORMAT_TYPE: return JSListFormat::kSize; case 
JS_INTL_LOCALE_TYPE: @@ -3171,6 +3176,7 @@ VisitorId Map::GetVisitorId(Map* map) { case JS_REGEXP_TYPE: case JS_REGEXP_STRING_ITERATOR_TYPE: #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: case JS_INTL_LIST_FORMAT_TYPE: case JS_INTL_LOCALE_TYPE: case JS_INTL_PLURAL_RULES_TYPE: @@ -13099,6 +13105,7 @@ bool CanSubclassHaveInobjectProperties(InstanceType instance_type) { case JS_FUNCTION_TYPE: case JS_GENERATOR_OBJECT_TYPE: #ifdef V8_INTL_SUPPORT + case JS_INTL_COLLATOR_TYPE: case JS_INTL_PLURAL_RULES_TYPE: #endif case JS_ASYNC_GENERATOR_OBJECT_TYPE: diff --git a/src/objects.h b/src/objects.h index bf27eee12d..1e8578204e 100644 --- a/src/objects.h +++ b/src/objects.h @@ -75,9 +75,10 @@ // - JSDate // - JSMessageObject // - JSModuleNamespace -// - JSListFormat // If V8_INTL_SUPPORT enabled. -// - JSLocale // If V8_INTL_SUPPORT enabled. -// - JSPluralRules // If V8_INTL_SUPPORT enabled. +// - JSCollator // If V8_INTL_SUPPORT enabled. +// - JSListFormat // If V8_INTL_SUPPORT enabled. +// - JSLocale // If V8_INTL_SUPPORT enabled. +// - JSPluralRules // If V8_INTL_SUPPORT enabled. // - JSRelativeTimeFormat // If V8_INTL_SUPPORT enabled. 
// - WasmGlobalObject // - WasmInstanceObject @@ -583,6 +584,7 @@ enum InstanceType : uint16_t { JS_DATA_VIEW_TYPE, #ifdef V8_INTL_SUPPORT + JS_INTL_COLLATOR_TYPE, JS_INTL_LIST_FORMAT_TYPE, JS_INTL_LOCALE_TYPE, JS_INTL_PLURAL_RULES_TYPE, @@ -700,6 +702,7 @@ class JSAsyncGeneratorObject; class JSGlobalObject; class JSGlobalProxy; #ifdef V8_INTL_SUPPORT +class JSCollator; class JSListFormat; class JSLocale; class JSPluralRules; @@ -911,6 +914,7 @@ class ZoneForwardList; #ifdef V8_INTL_SUPPORT #define HEAP_OBJECT_ORDINARY_TYPE_LIST(V) \ HEAP_OBJECT_ORDINARY_TYPE_LIST_BASE(V) \ + V(JSCollator) \ V(JSListFormat) \ V(JSLocale) \ V(JSPluralRules) \ @@ -1031,6 +1035,7 @@ class ZoneForwardList; #define INSTANCE_TYPE_CHECKERS_SINGLE(V) \ INSTANCE_TYPE_CHECKERS_SINGLE_BASE(V) \ + V(JSCollator, JS_INTL_COLLATOR_TYPE) \ V(JSListFormat, JS_INTL_LIST_FORMAT_TYPE) \ V(JSLocale, JS_INTL_LOCALE_TYPE) \ V(JSPluralRules, JS_INTL_PLURAL_RULES_TYPE) \ diff --git a/src/objects/intl-objects.cc b/src/objects/intl-objects.cc index 3bd80532be..55787403d5 100644 --- a/src/objects/intl-objects.cc +++ b/src/objects/intl-objects.cc @@ -19,6 +19,7 @@ #include "src/intl.h" #include "src/isolate.h" #include "src/objects-inl.h" +#include "src/objects/js-collator-inl.h" #include "src/objects/managed.h" #include "src/objects/string.h" #include "src/property-descriptor.h" @@ -494,193 +495,6 @@ void SetResolvedNumberSettings(Isolate* isolate, const icu::Locale& icu_locale, SetResolvedNumericSettings(isolate, icu_locale, number_format, resolved); } -icu::Collator* CreateICUCollator(Isolate* isolate, - const icu::Locale& icu_locale, - Handle options) { - // Make collator from options. - icu::Collator* collator = nullptr; - UErrorCode status = U_ZERO_ERROR; - collator = icu::Collator::createInstance(icu_locale, status); - - if (U_FAILURE(status)) { - delete collator; - return nullptr; - } - - // Set flags first, and then override them with sensitivity if necessary. 
- bool numeric; - if (ExtractBooleanSetting(isolate, options, "numeric", &numeric)) { - collator->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, - status); - } - - // Normalization is always on, by the spec. We are free to optimize - // if the strings are already normalized (but we don't have a way to tell - // that right now). - collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); - - icu::UnicodeString case_first; - if (ExtractStringSetting(isolate, options, "caseFirst", &case_first)) { - if (case_first == UNICODE_STRING_SIMPLE("upper")) { - collator->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); - } else if (case_first == UNICODE_STRING_SIMPLE("lower")) { - collator->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); - } else { - // Default (false/off). - collator->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); - } - } - - icu::UnicodeString sensitivity; - if (ExtractStringSetting(isolate, options, "sensitivity", &sensitivity)) { - if (sensitivity == UNICODE_STRING_SIMPLE("base")) { - collator->setStrength(icu::Collator::PRIMARY); - } else if (sensitivity == UNICODE_STRING_SIMPLE("accent")) { - collator->setStrength(icu::Collator::SECONDARY); - } else if (sensitivity == UNICODE_STRING_SIMPLE("case")) { - collator->setStrength(icu::Collator::PRIMARY); - collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status); - } else { - // variant (default) - collator->setStrength(icu::Collator::TERTIARY); - } - } - - bool ignore; - if (ExtractBooleanSetting(isolate, options, "ignorePunctuation", &ignore)) { - if (ignore) { - collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); - } - } - - return collator; -} - -void SetResolvedCollatorSettings(Isolate* isolate, - const icu::Locale& icu_locale, - icu::Collator* collator, - Handle resolved) { - Factory* factory = isolate->factory(); - UErrorCode status = U_ZERO_ERROR; - - JSObject::SetProperty( - isolate, resolved, 
factory->NewStringFromStaticChars("numeric"), - factory->ToBoolean( - collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON), - LanguageMode::kSloppy) - .Assert(); - - switch (collator->getAttribute(UCOL_CASE_FIRST, status)) { - case UCOL_LOWER_FIRST: - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("caseFirst"), - factory->NewStringFromStaticChars("lower"), LanguageMode::kSloppy) - .Assert(); - break; - case UCOL_UPPER_FIRST: - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("caseFirst"), - factory->NewStringFromStaticChars("upper"), LanguageMode::kSloppy) - .Assert(); - break; - default: - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("caseFirst"), - factory->NewStringFromStaticChars("false"), LanguageMode::kSloppy) - .Assert(); - } - - switch (collator->getAttribute(UCOL_STRENGTH, status)) { - case UCOL_PRIMARY: { - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("strength"), - factory->NewStringFromStaticChars("primary"), LanguageMode::kSloppy) - .Assert(); - - // case level: true + s1 -> case, s1 -> base. 
- if (UCOL_ON == collator->getAttribute(UCOL_CASE_LEVEL, status)) { - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("sensitivity"), - factory->NewStringFromStaticChars("case"), LanguageMode::kSloppy) - .Assert(); - } else { - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("sensitivity"), - factory->NewStringFromStaticChars("base"), LanguageMode::kSloppy) - .Assert(); - } - break; - } - case UCOL_SECONDARY: - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("strength"), - factory->NewStringFromStaticChars("secondary"), LanguageMode::kSloppy) - .Assert(); - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("sensitivity"), - factory->NewStringFromStaticChars("accent"), LanguageMode::kSloppy) - .Assert(); - break; - case UCOL_TERTIARY: - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("strength"), - factory->NewStringFromStaticChars("tertiary"), LanguageMode::kSloppy) - .Assert(); - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("sensitivity"), - factory->NewStringFromStaticChars("variant"), LanguageMode::kSloppy) - .Assert(); - break; - case UCOL_QUATERNARY: - // We shouldn't get quaternary and identical from ICU, but if we do - // put them into variant. 
- JSObject::SetProperty(isolate, resolved, - factory->NewStringFromStaticChars("strength"), - factory->NewStringFromStaticChars("quaternary"), - LanguageMode::kSloppy) - .Assert(); - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("sensitivity"), - factory->NewStringFromStaticChars("variant"), LanguageMode::kSloppy) - .Assert(); - break; - default: - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("strength"), - factory->NewStringFromStaticChars("identical"), LanguageMode::kSloppy) - .Assert(); - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("sensitivity"), - factory->NewStringFromStaticChars("variant"), LanguageMode::kSloppy) - .Assert(); - } - - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("ignorePunctuation"), - factory->ToBoolean(collator->getAttribute(UCOL_ALTERNATE_HANDLING, - status) == UCOL_SHIFTED), - LanguageMode::kSloppy) - .Assert(); - - // Set the locale - char result[ULOC_FULLNAME_CAPACITY]; - status = U_ZERO_ERROR; - uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY, - FALSE, &status); - if (U_SUCCESS(status)) { - JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("locale"), - factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy) - .Assert(); - } else { - // This would never happen, since we got the locale from ICU. 
- JSObject::SetProperty( - isolate, resolved, factory->NewStringFromStaticChars("locale"), - factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy) - .Assert(); - } -} - icu::BreakIterator* CreateICUBreakIterator(Isolate* isolate, const icu::Locale& icu_locale, Handle options) { @@ -845,38 +659,6 @@ void NumberFormat::DeleteNumberFormat(const v8::WeakCallbackInfo& data) { GlobalHandles::Destroy(reinterpret_cast(data.GetParameter())); } -icu::Collator* Collator::InitializeCollator(Isolate* isolate, - Handle locale, - Handle options, - Handle resolved) { - icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale); - DCHECK(!icu_locale.isBogus()); - - icu::Collator* collator = CreateICUCollator(isolate, icu_locale, options); - if (!collator) { - // Remove extensions and try again. - icu::Locale no_extension_locale(icu_locale.getBaseName()); - collator = CreateICUCollator(isolate, no_extension_locale, options); - - if (!collator) { - FATAL("Failed to create ICU collator, are ICU data files missing?"); - } - - // Set resolved settings (pattern, numbering system). 
- SetResolvedCollatorSettings(isolate, no_extension_locale, collator, - resolved); - } else { - SetResolvedCollatorSettings(isolate, icu_locale, collator, resolved); - } - - CHECK_NOT_NULL(collator); - return collator; -} - -icu::Collator* Collator::UnpackCollator(Handle obj) { - return Managed::cast(obj->GetEmbedderField(0))->raw(); -} - icu::BreakIterator* V8BreakIterator::InitializeBreakIterator( Isolate* isolate, Handle locale, Handle options, Handle resolved) { @@ -1158,6 +940,9 @@ MaybeHandle Intl::UnwrapReceiver(Isolate* isolate, Intl::Type type, Handle method_name, bool check_legacy_constructor) { + DCHECK(type == Intl::Type::kCollator || type == Intl::Type::kNumberFormat || + type == Intl::Type::kDateTimeFormat || + type == Intl::Type::kBreakIterator); Handle new_receiver = receiver; if (check_legacy_constructor) { ASSIGN_RETURN_ON_EXCEPTION( @@ -1165,6 +950,20 @@ MaybeHandle Intl::UnwrapReceiver(Isolate* isolate, LegacyUnwrapReceiver(isolate, receiver, constructor, type), JSObject); } + // Collator has been ported to use regular instance types. We + // shouldn't be using Intl::IsObjectOfType anymore. + if (type == Intl::Type::kCollator) { + if (!receiver->IsJSCollator()) { + // 3. a. Throw a TypeError exception. + THROW_NEW_ERROR(isolate, + NewTypeError(MessageTemplate::kIncompatibleMethodReceiver, + method_name, receiver), + JSObject); + } + return Handle::cast(receiver); + } + + DCHECK_NE(type, Intl::Type::kCollator); // 3. 
If Type(new_receiver) is not Object or nf does not have an // [[Initialized...]] internal slot, then if (!Intl::IsObjectOfType(isolate, new_receiver, type)) { @@ -1886,23 +1685,24 @@ MaybeHandle Intl::StringLocaleCompare(Isolate* isolate, Handle locales, Handle options) { Factory* factory = isolate->factory(); - Handle collator_holder; + Handle collator; ASSIGN_RETURN_ON_EXCEPTION( - isolate, collator_holder, + isolate, collator, CachedOrNewService(isolate, factory->NewStringFromStaticChars("collator"), locales, options), Object); - DCHECK(Intl::IsObjectOfType(isolate, collator_holder, Intl::kCollator)); - return Intl::InternalCompare(isolate, collator_holder, string1, string2); + CHECK(collator->IsJSCollator()); + return Intl::InternalCompare(isolate, Handle::cast(collator), + string1, string2); } Handle Intl::InternalCompare(Isolate* isolate, - Handle collator_holder, + Handle collator, Handle string1, Handle string2) { Factory* factory = isolate->factory(); - icu::Collator* collator = Collator::UnpackCollator(collator_holder); - CHECK_NOT_NULL(collator); + icu::Collator* icu_collator = collator->icu_collator()->raw(); + CHECK_NOT_NULL(icu_collator); string1 = String::Flatten(isolate, string1); string2 = String::Flatten(isolate, string2); @@ -1921,7 +1721,7 @@ Handle Intl::InternalCompare(Isolate* isolate, FALSE, GetUCharBufferFromFlat(flat1, &sap1, length1), length1); icu::UnicodeString string_val2( FALSE, GetUCharBufferFromFlat(flat2, &sap2, length2), length2); - result = collator->compare(string_val1, string_val2, status); + result = icu_collator->compare(string_val1, string_val2, status); } DCHECK(U_SUCCESS(status)); diff --git a/src/objects/intl-objects.h b/src/objects/intl-objects.h index 3666d604aa..bdda6a225c 100644 --- a/src/objects/intl-objects.h +++ b/src/objects/intl-objects.h @@ -117,26 +117,6 @@ class NumberFormat { NumberFormat(); }; -class Collator { - public: - // Create a collator for the specificied locale and options. 
Stores the - // collator in the provided collator_holder. - static icu::Collator* InitializeCollator(Isolate* isolate, - Handle locale, - Handle options, - Handle resolved); - - // Unpacks collator object from corresponding JavaScript object. - static icu::Collator* UnpackCollator(Handle obj); - - // Layout description. - static const int kCollator = JSObject::kHeaderSize; - static const int kSize = kCollator + kPointerSize; - - private: - Collator(); -}; - class V8BreakIterator { public: // Create a BreakIterator for the specificied locale and options. Returns the @@ -313,7 +293,7 @@ class Intl { Handle locales, Handle options); V8_WARN_UNUSED_RESULT static Handle InternalCompare( - Isolate* isolate, Handle collator, Handle s1, + Isolate* isolate, Handle collator, Handle s1, Handle s2); // ecma402/#sup-properties-of-the-number-prototype-object diff --git a/src/objects/js-collator-inl.h b/src/objects/js-collator-inl.h new file mode 100644 index 0000000000..fb25809345 --- /dev/null +++ b/src/objects/js-collator-inl.h @@ -0,0 +1,42 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INTL_SUPPORT +#error Internationalization is expected to be enabled. 
+#endif // V8_INTL_SUPPORT + +#ifndef V8_OBJECTS_JS_COLLATOR_INL_H_ +#define V8_OBJECTS_JS_COLLATOR_INL_H_ + +#include "src/objects-inl.h" +#include "src/objects/js-collator.h" + +// Has to be the last include (doesn't have include guards): +#include "src/objects/object-macros.h" + +namespace v8 { +namespace internal { + +ACCESSORS(JSCollator, icu_collator, Managed, kICUCollatorOffset) +SMI_ACCESSORS(JSCollator, flags, kFlagsOffset) + +inline void JSCollator::set_usage(Usage usage) { + DCHECK_LT(usage, Usage::COUNT); + int hints = flags(); + hints = UsageBits::update(hints, usage); + set_flags(hints); +} + +inline JSCollator::Usage JSCollator::usage() const { + return UsageBits::decode(flags()); +} + +CAST_ACCESSOR(JSCollator); + +} // namespace internal +} // namespace v8 + +#include "src/objects/object-macros-undef.h" + +#endif // V8_OBJECTS_JS_COLLATOR_INL_H_ diff --git a/src/objects/js-collator.cc b/src/objects/js-collator.cc new file mode 100644 index 0000000000..7786857bef --- /dev/null +++ b/src/objects/js-collator.cc @@ -0,0 +1,535 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INTL_SUPPORT +#error Internationalization is expected to be enabled. +#endif // V8_INTL_SUPPORT + +#include "src/objects/js-collator.h" + +#include "src/isolate.h" +#include "src/objects-inl.h" +#include "src/objects/js-collator-inl.h" +#include "unicode/coll.h" +#include "unicode/locid.h" +#include "unicode/strenum.h" +#include "unicode/ucol.h" +#include "unicode/uloc.h" + +namespace v8 { +namespace internal { + +namespace { + +// TODO(gsathya): Consider internalizing the value strings. 
+void CreateDataPropertyForOptions(Isolate* isolate, Handle options, + Handle key, const char* value) { + CHECK_NOT_NULL(value); + Handle value_str = + isolate->factory()->NewStringFromAsciiChecked(value); + + // This is a brand new JSObject that shouldn't already have the same + // key so this shouldn't fail. + CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_str, + kDontThrow) + .FromJust()); +} + +void CreateDataPropertyForOptions(Isolate* isolate, Handle options, + Handle key, bool value) { + Handle value_obj = isolate->factory()->ToBoolean(value); + + // This is a brand new JSObject that shouldn't already have the same + // key so this shouldn't fail. + CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_obj, + kDontThrow) + .FromJust()); +} + +} // anonymous namespace + +// static +Handle JSCollator::ResolvedOptions(Isolate* isolate, + Handle collator) { + Handle options = + isolate->factory()->NewJSObject(isolate->object_function()); + + JSCollator::Usage usage = collator->usage(); + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->usage_string(), + JSCollator::UsageToString(usage)); + + icu::Collator* icu_collator = collator->icu_collator()->raw(); + CHECK_NOT_NULL(icu_collator); + + UErrorCode status = U_ZERO_ERROR; + bool numeric = + icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON; + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->numeric_string(), numeric); + + const char* case_first = nullptr; + status = U_ZERO_ERROR; + switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) { + case UCOL_LOWER_FIRST: + case_first = "lower"; + break; + case UCOL_UPPER_FIRST: + case_first = "upper"; + break; + default: + case_first = "false"; + } + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->caseFirst_string(), case_first); + + const char* sensitivity = nullptr; + status = U_ZERO_ERROR; + 
switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) { + case UCOL_PRIMARY: { + CHECK(U_SUCCESS(status)); + status = U_ZERO_ERROR; + // case level: true + s1 -> case, s1 -> base. + if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) { + sensitivity = "case"; + } else { + sensitivity = "base"; + } + CHECK(U_SUCCESS(status)); + break; + } + case UCOL_SECONDARY: + sensitivity = "accent"; + break; + case UCOL_TERTIARY: + sensitivity = "variant"; + break; + case UCOL_QUATERNARY: + // We shouldn't get quaternary and identical from ICU, but if we do + // put them into variant. + sensitivity = "variant"; + break; + default: + sensitivity = "variant"; + } + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->sensitivity_string(), sensitivity); + + status = U_ZERO_ERROR; + bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING, + status) == UCOL_SHIFTED; + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->ignorePunctuation_string(), + ignore_punctuation); + + status = U_ZERO_ERROR; + const char* collation; + std::unique_ptr collation_values( + icu_collator->getKeywordValues("co", status)); + // Collation wasn't provided as a keyword to icu, use default. + if (status == U_ILLEGAL_ARGUMENT_ERROR) { + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->collation_string(), "default"); + } else { + CHECK(U_SUCCESS(status)); + CHECK_NOT_NULL(collation_values.get()); + + int32_t length; + status = U_ZERO_ERROR; + collation = collation_values->next(&length, status); + CHECK(U_SUCCESS(status)); + + // There has to be at least one value. 
+ CHECK_NOT_NULL(collation); + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->collation_string(), collation); + + status = U_ZERO_ERROR; + collation_values->reset(status); + CHECK(U_SUCCESS(status)); + } + + status = U_ZERO_ERROR; + icu::Locale icu_locale = icu_collator->getLocale(ULOC_VALID_LOCALE, status); + CHECK(U_SUCCESS(status)); + + char result[ULOC_FULLNAME_CAPACITY]; + status = U_ZERO_ERROR; + uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY, + FALSE, &status); + CHECK(U_SUCCESS(status)); + + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->locale_string(), result); + + return options; +} + +namespace { + +std::map LookupUnicodeExtensions( + icu::Locale& icu_locale, std::set& relevant_keys) { + std::map extensions; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr keywords( + icu_locale.createKeywords(status)); + if (U_FAILURE(status)) return extensions; + + if (!keywords) return extensions; + char value[ULOC_FULLNAME_CAPACITY]; + + int32_t length; + status = U_ZERO_ERROR; + for (const char* keyword = keywords->next(&length, status); + keyword != nullptr; keyword = keywords->next(&length, status)) { + // Ignore failures in ICU and skip to the next keyword. + // + // This is fine.™ + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + + icu_locale.getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status); + + // Ignore failures in ICU and skip to the next keyword. + // + // This is fine.™ + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + + const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword); + + // Ignore keywords that we don't recognize - spec allows that. 
+ if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) { + const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value); + extensions.insert( + std::pair(bcp47_key, bcp47_value)); + } + } + + return extensions; +} + +void SetCaseFirstOption(icu::Collator* icu_collator, const char* value) { + CHECK_NOT_NULL(icu_collator); + CHECK_NOT_NULL(value); + UErrorCode status = U_ZERO_ERROR; + if (strncmp(value, "upper", 5) == 0) { + icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); + } else if (strncmp(value, "lower", 5) == 0) { + icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); + } else { + icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); + } + CHECK(U_SUCCESS(status)); +} + +} // anonymous namespace + +// static +MaybeHandle JSCollator::InitializeCollator( + Isolate* isolate, Handle collator, Handle locales, + Handle options_obj) { + // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). + Handle requested_locales; + ASSIGN_RETURN_ON_EXCEPTION(isolate, requested_locales, + Intl::CanonicalizeLocaleListJS(isolate, locales), + JSCollator); + + // 2. If options is undefined, then + if (options_obj->IsUndefined(isolate)) { + // 2. a. Let options be ObjectCreate(null). + options_obj = isolate->factory()->NewJSObjectWithNullProto(); + } else { + // 3. Else + // 3. a. Let options be ? ToObject(options). + ASSIGN_RETURN_ON_EXCEPTION( + isolate, options_obj, + Object::ToObject(isolate, options_obj, "Intl.Collator"), JSCollator); + } + + // At this point, options_obj can either be a JSObject or a JSProxy only. + Handle options = Handle::cast(options_obj); + + // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", + // "search" », "sort"). 
+ std::vector values = {"sort", "search"}; + std::unique_ptr usage_str = nullptr; + JSCollator::Usage usage = JSCollator::Usage::SORT; + Maybe found_usage = Intl::GetStringOption( + isolate, options, "usage", values, "Intl.Collator", &usage_str); + MAYBE_RETURN(found_usage, MaybeHandle()); + + if (found_usage.FromJust()) { + DCHECK_NOT_NULL(usage_str.get()); + if (strncmp(usage_str.get(), "search", 6) == 0) { + usage = JSCollator::Usage::SEARCH; + } + } + + // 5. Set collator.[[Usage]] to usage. + collator->set_usage(usage); + + // 6. If usage is "sort", then + // a. Let localeData be %Collator%.[[SortLocaleData]]. + // 7. Else, + // a. Let localeData be %Collator%.[[SearchLocaleData]]. + // + // The above two spec operations aren't required, the Intl spec is + // crazy. See https://github.com/tc39/ecma402/issues/256 + + // TODO(gsathya): This is currently done as part of the + // Intl::ResolveLocale call below. Fix this once resolveLocale is + // changed to not do the lookup. + // + // 9. Let matcher be ? GetOption(options, "localeMatcher", "string", + // « "lookup", "best fit" », "best fit"). + // 10. Set opt.[[localeMatcher]] to matcher. + + // 11. Let numeric be ? GetOption(options, "numeric", "boolean", + // undefined, undefined). + // 12. If numeric is not undefined, then + // a. Let numeric be ! ToString(numeric). + // + // Note: We omit the ToString(numeric) operation as it's not + // observable. Intl::GetBoolOption returns a Boolean and + // ToString(Boolean) is not side-effecting. + // + // 13. Set opt.[[kn]] to numeric. + bool numeric; + Maybe found_numeric = Intl::GetBoolOption(isolate, options, "numeric", + "Intl.Collator", &numeric); + MAYBE_RETURN(found_numeric, MaybeHandle()); + + // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", + // « "upper", "lower", "false" », undefined). + // 15. Set opt.[[kf]] to caseFirst. 
+ values = {"upper", "lower", "false"}; + std::unique_ptr case_first_str = nullptr; + Maybe found_case_first = Intl::GetStringOption( + isolate, options, "caseFirst", values, "Intl.Collator", &case_first_str); + MAYBE_RETURN(found_case_first, MaybeHandle()); + + // The relevant unicode extensions accepted by Collator as specified here: + // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots + // + // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]]. + std::set relevant_extension_keys{"co", "kn", "kf"}; + + // We don't pass the relevant_extension_keys to ResolveLocale here + // as per the spec. + // + // In ResolveLocale, the spec makes sure we only pick and use the + // relevant extension keys and ignore any other keys. Also, in + // ResolveLocale, the spec makes sure that if a given key has both a + // value in the options object and a unicode extension value, then + // we pick the value provided in the options object. + // For example: in the case of `new Intl.Collator('en-u-kn-true', { + // numeric: false })` the value `false` is used for the `numeric` + // key. + // + // Instead of performing all this validation in ResolveLocale, we + // just perform it inline below. In the future when we port + // ResolveLocale to C++, we can make all these validations generic + // and move them to ResolveLocale. + // + // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], + // requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]], + // localeData). + // 18. Set collator.[[Locale]] to r.[[locale]]. + Handle r; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, r, + Intl::ResolveLocale(isolate, "collator", requested_locales, options), + JSCollator); + + Handle locale_with_extension_str = + isolate->factory()->NewStringFromStaticChars("localeWithExtension"); + Handle locale_with_extension_obj = + JSObject::GetDataProperty(r, locale_with_extension_str); + + // The locale_with_extension has to be a string.
Either a user + // provided canonicalized string or the default locale. + CHECK(locale_with_extension_obj->IsString()); + Handle locale_with_extension = + Handle::cast(locale_with_extension_obj); + + icu::Locale icu_locale = + Intl::CreateICULocale(isolate, locale_with_extension); + DCHECK(!icu_locale.isBogus()); + + std::map extensions = + LookupUnicodeExtensions(icu_locale, relevant_extension_keys); + + // 19. Let collation be r.[[co]]. + // + // r.[[co]] is already set as part of the icu::Locale creation as + // icu parses unicode extensions and sets the keywords. + // + // We need to sanitize the keywords based on certain ECMAScript rules. + // + // As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots: + // The values "standard" and "search" must not be used as elements + // in any [[SortLocaleData]][locale].co and + // [[SearchLocaleData]][locale].co list. + if (extensions.find("co") != extensions.end()) { + const char* value = extensions.at("co"); + if (strncmp(value, "search", 6) == 0 || + strncmp(value, "standard", 8) == 0) { + UErrorCode status = U_ZERO_ERROR; + icu_locale.setKeywordValue("co", NULL, status); + CHECK(U_SUCCESS(status)); + } + } + + // 20. If collation is null, let collation be "default". + // 21. Set collator.[[Collation]] to collation. + // + // We don't store the collation value as per the above two steps + // here. The collation value can be looked up from icu::Collator on + // demand, as part of Intl.Collator.prototype.resolvedOptions. + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr icu_collator( + icu::Collator::createInstance(icu_locale, status)); + if (U_FAILURE(status) || icu_collator.get() == nullptr) { + status = U_ZERO_ERROR; + // Remove extensions and try again. 
+ icu::Locale no_extension_locale(icu_locale.getBaseName()); + icu_collator.reset( + icu::Collator::createInstance(no_extension_locale, status)); + + if (U_FAILURE(status) || icu_collator.get() == nullptr) { + FATAL("Failed to create ICU collator, are ICU data files missing?"); + } + } + DCHECK(U_SUCCESS(status)); + CHECK_NOT_NULL(icu_collator.get()); + + // 22. If relevantExtensionKeys contains "kn", then + // a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true"). + // + // If the numeric value is passed in through the options object, + // then we use it. Otherwise, we check if the numeric value is + // passed in through the unicode extensions. + status = U_ZERO_ERROR; + if (found_numeric.FromJust()) { + icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, + numeric ? UCOL_ON : UCOL_OFF, status); + CHECK(U_SUCCESS(status)); + } else if (extensions.find("kn") != extensions.end()) { + const char* value = extensions.at("kn"); + + numeric = (strncmp(value, "true", 4) == 0); + + icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, + numeric ? UCOL_ON : UCOL_OFF, status); + CHECK(U_SUCCESS(status)); + } + + // 23. If relevantExtensionKeys contains "kf", then + // a. Set collator.[[CaseFirst]] to r.[[kf]]. + // + // If the caseFirst value is passed in through the options object, + // then we use it. Otherwise, we check if the caseFirst value is + // passed in through the unicode extensions. + if (found_case_first.FromJust()) { + const char* case_first_cstr = case_first_str.get(); + SetCaseFirstOption(icu_collator.get(), case_first_cstr); + } else if (extensions.find("kf") != extensions.end()) { + const char* value = extensions.at("kf"); + SetCaseFirstOption(icu_collator.get(), value); + } + + // Normalization is always on, by the spec. We are free to optimize + // if the strings are already normalized (but we don't have a way to tell + // that right now). 
+ status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); + CHECK(U_SUCCESS(status)); + + // 24. Let sensitivity be ? GetOption(options, "sensitivity", + // "string", « "base", "accent", "case", "variant" », undefined). + values = {"base", "accent", "case", "variant"}; + std::unique_ptr sensitivity_str = nullptr; + Maybe found_sensitivity = + Intl::GetStringOption(isolate, options, "sensitivity", values, + "Intl.Collator", &sensitivity_str); + MAYBE_RETURN(found_sensitivity, MaybeHandle()); + + // 25. If sensitivity is undefined, then + if (!found_sensitivity.FromJust()) { + // 25. a. If usage is "sort", then + if (usage == Usage::SORT) { + // 25. a. i. Let sensitivity be "variant". + // 26. Set collator.[[Sensitivity]] to sensitivity. + icu_collator->setStrength(icu::Collator::TERTIARY); + } + } else { + DCHECK(found_sensitivity.FromJust()); + const char* sensitivity_cstr = sensitivity_str.get(); + DCHECK_NOT_NULL(sensitivity_cstr); + + // 26. Set collator.[[Sensitivity]] to sensitivity. + if (strncmp(sensitivity_cstr, "base", 4) == 0) { + icu_collator->setStrength(icu::Collator::PRIMARY); + } else if (strncmp(sensitivity_cstr, "accent", 6) == 0) { + icu_collator->setStrength(icu::Collator::SECONDARY); + } else if (strncmp(sensitivity_cstr, "case", 4) == 0) { + icu_collator->setStrength(icu::Collator::PRIMARY); + status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status); + CHECK(U_SUCCESS(status)); + } else { + DCHECK_EQ(0, strncmp(sensitivity_cstr, "variant", 7)); + icu_collator->setStrength(icu::Collator::TERTIARY); + } + } + + // 27. Let ignorePunctuation be ? GetOption(options, + // "ignorePunctuation", "boolean", undefined, false). + bool ignore_punctuation; + Maybe found_ignore_punctuation = + Intl::GetBoolOption(isolate, options, "ignorePunctuation", + "Intl.Collator", &ignore_punctuation); + MAYBE_RETURN(found_ignore_punctuation, MaybeHandle()); + + // 28.
Set collator.[[IgnorePunctuation]] to ignorePunctuation. + if (found_ignore_punctuation.FromJust() && ignore_punctuation) { + status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); + CHECK(U_SUCCESS(status)); + } + + Handle> managed_collator = + Managed::FromUniquePtr(isolate, 0, + std::move(icu_collator)); + collator->set_icu_collator(*managed_collator); + + // 29. Return collator. + return collator; +} + +// static +const char* JSCollator::UsageToString(Usage usage) { + switch (usage) { + case Usage::SORT: + return "sort"; + case Usage::SEARCH: + return "search"; + case Usage::COUNT: + UNREACHABLE(); + } +} + +} // namespace internal +} // namespace v8 diff --git a/src/objects/js-collator.h b/src/objects/js-collator.h new file mode 100644 index 0000000000..fa6719cc2b --- /dev/null +++ b/src/objects/js-collator.h @@ -0,0 +1,82 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INTL_SUPPORT +#error Internationalization is expected to be enabled. 
+#endif // V8_INTL_SUPPORT + +#ifndef V8_OBJECTS_JS_COLLATOR_H_ +#define V8_OBJECTS_JS_COLLATOR_H_ + +#include "src/heap/factory.h" +#include "src/isolate.h" +#include "src/objects.h" +#include "src/objects/intl-objects.h" +#include "src/objects/managed.h" + +// Has to be the last include (doesn't have include guards): +#include "src/objects/object-macros.h" + +namespace v8 { +namespace internal { + +class JSCollator : public JSObject { + public: + // ecma402/#sec-initializecollator + V8_WARN_UNUSED_RESULT static MaybeHandle InitializeCollator( + Isolate* isolate, Handle collator, Handle locales, + Handle options); + + // ecma402/#sec-intl.collator.prototype.resolvedoptions + static Handle ResolvedOptions(Isolate* isolate, + Handle collator); + + DECL_CAST(JSCollator) + DECL_PRINTER(JSCollator) + DECL_VERIFIER(JSCollator) + + // [[Usage]] is one of the values "sort" or "search", identifying + // the collator usage. + enum class Usage { + SORT, + SEARCH, + + COUNT + }; + inline void set_usage(Usage usage); + inline Usage usage() const; + static const char* UsageToString(Usage usage); + +// Layout description. +#define JS_COLLATOR_FIELDS(V) \ + V(kICUCollatorOffset, kPointerSize) \ + V(kFlagsOffset, kPointerSize) \ + /* Total size. */ \ + V(kSize, 0) + + DEFINE_FIELD_OFFSET_CONSTANTS(JSObject::kHeaderSize, JS_COLLATOR_FIELDS) +#undef JS_COLLATOR_FIELDS + +// Bit positions in |flags|. 
+#define FLAGS_BIT_FIELDS(V, _) V(UsageBits, Usage, 1, _) + + DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS) +#undef FLAGS_BIT_FIELDS + + STATIC_ASSERT(Usage::SORT <= UsageBits::kMax); + STATIC_ASSERT(Usage::SEARCH <= UsageBits::kMax); + + DECL_ACCESSORS(icu_collator, Managed) + DECL_INT_ACCESSORS(flags) + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(JSCollator); +}; + +} // namespace internal +} // namespace v8 + +#include "src/objects/object-macros-undef.h" + +#endif // V8_OBJECTS_JS_COLLATOR_H_ diff --git a/src/runtime/runtime-intl.cc b/src/runtime/runtime-intl.cc index 47ec0ecd03..3cc51d704e 100644 --- a/src/runtime/runtime-intl.cc +++ b/src/runtime/runtime-intl.cc @@ -20,6 +20,7 @@ #include "src/messages.h" #include "src/objects/intl-objects-inl.h" #include "src/objects/intl-objects.h" +#include "src/objects/js-collator-inl.h" #include "src/objects/js-plural-rules-inl.h" #include "src/objects/managed.h" #include "src/runtime/runtime-utils.h" @@ -250,42 +251,37 @@ RUNTIME_FUNCTION(Runtime_CurrencyDigits) { return *Intl::CurrencyDigits(isolate, currency); } -RUNTIME_FUNCTION(Runtime_CreateCollator) { - HandleScope scope(isolate); - - DCHECK_EQ(3, args.length()); - - CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); - CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); - CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); - - Handle constructor( - isolate->native_context()->intl_collator_function(), isolate); - - Handle collator_holder; - ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, collator_holder, - JSObject::New(constructor, constructor)); - - icu::Collator* collator = - Collator::InitializeCollator(isolate, locale, options, resolved); - CHECK_NOT_NULL(collator); - - Handle> managed = - Managed::FromRawPtr(isolate, 0, collator); - collator_holder->SetEmbedderField(0, *managed); - - return *collator_holder; -} - RUNTIME_FUNCTION(Runtime_InternalCompare) { HandleScope scope(isolate); DCHECK_EQ(3, args.length()); - CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0); + 
CONVERT_ARG_HANDLE_CHECKED(JSCollator, collator, 0); CONVERT_ARG_HANDLE_CHECKED(String, string1, 1); CONVERT_ARG_HANDLE_CHECKED(String, string2, 2); - return *Intl::InternalCompare(isolate, collator_holder, string1, string2); + + return *Intl::InternalCompare(isolate, collator, string1, string2); +} + +RUNTIME_FUNCTION(Runtime_CollatorResolvedOptions) { + HandleScope scope(isolate); + + DCHECK_EQ(1, args.length()); + CONVERT_ARG_HANDLE_CHECKED(Object, collator_obj, 0); + + // 3. If collator does not have an [[InitializedCollator]] internal + // slot, throw a TypeError exception. + if (!collator_obj->IsJSCollator()) { + Handle method_str = isolate->factory()->NewStringFromStaticChars( + "Intl.Collator.prototype.resolvedOptions"); + THROW_NEW_ERROR_RETURN_FAILURE( + isolate, NewTypeError(MessageTemplate::kIncompatibleMethodReceiver, + method_str, collator_obj)); + } + + Handle collator = Handle::cast(collator_obj); + + return *JSCollator::ResolvedOptions(isolate, collator); } RUNTIME_FUNCTION(Runtime_PluralRulesResolvedOptions) { diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index d1239bcb1e..5845432c8b 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -209,8 +209,8 @@ namespace internal { F(BreakIteratorFirst, 1, 1) \ F(BreakIteratorNext, 1, 1) \ F(CanonicalizeLanguageTag, 1, 1) \ + F(CollatorResolvedOptions, 1, 1) \ F(CreateBreakIterator, 3, 1) \ - F(CreateCollator, 3, 1) \ F(CreateDateTimeFormat, 3, 1) \ F(CreateNumberFormat, 3, 1) \ F(DefineWEProperty, 3, 1) \ @@ -231,7 +231,7 @@ namespace internal { F(StringToUpperCaseIntl, 1, 1) #else #define FOR_EACH_INTRINSIC_INTL(F) -#endif +#endif // V8_INTL_SUPPORT #define FOR_EACH_INTRINSIC_INTERNAL(F) \ F(AllocateInNewSpace, 1, 1) \ diff --git a/test/intl/collator/default-locale.js b/test/intl/collator/default-locale.js index 5fc6ff4665..fd964f0620 100644 --- a/test/intl/collator/default-locale.js +++ b/test/intl/collator/default-locale.js @@ -48,8 +48,6 @@ var collatorBraket = new
Intl.Collator({}); assertEquals(options.locale, collatorBraket.resolvedOptions().locale); var collatorWithOptions = new Intl.Collator(undefined, {usage: 'search'}); -assertLanguageTag(%GetDefaultICULocale(), - collatorWithOptions.resolvedOptions().locale); -assertNotNull( - %regexp_internal_match(/-u(-[a-zA-Z]+-[a-zA-Z]+)*-co-search/, - collatorWithOptions.resolvedOptions().locale)); +var locale = collatorWithOptions.resolvedOptions().locale; +assertLanguageTag(%GetDefaultICULocale(), locale); +assertEquals(locale.indexOf('-co-search'), -1); diff --git a/test/intl/collator/property-override.js b/test/intl/collator/property-override.js index bed4d7773d..1e17b1e741 100644 --- a/test/intl/collator/property-override.js +++ b/test/intl/collator/property-override.js @@ -61,5 +61,3 @@ properties.forEach(function(prop) { }); taintProperties(properties); - -var locale = Intl.Collator().resolvedOptions().locale; diff --git a/test/test262/test262.status b/test/test262/test262.status index 51c76ec5c8..396937d850 100644 --- a/test/test262/test262.status +++ b/test/test262/test262.status @@ -419,10 +419,6 @@ 'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL], - # https://bugs.chromium.org/p/v8/issues/detail?id=7480 - 'intl402/Collator/unicode-ext-seq-in-private-tag': [FAIL], - 'intl402/Collator/unicode-ext-seq-with-attribute': [FAIL], - # https://bugs.chromium.org/p/v8/issues/detail?id=7481 'intl402/NumberFormat/ignore-invalid-unicode-ext-values': [FAIL], 'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL], diff --git a/tools/v8heapconst.py b/tools/v8heapconst.py index fc3fdceccf..40b896ffb9 100644 --- a/tools/v8heapconst.py +++ b/tools/v8heapconst.py @@ -159,17 +159,18 @@ INSTANCE_TYPES = { 1081: "JS_WEAK_SET_TYPE", 1082: "JS_TYPED_ARRAY_TYPE", 1083: "JS_DATA_VIEW_TYPE", - 1084: "JS_INTL_LIST_FORMAT_TYPE", - 1085: "JS_INTL_LOCALE_TYPE", - 1086: 
"JS_INTL_PLURAL_RULES_TYPE", - 1087: "JS_INTL_RELATIVE_TIME_FORMAT_TYPE", - 1088: "WASM_GLOBAL_TYPE", - 1089: "WASM_INSTANCE_TYPE", - 1090: "WASM_MEMORY_TYPE", - 1091: "WASM_MODULE_TYPE", - 1092: "WASM_TABLE_TYPE", - 1093: "JS_BOUND_FUNCTION_TYPE", - 1094: "JS_FUNCTION_TYPE", + 1084: "JS_INTL_COLLATOR_TYPE", + 1085: "JS_INTL_LIST_FORMAT_TYPE", + 1086: "JS_INTL_LOCALE_TYPE", + 1087: "JS_INTL_PLURAL_RULES_TYPE", + 1088: "JS_INTL_RELATIVE_TIME_FORMAT_TYPE", + 1089: "WASM_GLOBAL_TYPE", + 1090: "WASM_INSTANCE_TYPE", + 1091: "WASM_MEMORY_TYPE", + 1092: "WASM_MODULE_TYPE", + 1093: "WASM_TABLE_TYPE", + 1094: "JS_BOUND_FUNCTION_TYPE", + 1095: "JS_FUNCTION_TYPE", } # List of known V8 maps. @@ -284,33 +285,33 @@ KNOWN_MAPS = { ("RO_SPACE", 0x047b1): (171, "Tuple2Map"), ("RO_SPACE", 0x04ae9): (161, "InterceptorInfoMap"), ("RO_SPACE", 0x04bf1): (169, "ScriptMap"), - ("RO_SPACE", 0x09a19): (154, "AccessorInfoMap"), - ("RO_SPACE", 0x09a69): (153, "AccessCheckInfoMap"), - ("RO_SPACE", 0x09ab9): (155, "AccessorPairMap"), - ("RO_SPACE", 0x09b09): (156, "AliasedArgumentsEntryMap"), - ("RO_SPACE", 0x09b59): (157, "AllocationMementoMap"), - ("RO_SPACE", 0x09ba9): (158, "AsyncGeneratorRequestMap"), - ("RO_SPACE", 0x09bf9): (159, "DebugInfoMap"), - ("RO_SPACE", 0x09c49): (160, "FunctionTemplateInfoMap"), - ("RO_SPACE", 0x09c99): (162, "InterpreterDataMap"), - ("RO_SPACE", 0x09ce9): (163, "ModuleInfoEntryMap"), - ("RO_SPACE", 0x09d39): (164, "ModuleMap"), - ("RO_SPACE", 0x09d89): (165, "ObjectTemplateInfoMap"), - ("RO_SPACE", 0x09dd9): (166, "PromiseCapabilityMap"), - ("RO_SPACE", 0x09e29): (167, "PromiseReactionMap"), - ("RO_SPACE", 0x09e79): (168, "PrototypeInfoMap"), - ("RO_SPACE", 0x09ec9): (170, "StackFrameInfoMap"), - ("RO_SPACE", 0x09f19): (172, "Tuple3Map"), - ("RO_SPACE", 0x09f69): (173, "ArrayBoilerplateDescriptionMap"), - ("RO_SPACE", 0x09fb9): (174, "WasmDebugInfoMap"), - ("RO_SPACE", 0x0a009): (175, "WasmExportedFunctionDataMap"), - ("RO_SPACE", 0x0a059): (176, 
"CallableTaskMap"), - ("RO_SPACE", 0x0a0a9): (177, "CallbackTaskMap"), - ("RO_SPACE", 0x0a0f9): (178, "PromiseFulfillReactionJobTaskMap"), - ("RO_SPACE", 0x0a149): (179, "PromiseRejectReactionJobTaskMap"), - ("RO_SPACE", 0x0a199): (180, "PromiseResolveThenableJobTaskMap"), - ("RO_SPACE", 0x0a1e9): (181, "AllocationSiteMap"), - ("RO_SPACE", 0x0a239): (181, "AllocationSiteMap"), + ("RO_SPACE", 0x09ae1): (154, "AccessorInfoMap"), + ("RO_SPACE", 0x09b31): (153, "AccessCheckInfoMap"), + ("RO_SPACE", 0x09b81): (155, "AccessorPairMap"), + ("RO_SPACE", 0x09bd1): (156, "AliasedArgumentsEntryMap"), + ("RO_SPACE", 0x09c21): (157, "AllocationMementoMap"), + ("RO_SPACE", 0x09c71): (158, "AsyncGeneratorRequestMap"), + ("RO_SPACE", 0x09cc1): (159, "DebugInfoMap"), + ("RO_SPACE", 0x09d11): (160, "FunctionTemplateInfoMap"), + ("RO_SPACE", 0x09d61): (162, "InterpreterDataMap"), + ("RO_SPACE", 0x09db1): (163, "ModuleInfoEntryMap"), + ("RO_SPACE", 0x09e01): (164, "ModuleMap"), + ("RO_SPACE", 0x09e51): (165, "ObjectTemplateInfoMap"), + ("RO_SPACE", 0x09ea1): (166, "PromiseCapabilityMap"), + ("RO_SPACE", 0x09ef1): (167, "PromiseReactionMap"), + ("RO_SPACE", 0x09f41): (168, "PrototypeInfoMap"), + ("RO_SPACE", 0x09f91): (170, "StackFrameInfoMap"), + ("RO_SPACE", 0x09fe1): (172, "Tuple3Map"), + ("RO_SPACE", 0x0a031): (173, "ArrayBoilerplateDescriptionMap"), + ("RO_SPACE", 0x0a081): (174, "WasmDebugInfoMap"), + ("RO_SPACE", 0x0a0d1): (175, "WasmExportedFunctionDataMap"), + ("RO_SPACE", 0x0a121): (176, "CallableTaskMap"), + ("RO_SPACE", 0x0a171): (177, "CallbackTaskMap"), + ("RO_SPACE", 0x0a1c1): (178, "PromiseFulfillReactionJobTaskMap"), + ("RO_SPACE", 0x0a211): (179, "PromiseRejectReactionJobTaskMap"), + ("RO_SPACE", 0x0a261): (180, "PromiseResolveThenableJobTaskMap"), + ("RO_SPACE", 0x0a2b1): (181, "AllocationSiteMap"), + ("RO_SPACE", 0x0a301): (181, "AllocationSiteMap"), ("MAP_SPACE", 0x02201): (1057, "ExternalMap"), ("MAP_SPACE", 0x02251): (1072, "JSMessageObjectMap"), }