Port BuildLanguageTagREs from JavaScript to C++.

Bug: v8:5751
Test: None (no caller yet)

Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng
Change-Id: Ifbe243b945ae6e1750e9db8430da178ae137e34f
Reviewed-on: https://chromium-review.googlesource.com/1117303
Commit-Queue: Brian Stell <bstell@chromium.org>
Reviewed-by: Sergiy Byelozyorov <sergiyb@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Reviewed-by: Jungshik Shin <jshin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54336}
This commit is contained in:
Brian Stell 2018-07-09 10:04:28 -07:00 committed by Commit Bot
parent 6335989f65
commit 3ba5445b8a
4 changed files with 181 additions and 2 deletions

View File

@ -952,6 +952,12 @@ v8_source_set("torque_generated_initializers") {
":run_torque",
]
if (v8_enable_i18n_support) {
public_deps = [
"//third_party/icu",
]
}
sources = []
foreach(module, torque_modules) {
sources += [
@ -1431,6 +1437,11 @@ v8_source_set("v8_init") {
### gcmole(all) ###
"src/setup-isolate-full.cc",
]
if (v8_enable_i18n_support) {
public_deps = [
"//third_party/icu",
]
}
configs = [ ":internal_config" ]
}
@ -3072,6 +3083,10 @@ v8_source_set("fuzzer_support") {
":v8_libbase",
":v8_libplatform",
]
if (v8_enable_i18n_support) {
public_deps += [ "//third_party/icu" ]
}
}
###############################################################################
@ -3550,6 +3565,12 @@ v8_source_set("wasm_module_runner") {
":torque_generated_core",
]
if (v8_enable_i18n_support) {
public_deps = [
"//third_party/icu",
]
}
configs = [
":external_config",
":internal_config_base",
@ -3627,6 +3648,12 @@ v8_source_set("lib_wasm_fuzzer_common") {
":torque_generated_core",
]
if (v8_enable_i18n_support) {
public_deps = [
"//third_party/icu",
]
}
configs = [
":external_config",
":internal_config_base",

View File

@ -62,6 +62,9 @@
#include "src/wasm/wasm-engine.h"
#include "src/wasm/wasm-objects.h"
#include "src/zone/accounting-allocator.h"
#ifdef V8_INTL_SUPPORT
#include "unicode/regex.h"
#endif // V8_INTL_SUPPORT
namespace v8 {
namespace internal {
@ -2511,6 +2514,11 @@ Isolate::Isolate()
host_import_module_dynamically_callback_(nullptr),
host_initialize_import_meta_object_callback_(nullptr),
load_start_time_ms_(0),
#ifdef V8_INTL_SUPPORT
language_singleton_regexp_matcher_(nullptr),
language_tag_regexp_matcher_(nullptr),
language_variant_regexp_matcher_(nullptr),
#endif // V8_INTL_SUPPORT
serializer_enabled_(false),
has_fatal_error_(false),
initialized_from_snapshot_(false),
@ -2733,6 +2741,17 @@ Isolate::~Isolate() {
delete date_cache_;
date_cache_ = nullptr;
#ifdef V8_INTL_SUPPORT
delete language_singleton_regexp_matcher_;
language_singleton_regexp_matcher_ = nullptr;
delete language_tag_regexp_matcher_;
language_tag_regexp_matcher_ = nullptr;
delete language_variant_regexp_matcher_;
language_variant_regexp_matcher_ = nullptr;
#endif // V8_INTL_SUPPORT
delete regexp_stack_;
regexp_stack_ = nullptr;

View File

@ -32,6 +32,16 @@
#include "src/runtime/runtime.h"
#include "src/unicode.h"
#ifdef V8_INTL_SUPPORT
#include "unicode/uversion.h" // Define U_ICU_NAMESPACE.
// Forward-declaring inside 'icu' does not work here; use U_ICU_NAMESPACE.
namespace U_ICU_NAMESPACE {
class RegexMatcher;
} // namespace U_ICU_NAMESPACE
#endif // V8_INTL_SUPPORT
namespace v8 {
namespace base {
@ -1085,6 +1095,32 @@ class Isolate : private HiddenFactory {
date_cache_ = date_cache;
}
#ifdef V8_INTL_SUPPORT
// Returns the language-singleton regexp matcher stored on this isolate.
// The field starts out as nullptr in the constructor, so this returns nullptr
// until set_language_tag_regexp_matchers() has installed a matcher.
icu::RegexMatcher* language_singleton_regexp_matcher() {
return language_singleton_regexp_matcher_;
}
// Returns the language-tag regexp matcher stored on this isolate.
// The field starts out as nullptr in the constructor, so this returns nullptr
// until set_language_tag_regexp_matchers() has installed a matcher.
icu::RegexMatcher* language_tag_regexp_matcher() {
return language_tag_regexp_matcher_;
}
// Returns the language-variant regexp matcher stored on this isolate.
// The field starts out as nullptr in the constructor, so this returns nullptr
// until set_language_tag_regexp_matchers() has installed a matcher.
icu::RegexMatcher* language_variant_regexp_matcher() {
return language_variant_regexp_matcher_;
}
// Installs the three language-tag related regexp matchers on this isolate.
// All three are stored together, so callers can treat "one is set" as
// "all are set".
// The matchers are deleted in ~Isolate(), so the isolate owns whatever is
// passed in here; callers must not delete them afterwards.
void set_language_tag_regexp_matchers(
icu::RegexMatcher* language_singleton_regexp_matcher,
icu::RegexMatcher* language_tag_regexp_matcher,
icu::RegexMatcher* language_variant_regexp_matcher) {
// Debug builds check that nothing has been installed yet; overwriting a
// previously installed matcher would drop the old pointer without freeing it.
DCHECK_NULL(language_singleton_regexp_matcher_);
DCHECK_NULL(language_tag_regexp_matcher_);
DCHECK_NULL(language_variant_regexp_matcher_);
language_singleton_regexp_matcher_ = language_singleton_regexp_matcher;
language_tag_regexp_matcher_ = language_tag_regexp_matcher;
language_variant_regexp_matcher_ = language_variant_regexp_matcher;
}
#endif // V8_INTL_SUPPORT
static const int kProtectorValid = 1;
static const int kProtectorInvalid = 0;
@ -1577,6 +1613,12 @@ class Isolate : private HiddenFactory {
base::Mutex rail_mutex_;
double load_start_time_ms_;
#ifdef V8_INTL_SUPPORT
icu::RegexMatcher* language_singleton_regexp_matcher_;
icu::RegexMatcher* language_tag_regexp_matcher_;
icu::RegexMatcher* language_variant_regexp_matcher_;
#endif // V8_INTL_SUPPORT
// Whether the isolate has been created for snapshotting.
bool serializer_enabled_;

View File

@ -34,6 +34,7 @@
#include "unicode/numsys.h"
#include "unicode/plurrule.h"
#include "unicode/rbbi.h"
#include "unicode/regex.h"
#include "unicode/smpdtfmt.h"
#include "unicode/timezone.h"
#include "unicode/uchar.h"
@ -1237,8 +1238,98 @@ MaybeHandle<JSReceiver> Intl::UnwrapReceiver(Isolate* isolate,
return Handle<JSReceiver>::cast(new_receiver);
}
// TODO(bstell): Convert this to C++ instead of calling out to the
// JS implementation.
// TODO(bstell): enable this anonymous namespace once these routines are called:
// * GetLanguageSingletonRegexMatcher,
// * GetLanguageTagRegexMatcher
// * GetLanguageVariantRegexMatcher
// namespace {
// TODO(bstell): Make all these a constexpr on the Intl class.
// Builds the ICU regexp matchers for language singletons, whole language tags
// and language variants, and installs them on |isolate|.
//
// The patterns are assembled from small named fragments (the names appear to
// follow the RFC 5646 / BCP 47 Language-Tag grammar -- confirm against the
// spec before changing any of them).
//
// If ICU reports an error while compiling any of the patterns, nothing is
// installed on the isolate and the matchers allocated here are freed, so the
// isolate's matcher accessors keep returning nullptr.
void BuildLanguageTagRegexes(Isolate* isolate) {
  // Character classes.
  const std::string alpha = "[a-zA-Z]";
  const std::string digit = "[0-9]";
  const std::string alphanum = "(" + alpha + "|" + digit + ")";

  // Fixed, enumerated tags.
  const std::string regular =
      "(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|"
      "zh-min|zh-min-nan|zh-xiang)";
  const std::string irregular =
      "(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|"
      "i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|"
      "i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)";
  const std::string grandfathered = "(" + irregular + "|" + regular + ")";

  // Subtag fragments.
  const std::string private_use = "(x(-" + alphanum + "{1,8})+)";
  const std::string singleton = "(" + digit + "|[A-WY-Za-wy-z])";
  const std::string extension =
      "(" + singleton + "(-" + alphanum + "{2,8})+)";
  const std::string variant =
      "(" + alphanum + "{5,8}|(" + digit + alphanum + "{3}))";
  const std::string region = "(" + alpha + "{2}|" + digit + "{3})";
  const std::string script = "(" + alpha + "{4})";
  const std::string ext_lang = "(" + alpha + "{3}(-" + alpha + "{3}){0,2})";
  const std::string language = "(" + alpha + "{2,3}(-" + ext_lang + ")?|" +
                               alpha + "{4}|" + alpha + "{5,8})";
  const std::string lang_tag = language + "(-" + script + ")?(-" + region +
                               ")?(-" + variant + ")*(-" + extension +
                               ")*(-" + private_use + ")?";

  // The three anchored patterns that are actually compiled.
  const std::string language_singleton_regexp = "^" + singleton + "$";
  const std::string language_variant_regexp = "^" + variant + "$";
  const std::string language_tag_regexp =
      "^(" + lang_tag + "|" + private_use + "|" + grandfathered + ")$";

  // One status is shared by all three constructions so that a failure in any
  // of them is visible in the single check below.
  UErrorCode status = U_ZERO_ERROR;
  icu::RegexMatcher* language_singleton_regexp_matcher = new icu::RegexMatcher(
      icu::UnicodeString::fromUTF8(language_singleton_regexp), 0, status);
  icu::RegexMatcher* language_tag_regexp_matcher = new icu::RegexMatcher(
      icu::UnicodeString::fromUTF8(language_tag_regexp), 0, status);
  icu::RegexMatcher* language_variant_regexp_matcher = new icu::RegexMatcher(
      icu::UnicodeString::fromUTF8(language_variant_regexp), 0, status);

  if (U_FAILURE(status)) {
    // Ownership was never handed to the isolate, so free the matchers here;
    // otherwise every failed (and retried) build would leak all three.
    delete language_singleton_regexp_matcher;
    delete language_tag_regexp_matcher;
    delete language_variant_regexp_matcher;
    return;
  }

  // Success: the isolate takes over the three matchers.
  isolate->set_language_tag_regexp_matchers(language_singleton_regexp_matcher,
                                            language_tag_regexp_matcher,
                                            language_variant_regexp_matcher);
}
// Returns the isolate's language-singleton matcher, building the language tag
// regexes first if the isolate does not have one yet. The result can still be
// nullptr when the build step does not install any matchers.
icu::RegexMatcher* GetLanguageSingletonRegexMatcher(Isolate* isolate) {
  // Fast path: the matcher is already cached on the isolate.
  if (icu::RegexMatcher* cached =
          isolate->language_singleton_regexp_matcher()) {
    return cached;
  }
  // Slow path: build the regexes, then re-read the cached pointer.
  BuildLanguageTagRegexes(isolate);
  return isolate->language_singleton_regexp_matcher();
}
// Returns the isolate's language-tag matcher, building the language tag
// regexes first if the isolate does not have one yet. The result can still be
// nullptr when the build step does not install any matchers.
icu::RegexMatcher* GetLanguageTagRegexMatcher(Isolate* isolate) {
  // Fast path: the matcher is already cached on the isolate.
  if (icu::RegexMatcher* cached = isolate->language_tag_regexp_matcher()) {
    return cached;
  }
  // Slow path: build the regexes, then re-read the cached pointer.
  BuildLanguageTagRegexes(isolate);
  return isolate->language_tag_regexp_matcher();
}
// Returns the isolate's language-variant matcher, building the language tag
// regexes first if the isolate does not have one yet. The result can still be
// nullptr when the build step does not install any matchers.
icu::RegexMatcher* GetLanguageVariantRegexMatcher(Isolate* isolate) {
  // Fast path: the matcher is already cached on the isolate.
  if (icu::RegexMatcher* cached = isolate->language_variant_regexp_matcher()) {
    return cached;
  }
  // Slow path: build the regexes, then re-read the cached pointer.
  BuildLanguageTagRegexes(isolate);
  return isolate->language_variant_regexp_matcher();
}
// } // anonymous namespace
MaybeHandle<JSObject> Intl::ResolveLocale(Isolate* isolate, const char* service,
Handle<Object> requestedLocales,
Handle<Object> options) {