v8/test/cctest/test-intl.cc
Tobias Tebbi 6181ce59fc [builtins] add Torque fast-path for String.prototype.localeCompare
This fast path works for ASCII-only strings and is similar to the
existing fast-path in C++. Important differences:
- The locale check is done at Turbofan optimization time instead of
  at runtime
- Use tables of size 256 instead of 128 to save a bounds-check when
  handling one-byte strings.
- It first performs an equality check that's optimized for detecting
  inequality quickly by comparing the strings from both ends. If the
  equality check succeeds, we are done. Otherwise chances are high
  that the strings differ according to collation level L1 already.
  Therefore, we first do an L1 check and perform the L3 check
  only when L1 didn't find a difference. This is based on the assumption
  that few strings are identical except for different capitalization.
- Use the Torque version of string flattening instead of the runtime
  version.

Bug: v8:12196
Change-Id: I2d043c1138846783f6d567b736d34063ba9301e5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3268465
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77946}
2021-11-17 12:55:13 +00:00

316 lines
11 KiB
C++

// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef V8_INTL_SUPPORT
#include "src/objects/intl-objects.h"
#include "src/objects/js-break-iterator.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-date-time-format.h"
#include "src/objects/js-list-format.h"
#include "src/objects/js-number-format.h"
#include "src/objects/js-plural-rules.h"
#include "src/objects/js-relative-time-format.h"
#include "src/objects/js-segmenter.h"
#include "src/objects/lookup.h"
#include "src/objects/objects-inl.h"
#include "src/objects/option-utils.h"
#include "test/cctest/cctest.h"
#include "unicode/coll.h"
namespace v8 {
namespace internal {
// Field-wise equality for NumberFormatSpan. Defining this operator enables
// CHECK_EQ to be used with std::vector<NumberFormatSpan>.
//
// The fields are compared one by one instead of running memcmp over the raw
// object bytes, so the result does not depend on the struct's memory layout.
// NOTE(review): assumes field_id, begin_pos and end_pos (the three values the
// constructor takes and operator<< prints) are the whole state of
// NumberFormatSpan -- confirm against the struct definition.
bool operator==(const NumberFormatSpan& lhs, const NumberFormatSpan& rhs) {
  return lhs.field_id == rhs.field_id && lhs.begin_pos == rhs.begin_pos &&
         lhs.end_pos == rhs.end_pos;
}
// Writes a NumberFormatSpan to |self| as "{field_id,begin_pos,end_pos}" and
// returns the stream so that further insertions can be chained.
// The template parameter names avoid the leading-underscore-plus-capital
// spelling, which C++ reserves for the implementation.
template <typename CharT, typename Traits>
std::basic_ostream<CharT, Traits>& operator<<(
    std::basic_ostream<CharT, Traits>& self, const NumberFormatSpan& part) {
  return self << "{" << part.field_id << "," << part.begin_pos << ","
              << part.end_pos << "}";
}
// Runs FlattenRegionsToParts on a private copy of |regions| and checks that
// the parts it returns are equal to |expected_parts|.
void test_flatten_regions_to_parts(
    const std::vector<NumberFormatSpan>& regions,
    const std::vector<NumberFormatSpan>& expected_parts) {
  // FlattenRegionsToParts is handed a pointer to a non-const vector, so give
  // it a scratch copy rather than the caller's const input.
  std::vector<NumberFormatSpan> scratch(regions);
  std::vector<NumberFormatSpan> actual_parts = FlattenRegionsToParts(&scratch);
  CHECK_EQ(expected_parts, actual_parts);
}
// Feeds several sets of (possibly nested or overlapping) regions through
// FlattenRegionsToParts and checks the exact list of parts that comes back.
// Each NumberFormatSpan is written as (field_id, begin_pos, end_pos).
TEST(FlattenRegionsToParts) {
  using Spans = std::vector<NumberFormatSpan>;

  // One region nested in the field -1 region, with two further regions nested
  // inside it.
  {
    const Spans regions{
        NumberFormatSpan(-1, 0, 10),
        NumberFormatSpan(1, 2, 8),
        NumberFormatSpan(2, 2, 4),
        NumberFormatSpan(3, 6, 8),
    };
    const Spans expected{
        NumberFormatSpan(-1, 0, 2),
        NumberFormatSpan(2, 2, 4),
        NumberFormatSpan(1, 4, 6),
        NumberFormatSpan(3, 6, 8),
        NumberFormatSpan(-1, 8, 10),
    };
    test_flatten_regions_to_parts(regions, expected);
  }

  // A single region comes back unchanged.
  {
    const Spans regions{NumberFormatSpan(0, 0, 1)};
    const Spans expected{NumberFormatSpan(0, 0, 1)};
    test_flatten_regions_to_parts(regions, expected);
  }

  // A field 0 region covering exactly the same range as a field -1 region is
  // the only part expected, with the field -1 region listed first...
  {
    const Spans regions{
        NumberFormatSpan(-1, 0, 1),
        NumberFormatSpan(0, 0, 1),
    };
    const Spans expected{NumberFormatSpan(0, 0, 1)};
    test_flatten_regions_to_parts(regions, expected);
  }

  // ...and with it listed last.
  {
    const Spans regions{
        NumberFormatSpan(0, 0, 1),
        NumberFormatSpan(-1, 0, 1),
    };
    const Spans expected{NumberFormatSpan(0, 0, 1)};
    test_flatten_regions_to_parts(regions, expected);
  }

  // Regions 1..5 all start at 0 with growing lengths and regions 15..19 all
  // end at 10 with shrinking lengths; ten unit-length parts are expected.
  {
    const Spans regions{
        NumberFormatSpan(-1, 0, 10),
        NumberFormatSpan(1, 0, 1),
        NumberFormatSpan(2, 0, 2),
        NumberFormatSpan(3, 0, 3),
        NumberFormatSpan(4, 0, 4),
        NumberFormatSpan(5, 0, 5),
        NumberFormatSpan(15, 5, 10),
        NumberFormatSpan(16, 6, 10),
        NumberFormatSpan(17, 7, 10),
        NumberFormatSpan(18, 8, 10),
        NumberFormatSpan(19, 9, 10),
    };
    const Spans expected{
        NumberFormatSpan(1, 0, 1),
        NumberFormatSpan(2, 1, 2),
        NumberFormatSpan(3, 2, 3),
        NumberFormatSpan(4, 3, 4),
        NumberFormatSpan(5, 4, 5),
        NumberFormatSpan(15, 5, 6),
        NumberFormatSpan(16, 6, 7),
        NumberFormatSpan(17, 7, 8),
        NumberFormatSpan(18, 8, 9),
        NumberFormatSpan(19, 9, 10),
    };
    test_flatten_regions_to_parts(regions, expected);
  }

  // Several regions sharing a field id, nested up to four levels deep. Each
  // column below is one position; '-' stands for the field -1 region.
  //
  //              :      4
  //              :  22 33    3
  //              :  11111   22
  // input regions: 0000000  111
  //              : ------------
  // output parts:  0221340--231
  {
    const Spans regions{
        NumberFormatSpan(-1, 0, 12),
        NumberFormatSpan(0, 0, 7),
        NumberFormatSpan(1, 9, 12),
        NumberFormatSpan(1, 1, 6),
        NumberFormatSpan(2, 9, 11),
        NumberFormatSpan(2, 1, 3),
        NumberFormatSpan(3, 10, 11),
        NumberFormatSpan(3, 4, 6),
        NumberFormatSpan(4, 5, 6),
    };
    const Spans expected{
        NumberFormatSpan(0, 0, 1),
        NumberFormatSpan(2, 1, 3),
        NumberFormatSpan(1, 3, 4),
        NumberFormatSpan(3, 4, 5),
        NumberFormatSpan(4, 5, 6),
        NumberFormatSpan(0, 6, 7),
        NumberFormatSpan(-1, 7, 9),
        NumberFormatSpan(2, 9, 10),
        NumberFormatSpan(3, 10, 11),
        NumberFormatSpan(1, 11, 12),
    };
    test_flatten_regions_to_parts(regions, expected);
  }
}
// Exercises GetStringOption on an options object, first while the "foo"
// property is missing and then after it has been set to the Smi 42, with and
// without a list of permitted values.
TEST(GetStringOption) {
  LocalContext env;
  Isolate* isolate = CcTest::i_isolate();
  v8::Isolate* v8_isolate = env->GetIsolate();
  v8::HandleScope handle_scope(v8_isolate);
  Handle<JSObject> options = isolate->factory()->NewJSObjectWithNullProto();

  // Case 1: "foo" is not present, so nothing is found and the output stays
  // null.
  {
    std::unique_ptr<char[]> value = nullptr;
    Maybe<bool> maybe_found =
        GetStringOption(isolate, options, "foo", std::vector<const char*>{},
                        "service", &value);
    CHECK(!maybe_found.FromJust());
    CHECK_NULL(value);
  }

  // Store options.foo = 42 for the remaining cases.
  Handle<String> key = isolate->factory()->NewStringFromAsciiChecked("foo");
  LookupIterator it(isolate, options, key);
  Handle<Smi> forty_two(Smi::FromInt(42), isolate);
  CHECK(Object::SetProperty(&it, forty_two, StoreOrigin::kMaybeKeyed,
                            Just(ShouldThrow::kThrowOnError))
            .FromJust());

  // Case 2: "foo" is present and no permitted-values list is given; the
  // option is reported as the string "42".
  {
    std::unique_ptr<char[]> value = nullptr;
    Maybe<bool> maybe_found =
        GetStringOption(isolate, options, "foo", std::vector<const char*>{},
                        "service", &value);
    CHECK(maybe_found.FromJust());
    CHECK_NOT_NULL(value);
    CHECK_EQ(0, strcmp("42", value.get()));
  }

  // Case 3: "42" is not in the permitted-values list {"bar"}; an exception is
  // left pending, Nothing is returned and the output stays null.
  {
    std::unique_ptr<char[]> value = nullptr;
    Maybe<bool> maybe_found =
        GetStringOption(isolate, options, "foo",
                        std::vector<const char*>{"bar"}, "service", &value);
    CHECK(isolate->has_pending_exception());
    CHECK(maybe_found.IsNothing());
    CHECK_NULL(value);
    // Clear the exception so that the next case starts from a clean state.
    isolate->clear_pending_exception();
  }

  // Case 4: "42" is in the permitted-values list, so it is returned.
  {
    std::unique_ptr<char[]> value = nullptr;
    Maybe<bool> maybe_found =
        GetStringOption(isolate, options, "foo",
                        std::vector<const char*>{"42"}, "service", &value);
    CHECK(maybe_found.FromJust());
    CHECK_NOT_NULL(value);
    CHECK_EQ(0, strcmp("42", value.get()));
  }
}
// Exercises GetBoolOption on an options object while the "foo" property is
// missing, then after it is set to false, then after it is set to true.
TEST(GetBoolOption) {
  LocalContext env;
  Isolate* isolate = CcTest::i_isolate();
  v8::Isolate* v8_isolate = env->GetIsolate();
  v8::HandleScope handle_scope(v8_isolate);
  Handle<JSObject> options = isolate->factory()->NewJSObjectWithNullProto();
  {
    // Property missing: nothing is found and the output keeps its initial
    // value.
    bool result = false;
    Maybe<bool> found =
        GetBoolOption(isolate, options, "foo", "service", &result);
    CHECK(!found.FromJust());
    CHECK(!result);
  }
  Handle<String> key = isolate->factory()->NewStringFromAsciiChecked("foo");
  {
    // Property set to false: found, and the output is false.
    // SetProperty is called through its (isolate, object, name, ...) overload,
    // so no LookupIterator needs to be constructed here.
    // NOTE(review): Assert() may only be enforced in debug builds -- confirm
    // whether a release-mode check is wanted for the store.
    Handle<Object> false_value =
        handle(ReadOnlyRoots(isolate).false_value(), isolate);
    Object::SetProperty(isolate, options, key, false_value,
                        StoreOrigin::kMaybeKeyed,
                        Just(ShouldThrow::kThrowOnError))
        .Assert();
    bool result = false;
    Maybe<bool> found =
        GetBoolOption(isolate, options, "foo", "service", &result);
    CHECK(found.FromJust());
    CHECK(!result);
  }
  {
    // Property set to true: found, and the output flips to true.
    Handle<Object> true_value =
        handle(ReadOnlyRoots(isolate).true_value(), isolate);
    Object::SetProperty(isolate, options, key, true_value,
                        StoreOrigin::kMaybeKeyed,
                        Just(ShouldThrow::kThrowOnError))
        .Assert();
    bool result = false;
    Maybe<bool> found =
        GetBoolOption(isolate, options, "foo", "service", &result);
    CHECK(found.FromJust());
    CHECK(result);
  }
}
// Checks that each Intl service lists an English locale among its available
// locales ("en" for JSPluralRules, "en-US" for the others), and that the
// break iterator and segmenter do not list the bogus tag "abcdefg".
//
// Each set is bound by const reference in its own scope, so nothing is copied
// just to run a lookup on it.
TEST(GetAvailableLocales) {
  {
    const std::set<std::string>& locales =
        JSV8BreakIterator::GetAvailableLocales();
    CHECK(locales.count("en-US"));
    CHECK(!locales.count("abcdefg"));
  }
  {
    const std::set<std::string>& locales = JSCollator::GetAvailableLocales();
    CHECK(locales.count("en-US"));
  }
  {
    const std::set<std::string>& locales =
        JSDateTimeFormat::GetAvailableLocales();
    CHECK(locales.count("en-US"));
  }
  {
    const std::set<std::string>& locales = JSListFormat::GetAvailableLocales();
    CHECK(locales.count("en-US"));
  }
  {
    const std::set<std::string>& locales =
        JSNumberFormat::GetAvailableLocales();
    CHECK(locales.count("en-US"));
  }
  {
    const std::set<std::string>& locales =
        JSPluralRules::GetAvailableLocales();
    CHECK(locales.count("en"));
  }
  {
    const std::set<std::string>& locales =
        JSRelativeTimeFormat::GetAvailableLocales();
    CHECK(locales.count("en-US"));
  }
  {
    const std::set<std::string>& locales = JSSegmenter::GetAvailableLocales();
    CHECK(locales.count("en-US"));
    CHECK(!locales.count("abcdefg"));
  }
}
// Tests that Intl::CompareStrings returns the same result with
// CompareStringsOptions::kNone and with CompareStringsOptions::kTryFastPath.
// The inputs are single-character strings, one per printable ASCII character;
// every pair (lhs, rhs) in which lhs has the smaller character code is
// compared, for every collator locale that reports kTryFastPath.
TEST(StringLocaleCompareFastPath) {
  LocalContext env;
  Isolate* isolate = CcTest::i_isolate();
  HandleScope handle_scope(isolate);

  // Build one single-character string per printable ASCII character.
  std::vector<Handle<String>> printable_chars;
  for (int code = 0; code <= 0x7F; code++) {
    if (std::isprint(code)) {
      printable_chars.push_back(
          isolate->factory()->LookupSingleCharacterStringFromCode(code));
    }
  }

  // Inputs needed to construct a JSCollator for each locale below.
  Handle<JSFunction> collator_constructor(
      JSFunction::cast(
          isolate->context().native_context().intl_collator_function()),
      isolate);
  Handle<Map> constructor_map =
      JSFunction::GetDerivedMap(isolate, collator_constructor,
                                collator_constructor)
          .ToHandleChecked();
  Handle<Object> options(ReadOnlyRoots(isolate).undefined_value(), isolate);
  static const char* const kMethodName = "StringLocaleCompareFastPath";

  const size_t char_count = printable_chars.size();
  for (const std::string& locale : JSCollator::GetAvailableLocales()) {
    Handle<String> locale_string =
        isolate->factory()->NewStringFromAsciiChecked(locale.c_str());

    // Only locales for which the fast path would be attempted are relevant.
    const bool tries_fast_path =
        Intl::CompareStringsOptionsFor(isolate->AsLocalIsolate(), locale_string,
                                       options) ==
        Intl::CompareStringsOptions::kTryFastPath;
    if (!tries_fast_path) continue;

    Handle<JSCollator> collator =
        JSCollator::New(isolate, constructor_map, locale_string, options,
                        kMethodName)
            .ToHandleChecked();
    // The same ICU collator is used for every comparison in this locale.
    const auto& icu_collator = *collator->icu_collator().raw();

    for (size_t lhs_index = 0; lhs_index < char_count; lhs_index++) {
      Handle<String> lhs = printable_chars[lhs_index];
      for (size_t rhs_index = lhs_index + 1; rhs_index < char_count;
           rhs_index++) {
        Handle<String> rhs = printable_chars[rhs_index];
        const auto generic_result =
            Intl::CompareStrings(isolate, icu_collator, lhs, rhs,
                                 Intl::CompareStringsOptions::kNone);
        const auto fast_result =
            Intl::CompareStrings(isolate, icu_collator, lhs, rhs,
                                 Intl::CompareStringsOptions::kTryFastPath);
        CHECK_EQ(generic_result, fast_result);
      }
    }
  }
}
} // namespace internal
} // namespace v8
#endif // V8_INTL_SUPPORT