Reland "[Intl] Sync Intl.Segmenter to latest version"
This is a reland of 482c3bbf1e
Original change's description:
> [Intl] Sync Intl.Segmenter to latest version
>
> https://tc39.es/proposal-intl-segmenter/
>
> TC39 advanced Intl.Segmenter to stage 3 on Jul 21.
> This CL moves our earlier prototype to the current spec.
>
> Bug: v8:6891
> Change-Id: I07234beed54f671c26bdbfb3983c5bc2fa5a29b0
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2219413
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Frank Tang <ftang@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Commit-Queue: Frank Tang <ftang@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#69080}
Bug: v8:6891
Change-Id: Ie3a02d8ddf6f95f0632f97b38b613b185abeb592
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2321118
Reviewed-by: Frank Tang <ftang@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Frank Tang <ftang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69153}
This commit is contained in:
parent
87563c3865
commit
4f87e1a045
6
BUILD.gn
6
BUILD.gn
@ -2874,6 +2874,9 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/objects/js-segmenter-inl.h",
|
||||
"src/objects/js-segmenter.cc",
|
||||
"src/objects/js-segmenter.h",
|
||||
"src/objects/js-segments-inl.h",
|
||||
"src/objects/js-segments.cc",
|
||||
"src/objects/js-segments.h",
|
||||
"src/objects/js-weak-refs-inl.h",
|
||||
"src/objects/js-weak-refs.h",
|
||||
"src/objects/keys.cc",
|
||||
@ -3724,6 +3727,9 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/objects/js-segmenter-inl.h",
|
||||
"src/objects/js-segmenter.cc",
|
||||
"src/objects/js-segmenter.h",
|
||||
"src/objects/js-segments-inl.h",
|
||||
"src/objects/js-segments.cc",
|
||||
"src/objects/js-segments.h",
|
||||
"src/runtime/runtime-intl.cc",
|
||||
"src/strings/char-predicates.cc",
|
||||
]
|
||||
|
@ -1035,16 +1035,12 @@ namespace internal {
|
||||
CPP(SegmenterPrototypeSegment) \
|
||||
/* ecma402 #sec-Intl.Segmenter.supportedLocalesOf */ \
|
||||
CPP(SegmenterSupportedLocalesOf) \
|
||||
/* ecma402 #sec-segment-iterator-prototype-breakType */ \
|
||||
CPP(SegmentIteratorPrototypeBreakType) \
|
||||
/* ecma402 #sec-segment-iterator-prototype-following */ \
|
||||
CPP(SegmentIteratorPrototypeFollowing) \
|
||||
/* ecma402 #sec-segment-iterator-prototype-preceding */ \
|
||||
CPP(SegmentIteratorPrototypePreceding) \
|
||||
/* ecma402 #sec-segment-iterator-prototype-index */ \
|
||||
CPP(SegmentIteratorPrototypeIndex) \
|
||||
/* ecma402 #sec-segment-iterator-prototype-next */ \
|
||||
CPP(SegmentIteratorPrototypeNext) \
|
||||
/* ecma402 #sec-%segmentsprototype%.containing */ \
|
||||
CPP(SegmentsPrototypeContaining) \
|
||||
/* ecma402 #sec-%segmentsprototype%-@@iterator */ \
|
||||
CPP(SegmentsPrototypeIterator) \
|
||||
/* ES #sec-string.prototype.normalize */ \
|
||||
CPP(StringPrototypeNormalizeIntl) \
|
||||
/* ecma402 #sup-string.prototype.tolocalelowercase */ \
|
||||
|
@ -28,10 +28,10 @@
|
||||
#include "src/objects/js-relative-time-format-inl.h"
|
||||
#include "src/objects/js-segment-iterator-inl.h"
|
||||
#include "src/objects/js-segmenter-inl.h"
|
||||
#include "src/objects/js-segments-inl.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/objects/property-descriptor.h"
|
||||
#include "src/objects/smi.h"
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
|
||||
namespace v8 {
|
||||
@ -968,32 +968,9 @@ BUILTIN(CollatorInternalCompare) {
|
||||
return *Intl::CompareStrings(isolate, *icu_collator, string_x, string_y);
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-breakType
|
||||
BUILTIN(SegmentIteratorPrototypeBreakType) {
|
||||
const char* const method = "get %SegmentIteratorPrototype%.breakType";
|
||||
HandleScope scope(isolate);
|
||||
|
||||
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
|
||||
return *segment_iterator->BreakType();
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-following
|
||||
BUILTIN(SegmentIteratorPrototypeFollowing) {
|
||||
const char* const method = "%SegmentIteratorPrototype%.following";
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
|
||||
|
||||
Handle<Object> from = args.atOrUndefined(isolate, 1);
|
||||
|
||||
Maybe<bool> success =
|
||||
JSSegmentIterator::Following(isolate, segment_iterator, from);
|
||||
MAYBE_RETURN(success, ReadOnlyRoots(isolate).exception());
|
||||
return *isolate->factory()->ToBoolean(success.FromJust());
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-next
|
||||
// ecma402 #sec-%segmentiteratorprototype%.next
|
||||
BUILTIN(SegmentIteratorPrototypeNext) {
|
||||
const char* const method = "%SegmentIteratorPrototype%.next";
|
||||
const char* const method = "%SegmentIterator.prototype%.next";
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
|
||||
|
||||
@ -1001,29 +978,7 @@ BUILTIN(SegmentIteratorPrototypeNext) {
|
||||
JSSegmentIterator::Next(isolate, segment_iterator));
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-preceding
|
||||
BUILTIN(SegmentIteratorPrototypePreceding) {
|
||||
const char* const method = "%SegmentIteratorPrototype%.preceding";
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
|
||||
|
||||
Handle<Object> from = args.atOrUndefined(isolate, 1);
|
||||
|
||||
Maybe<bool> success =
|
||||
JSSegmentIterator::Preceding(isolate, segment_iterator, from);
|
||||
MAYBE_RETURN(success, ReadOnlyRoots(isolate).exception());
|
||||
return *isolate->factory()->ToBoolean(success.FromJust());
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-index
|
||||
BUILTIN(SegmentIteratorPrototypeIndex) {
|
||||
const char* const method = "get %SegmentIteratorPrototype%.index";
|
||||
HandleScope scope(isolate);
|
||||
|
||||
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
|
||||
return *JSSegmentIterator::Index(isolate, segment_iterator);
|
||||
}
|
||||
|
||||
// ecma402 #sec-intl.segmenter
|
||||
BUILTIN(SegmenterConstructor) {
|
||||
HandleScope scope(isolate);
|
||||
|
||||
@ -1032,6 +987,7 @@ BUILTIN(SegmenterConstructor) {
|
||||
"Intl.Segmenter");
|
||||
}
|
||||
|
||||
// ecma402 #sec-intl.segmenter.supportedlocalesof
|
||||
BUILTIN(SegmenterSupportedLocalesOf) {
|
||||
HandleScope scope(isolate);
|
||||
Handle<Object> locales = args.atOrUndefined(isolate, 1);
|
||||
@ -1043,30 +999,52 @@ BUILTIN(SegmenterSupportedLocalesOf) {
|
||||
JSSegmenter::GetAvailableLocales(), locales, options));
|
||||
}
|
||||
|
||||
// ecma402 #sec-intl.segmenter.prototype.resolvedoptions
|
||||
BUILTIN(SegmenterPrototypeResolvedOptions) {
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegmenter, segmenter_holder,
|
||||
CHECK_RECEIVER(JSSegmenter, segmenter,
|
||||
"Intl.Segmenter.prototype.resolvedOptions");
|
||||
return *JSSegmenter::ResolvedOptions(isolate, segmenter_holder);
|
||||
return *JSSegmenter::ResolvedOptions(isolate, segmenter);
|
||||
}
|
||||
|
||||
// ecma402 #sec-Intl.Segmenter.prototype.segment
|
||||
// ecma402 #sec-intl.segmenter.prototype.segment
|
||||
BUILTIN(SegmenterPrototypeSegment) {
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegmenter, segmenter_holder,
|
||||
"Intl.Segmenter.prototype.segment");
|
||||
CHECK_RECEIVER(JSSegmenter, segmenter, "Intl.Segmenter.prototype.segment");
|
||||
Handle<Object> input_text = args.atOrUndefined(isolate, 1);
|
||||
// 3. Let string be ? ToString(string).
|
||||
Handle<String> text;
|
||||
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, text,
|
||||
Handle<String> string;
|
||||
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
|
||||
Object::ToString(isolate, input_text));
|
||||
|
||||
// 4. Return ? CreateSegmentIterator(segment, string).
|
||||
// 4. Return ? CreateSegmentsObject(segmenter, string).
|
||||
RETURN_RESULT_OR_FAILURE(isolate,
|
||||
JSSegments::Create(isolate, segmenter, string));
|
||||
}
|
||||
|
||||
// ecma402 #sec-%segmentsprototype%.containing
|
||||
BUILTIN(SegmentsPrototypeContaining) {
|
||||
const char* const method = "%Segments.prototype%.containing";
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegments, segments, method);
|
||||
Handle<Object> index = args.atOrUndefined(isolate, 1);
|
||||
|
||||
// 6. Let n be ? ToInteger(index).
|
||||
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, index,
|
||||
Object::ToInteger(isolate, index));
|
||||
double const n = index->Number();
|
||||
|
||||
RETURN_RESULT_OR_FAILURE(
|
||||
isolate,
|
||||
JSSegmentIterator::Create(
|
||||
isolate, segmenter_holder->icu_break_iterator().raw()->clone(),
|
||||
segmenter_holder->granularity(), text));
|
||||
isolate, JSSegments::Containing(isolate, segments, static_cast<int>(n)));
|
||||
}
|
||||
|
||||
// ecma402 #sec-%segmentsprototype%-@@iterator
|
||||
BUILTIN(SegmentsPrototypeIterator) {
|
||||
const char* const method = "%SegmentIsPrototype%[@@iterator]";
|
||||
HandleScope scope(isolate);
|
||||
CHECK_RECEIVER(JSSegments, segments, method);
|
||||
RETURN_RESULT_OR_FAILURE(
|
||||
isolate, JSSegments::CreateSegmentIterator(isolate, segments));
|
||||
}
|
||||
|
||||
BUILTIN(V8BreakIteratorConstructor) {
|
||||
|
@ -62,6 +62,7 @@ class JSRegExpStringIterator;
|
||||
class JSRelativeTimeFormat;
|
||||
class JSSegmentIterator;
|
||||
class JSSegmenter;
|
||||
class JSSegments;
|
||||
class JSV8BreakIterator;
|
||||
class JSWeakCollection;
|
||||
class JSFinalizationRegistry;
|
||||
|
@ -218,6 +218,7 @@ Type::bitset BitsetType::Lub(const MapRefLike& map) {
|
||||
case JS_RELATIVE_TIME_FORMAT_TYPE:
|
||||
case JS_SEGMENT_ITERATOR_TYPE:
|
||||
case JS_SEGMENTER_TYPE:
|
||||
case JS_SEGMENTS_TYPE:
|
||||
#endif // V8_INTL_SUPPORT
|
||||
case JS_CONTEXT_EXTENSION_OBJECT_TYPE:
|
||||
case JS_GENERATOR_OBJECT_TYPE:
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include "src/objects/js-relative-time-format-inl.h"
|
||||
#include "src/objects/js-segment-iterator-inl.h"
|
||||
#include "src/objects/js-segmenter-inl.h"
|
||||
#include "src/objects/js-segments-inl.h"
|
||||
#endif // V8_INTL_SUPPORT
|
||||
#include "src/objects/js-weak-refs-inl.h"
|
||||
#include "src/objects/literal-objects-inl.h"
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "src/objects/js-relative-time-format-inl.h"
|
||||
#include "src/objects/js-segment-iterator-inl.h"
|
||||
#include "src/objects/js-segmenter-inl.h"
|
||||
#include "src/objects/js-segments-inl.h"
|
||||
#endif // V8_INTL_SUPPORT
|
||||
#include "src/compiler/node.h"
|
||||
#include "src/objects/js-weak-refs-inl.h"
|
||||
@ -2107,18 +2108,25 @@ void JSRelativeTimeFormat::JSRelativeTimeFormatPrint(
|
||||
void JSSegmentIterator::JSSegmentIteratorPrint(std::ostream& os) { // NOLINT
|
||||
JSObjectPrintHeader(os, *this, "JSSegmentIterator");
|
||||
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
|
||||
os << "\n - unicode string: " << Brief(unicode_string());
|
||||
os << "\n - granularity: " << GranularityAsString();
|
||||
os << "\n - granularity: " << GranularityAsString(GetIsolate());
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
void JSSegmenter::JSSegmenterPrint(std::ostream& os) { // NOLINT
|
||||
JSObjectPrintHeader(os, *this, "JSSegmenter");
|
||||
os << "\n - locale: " << Brief(locale());
|
||||
os << "\n - granularity: " << GranularityAsString();
|
||||
os << "\n - granularity: " << GranularityAsString(GetIsolate());
|
||||
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
|
||||
JSObjectPrintBody(os, *this);
|
||||
}
|
||||
|
||||
void JSSegments::JSSegmentsPrint(std::ostream& os) { // NOLINT
|
||||
JSObjectPrintHeader(os, *this, "JSSegments");
|
||||
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
|
||||
os << "\n - unicode string: " << Brief(unicode_string());
|
||||
os << "\n - granularity: " << GranularityAsString(GetIsolate());
|
||||
JSObjectPrintBody(os, *this);
|
||||
}
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
namespace {
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include "src/objects/js-relative-time-format.h"
|
||||
#include "src/objects/js-segment-iterator.h"
|
||||
#include "src/objects/js-segmenter.h"
|
||||
#include "src/objects/js-segments.h"
|
||||
#endif // V8_INTL_SUPPORT
|
||||
#include "src/objects/js-weak-refs.h"
|
||||
#include "src/objects/ordered-hash-table.h"
|
||||
@ -4315,6 +4316,12 @@ void Genesis::InitializeGlobal_harmony_intl_segmenter() {
|
||||
Handle<JSObject> prototype(
|
||||
JSObject::cast(segmenter_fun->instance_prototype()), isolate());
|
||||
|
||||
// #sec-intl.segmenter.prototype-@@tostringtag
|
||||
//
|
||||
// Intl.Segmenter.prototype [ @@toStringTag ]
|
||||
//
|
||||
// The initial value of the @@toStringTag property is the String value
|
||||
// "Intl.Segmenter".
|
||||
InstallToStringTag(isolate(), prototype, "Intl.Segmenter");
|
||||
|
||||
SimpleInstallFunction(isolate(), prototype, "resolvedOptions",
|
||||
@ -4325,6 +4332,32 @@ void Genesis::InitializeGlobal_harmony_intl_segmenter() {
|
||||
Builtins::kSegmenterPrototypeSegment, 1, false);
|
||||
}
|
||||
|
||||
{
|
||||
// Setup %SegmentsPrototype%.
|
||||
Handle<JSObject> prototype = factory()->NewJSObject(
|
||||
isolate()->object_function(), AllocationType::kOld);
|
||||
|
||||
Handle<String> name_string =
|
||||
Name::ToFunctionName(isolate(), isolate()->factory()->Segments_string())
|
||||
.ToHandleChecked();
|
||||
Handle<JSFunction> segments_fun = CreateFunction(
|
||||
isolate(), name_string, JS_SEGMENTS_TYPE, JSSegments::kHeaderSize, 0,
|
||||
prototype, Builtins::kIllegal);
|
||||
segments_fun->shared().set_native(false);
|
||||
segments_fun->shared().set_length(0);
|
||||
segments_fun->shared().DontAdaptArguments();
|
||||
|
||||
SimpleInstallFunction(isolate(), prototype, "containing",
|
||||
Builtins::kSegmentsPrototypeContaining, 1, false);
|
||||
|
||||
InstallFunctionAtSymbol(
|
||||
isolate_, prototype, factory()->iterator_symbol(), "[Symbol.iterator]",
|
||||
Builtins::kSegmentsPrototypeIterator, 0, true, DONT_ENUM);
|
||||
|
||||
Handle<Map> segments_map(segments_fun->initial_map(), isolate());
|
||||
native_context()->set_intl_segments_map(*segments_map);
|
||||
}
|
||||
|
||||
{
|
||||
// Setup %SegmentIteratorPrototype%.
|
||||
Handle<JSObject> iterator_prototype(
|
||||
@ -4334,26 +4367,17 @@ void Genesis::InitializeGlobal_harmony_intl_segmenter() {
|
||||
isolate()->object_function(), AllocationType::kOld);
|
||||
JSObject::ForceSetPrototype(prototype, iterator_prototype);
|
||||
|
||||
InstallToStringTag(isolate(), prototype,
|
||||
factory()->SegmentIterator_string());
|
||||
// #sec-%segmentiteratorprototype%.@@tostringtag
|
||||
//
|
||||
// %SegmentIteratorPrototype% [ @@toStringTag ]
|
||||
//
|
||||
// The initial value of the @@toStringTag property is the String value
|
||||
// "Segmenter String Iterator".
|
||||
InstallToStringTag(isolate(), prototype, "Segmenter String Iterator");
|
||||
|
||||
SimpleInstallFunction(isolate(), prototype, "next",
|
||||
Builtins::kSegmentIteratorPrototypeNext, 0, false);
|
||||
|
||||
SimpleInstallFunction(isolate(), prototype, "following",
|
||||
Builtins::kSegmentIteratorPrototypeFollowing, 0,
|
||||
false);
|
||||
|
||||
SimpleInstallFunction(isolate(), prototype, "preceding",
|
||||
Builtins::kSegmentIteratorPrototypePreceding, 0,
|
||||
false);
|
||||
|
||||
SimpleInstallGetter(isolate(), prototype, factory()->index_string(),
|
||||
Builtins::kSegmentIteratorPrototypeIndex, false);
|
||||
|
||||
SimpleInstallGetter(isolate(), prototype, factory()->breakType_string(),
|
||||
Builtins::kSegmentIteratorPrototypeBreakType, false);
|
||||
|
||||
// Setup SegmentIterator constructor.
|
||||
Handle<String> name_string =
|
||||
Name::ToFunctionName(isolate(),
|
||||
|
@ -54,6 +54,7 @@
|
||||
V(_, ignorePunctuation_string, "ignorePunctuation") \
|
||||
V(_, Invalid_Date_string, "Invalid Date") \
|
||||
V(_, integer_string, "integer") \
|
||||
V(_, isWordLike_string, "isWordLike") \
|
||||
V(_, kana_string, "kana") \
|
||||
V(_, language_string, "language") \
|
||||
V(_, letter_string, "letter") \
|
||||
@ -88,6 +89,7 @@
|
||||
V(_, second_string, "second") \
|
||||
V(_, segment_string, "segment") \
|
||||
V(_, SegmentIterator_string, "Segment Iterator") \
|
||||
V(_, Segments_string, "Segments") \
|
||||
V(_, sensitivity_string, "sensitivity") \
|
||||
V(_, sep_string, "sep") \
|
||||
V(_, shared_string, "shared") \
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "src/objects/js-relative-time-format-inl.h"
|
||||
#include "src/objects/js-segment-iterator-inl.h"
|
||||
#include "src/objects/js-segmenter-inl.h"
|
||||
#include "src/objects/js-segments-inl.h"
|
||||
#endif
|
||||
|
||||
#endif // V8_OBJECTS_CLASS_DEFINITIONS_TQ_DEPS_INL_H_
|
||||
|
@ -153,6 +153,7 @@ enum ContextLookupFlags {
|
||||
V(INTL_RELATIVE_TIME_FORMAT_FUNCTION_INDEX, JSFunction, \
|
||||
intl_relative_time_format_function) \
|
||||
V(INTL_SEGMENTER_FUNCTION_INDEX, JSFunction, intl_segmenter_function) \
|
||||
V(INTL_SEGMENTS_MAP_INDEX, Map, intl_segments_map) \
|
||||
V(INTL_SEGMENT_ITERATOR_MAP_INDEX, Map, intl_segment_iterator_map) \
|
||||
V(ITERATOR_RESULT_MAP_INDEX, Map, iterator_result_map) \
|
||||
V(JS_ARRAY_PACKED_SMI_ELEMENTS_MAP_INDEX, Map, \
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include 'src/objects/js-relative-time-format.h'
|
||||
#include 'src/objects/js-segment-iterator.h'
|
||||
#include 'src/objects/js-segmenter.h'
|
||||
#include 'src/objects/js-segments.h'
|
||||
|
||||
type DateTimeStyle extends int32 constexpr 'JSDateTimeFormat::DateTimeStyle';
|
||||
type HourCycle extends int32 constexpr 'JSDateTimeFormat::HourCycle';
|
||||
@ -115,15 +116,24 @@ extern class JSSegmenter extends JSObject {
|
||||
flags: SmiTagged<JSSegmenterFlags>;
|
||||
}
|
||||
|
||||
bitfield struct JSSegmentsFlags extends uint31 {
|
||||
granularity: JSSegmenterGranularity: 2 bit;
|
||||
}
|
||||
|
||||
@generateCppClass
|
||||
extern class JSSegments extends JSObject {
|
||||
icu_break_iterator: Foreign; // Managed<icu::BreakIterator>
|
||||
unicode_string: Foreign; // Managed<icu::UnicodeString>
|
||||
flags: SmiTagged<JSSegmentsFlags>;
|
||||
}
|
||||
|
||||
bitfield struct JSSegmentIteratorFlags extends uint31 {
|
||||
granularity: JSSegmenterGranularity: 2 bit;
|
||||
break_type_set: bool: 1 bit;
|
||||
}
|
||||
|
||||
@generateCppClass
|
||||
extern class JSSegmentIterator extends JSObject {
|
||||
icu_break_iterator: Foreign; // Managed<icu::BreakIterator>
|
||||
unicode_string: Foreign; // Managed<icu::UnicodeString>
|
||||
flags: SmiTagged<JSSegmentIteratorFlags>;
|
||||
}
|
||||
|
||||
|
@ -668,6 +668,7 @@ bool CanSubclassHaveInobjectProperties(InstanceType instance_type) {
|
||||
case JS_RELATIVE_TIME_FORMAT_TYPE:
|
||||
case JS_SEGMENT_ITERATOR_TYPE:
|
||||
case JS_SEGMENTER_TYPE:
|
||||
case JS_SEGMENTS_TYPE:
|
||||
case JS_V8_BREAK_ITERATOR_TYPE:
|
||||
#endif
|
||||
case JS_ASYNC_FUNCTION_OBJECT_TYPE:
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include "src/objects/js-relative-time-format.h"
|
||||
#include "src/objects/js-segment-iterator.h"
|
||||
#include "src/objects/js-segmenter.h"
|
||||
#include "src/objects/js-segments.h"
|
||||
#endif // V8_INTL_SUPPORT
|
||||
#include "src/objects/js-weak-refs.h"
|
||||
#include "src/objects/map-inl.h"
|
||||
@ -2173,6 +2174,8 @@ int JSObject::GetHeaderSize(InstanceType type,
|
||||
return JSSegmentIterator::kHeaderSize;
|
||||
case JS_SEGMENTER_TYPE:
|
||||
return JSSegmenter::kHeaderSize;
|
||||
case JS_SEGMENTS_TYPE:
|
||||
return JSSegments::kHeaderSize;
|
||||
#endif // V8_INTL_SUPPORT
|
||||
case WASM_GLOBAL_OBJECT_TYPE:
|
||||
return WasmGlobalObject::kHeaderSize;
|
||||
|
@ -1,14 +1,13 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
|
||||
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
|
||||
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
|
||||
|
||||
#include "src/objects/js-segment-iterator.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
|
||||
@ -23,11 +22,6 @@ TQ_OBJECT_CONSTRUCTORS_IMPL(JSSegmentIterator)
|
||||
// Base segment iterator accessors.
|
||||
ACCESSORS(JSSegmentIterator, icu_break_iterator, Managed<icu::BreakIterator>,
|
||||
kIcuBreakIteratorOffset)
|
||||
ACCESSORS(JSSegmentIterator, unicode_string, Managed<icu::UnicodeString>,
|
||||
kUnicodeStringOffset)
|
||||
|
||||
BIT_FIELD_ACCESSORS(JSSegmentIterator, flags, is_break_type_set,
|
||||
JSSegmentIterator::BreakTypeSetBit)
|
||||
|
||||
inline void JSSegmentIterator::set_granularity(
|
||||
JSSegmenter::Granularity granularity) {
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "src/heap/factory.h"
|
||||
#include "src/objects/intl-objects.h"
|
||||
#include "src/objects/js-segment-iterator-inl.h"
|
||||
#include "src/objects/js-segments.h"
|
||||
#include "src/objects/managed.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "unicode/brkiter.h"
|
||||
@ -23,36 +24,22 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
|
||||
int32_t start,
|
||||
int32_t end) const {
|
||||
return Intl::ToString(isolate, *(unicode_string().raw()), start, end);
|
||||
}
|
||||
|
||||
Handle<String> JSSegmentIterator::GranularityAsString() const {
|
||||
switch (granularity()) {
|
||||
case JSSegmenter::Granularity::GRAPHEME:
|
||||
return GetReadOnlyRoots().grapheme_string_handle();
|
||||
case JSSegmenter::Granularity::WORD:
|
||||
return GetReadOnlyRoots().word_string_handle();
|
||||
case JSSegmenter::Granularity::SENTENCE:
|
||||
return GetReadOnlyRoots().sentence_string_handle();
|
||||
}
|
||||
UNREACHABLE();
|
||||
Handle<String> JSSegmentIterator::GranularityAsString(Isolate* isolate) const {
|
||||
return JSSegmenter::GetGranularityString(isolate, granularity());
|
||||
}
|
||||
|
||||
// ecma402 #sec-createsegmentiterator
|
||||
MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
|
||||
Isolate* isolate, icu::BreakIterator* break_iterator,
|
||||
JSSegmenter::Granularity granularity, Handle<String> text) {
|
||||
CHECK_NOT_NULL(break_iterator);
|
||||
// 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
|
||||
JSSegmenter::Granularity granularity) {
|
||||
DCHECK_NOT_NULL(break_iterator);
|
||||
Handle<Map> map = Handle<Map>(
|
||||
isolate->native_context()->intl_segment_iterator_map(), isolate);
|
||||
|
||||
// 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0.
|
||||
break_iterator->first();
|
||||
Handle<Managed<icu::BreakIterator>> managed_break_iterator =
|
||||
Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
|
||||
Handle<Managed<icu::UnicodeString>> unicode_string =
|
||||
Intl::SetTextToBreakIterator(isolate, text, break_iterator);
|
||||
|
||||
// Now all properties are ready, so we can allocate the result object.
|
||||
Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
|
||||
@ -62,230 +49,47 @@ MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
|
||||
|
||||
segment_iterator->set_flags(0);
|
||||
segment_iterator->set_granularity(granularity);
|
||||
// 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
|
||||
segment_iterator->set_icu_break_iterator(*managed_break_iterator);
|
||||
|
||||
// 3. Let iterator.[[SegmentIteratorString]] be string.
|
||||
segment_iterator->set_unicode_string(*unicode_string);
|
||||
|
||||
// 4. Let iterator.[[SegmentIteratorIndex]] be 0.
|
||||
// step 4 is stored inside break_iterator.
|
||||
|
||||
// 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
|
||||
segment_iterator->set_is_break_type_set(false);
|
||||
|
||||
return segment_iterator;
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-breakType
|
||||
Handle<Object> JSSegmentIterator::BreakType() const {
|
||||
if (!is_break_type_set()) {
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
}
|
||||
icu::BreakIterator* break_iterator = icu_break_iterator().raw();
|
||||
int32_t rule_status = break_iterator->getRuleStatus();
|
||||
switch (granularity()) {
|
||||
case JSSegmenter::Granularity::GRAPHEME:
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
case JSSegmenter::Granularity::WORD:
|
||||
if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
|
||||
// "words" that do not fit into any of other categories. Includes spaces
|
||||
// and most punctuation.
|
||||
return GetReadOnlyRoots().none_string_handle();
|
||||
}
|
||||
if ((rule_status >= UBRK_WORD_NUMBER &&
|
||||
rule_status < UBRK_WORD_NUMBER_LIMIT) ||
|
||||
(rule_status >= UBRK_WORD_LETTER &&
|
||||
rule_status < UBRK_WORD_LETTER_LIMIT) ||
|
||||
(rule_status >= UBRK_WORD_KANA &&
|
||||
rule_status < UBRK_WORD_KANA_LIMIT) ||
|
||||
(rule_status >= UBRK_WORD_IDEO &&
|
||||
rule_status < UBRK_WORD_IDEO_LIMIT)) {
|
||||
// words that appear to be numbers, letters, kana characters,
|
||||
// ideographic characters, etc
|
||||
return GetReadOnlyRoots().word_string_handle();
|
||||
}
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
case JSSegmenter::Granularity::SENTENCE:
|
||||
if (rule_status >= UBRK_SENTENCE_TERM &&
|
||||
rule_status < UBRK_SENTENCE_TERM_LIMIT) {
|
||||
// sentences ending with a sentence terminator ('.', '?', '!', etc.)
|
||||
// character, possibly followed by a hard separator (CR, LF, PS, etc.)
|
||||
return GetReadOnlyRoots().term_string_handle();
|
||||
}
|
||||
if ((rule_status >= UBRK_SENTENCE_SEP &&
|
||||
rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
|
||||
// sentences that do not contain an ending sentence terminator ('.',
|
||||
// '?', '!', etc.) character, but are ended only by a hard separator
|
||||
// (CR, LF, PS, etc.) hard, or mandatory line breaks
|
||||
return GetReadOnlyRoots().sep_string_handle();
|
||||
}
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-index
|
||||
Handle<Object> JSSegmentIterator::Index(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
|
||||
icu::BreakIterator* icu_break_iterator =
|
||||
segment_iterator->icu_break_iterator().raw();
|
||||
CHECK_NOT_NULL(icu_break_iterator);
|
||||
return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-next
|
||||
// ecma402 #sec-%segmentiteratorprototype%.next
|
||||
MaybeHandle<JSReceiver> JSSegmentIterator::Next(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
|
||||
Factory* factory = isolate->factory();
|
||||
icu::BreakIterator* icu_break_iterator =
|
||||
segment_iterator->icu_break_iterator().raw();
|
||||
// 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
|
||||
int32_t prev = icu_break_iterator->current();
|
||||
// 4. Let done be AdvanceSegmentIterator(iterator, forwards).
|
||||
int32_t index = icu_break_iterator->next();
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
if (index == icu::BreakIterator::DONE) {
|
||||
// 5. If done is true, return CreateIterResultObject(undefined, true).
|
||||
// 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]].
|
||||
int32_t start_index = icu_break_iterator->current();
|
||||
// 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after).
|
||||
int32_t end_index = icu_break_iterator->next();
|
||||
|
||||
// 7. If endIndex is not finite, then
|
||||
if (end_index == icu::BreakIterator::DONE) {
|
||||
// a. Return ! CreateIterResultObject(undefined, true).
|
||||
return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
|
||||
true);
|
||||
}
|
||||
// 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
|
||||
Handle<Object> new_index = factory->NewNumberFromInt(index);
|
||||
|
||||
// 8. Let segment be the substring of string from previousIndex to
|
||||
// newIndex, inclusive of previousIndex and exclusive of newIndex.
|
||||
Handle<String> segment;
|
||||
ASSIGN_RETURN_ON_EXCEPTION(isolate, segment,
|
||||
segment_iterator->GetSegment(isolate, prev, index),
|
||||
JSReceiver);
|
||||
// 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
|
||||
|
||||
// 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
|
||||
Handle<Object> break_type = segment_iterator->BreakType();
|
||||
// 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string,
|
||||
// startIndex, endIndex).
|
||||
|
||||
// 10. Let result be ! ObjectCreate(%ObjectPrototype%).
|
||||
Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
|
||||
icu::UnicodeString string;
|
||||
icu_break_iterator->getText().getText(string);
|
||||
|
||||
// 11. Perform ! CreateDataProperty(result "segment", segment).
|
||||
CHECK(JSReceiver::CreateDataProperty(isolate, result,
|
||||
factory->segment_string(), segment,
|
||||
Just(kDontThrow))
|
||||
.FromJust());
|
||||
Handle<Object> segment_data;
|
||||
ASSIGN_RETURN_ON_EXCEPTION(
|
||||
isolate, segment_data,
|
||||
JSSegments::CreateSegmentDataObject(
|
||||
isolate, segment_iterator->granularity(), icu_break_iterator, string,
|
||||
start_index, end_index),
|
||||
JSReceiver);
|
||||
|
||||
// 12. Perform ! CreateDataProperty(result, "breakType", breakType).
|
||||
CHECK(JSReceiver::CreateDataProperty(isolate, result,
|
||||
factory->breakType_string(), break_type,
|
||||
Just(kDontThrow))
|
||||
.FromJust());
|
||||
|
||||
// 13. Perform ! CreateDataProperty(result, "index", newIndex).
|
||||
CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(),
|
||||
new_index, Just(kDontThrow))
|
||||
.FromJust());
|
||||
|
||||
// 14. Return CreateIterResultObject(result, false).
|
||||
return factory->NewJSIteratorResult(result, false);
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-following
|
||||
Maybe<bool> JSSegmentIterator::Following(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
|
||||
Handle<Object> from_obj) {
|
||||
Factory* factory = isolate->factory();
|
||||
icu::BreakIterator* icu_break_iterator =
|
||||
segment_iterator->icu_break_iterator().raw();
|
||||
// 3. If from is not undefined,
|
||||
if (!from_obj->IsUndefined()) {
|
||||
// a. Let from be ? ToIndex(from).
|
||||
uint32_t from;
|
||||
Handle<Object> index;
|
||||
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
|
||||
isolate, index,
|
||||
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
|
||||
Nothing<bool>());
|
||||
if (!index->ToArrayIndex(&from)) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->from_string(),
|
||||
factory->NewStringFromStaticChars("following"), index),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// b. Let length be the length of iterator.[[SegmentIteratorString]].
|
||||
uint32_t length =
|
||||
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
|
||||
|
||||
// c. If from ≥ length, throw a RangeError exception.
|
||||
if (from >= length) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->from_string(),
|
||||
factory->NewStringFromStaticChars("following"),
|
||||
from_obj),
|
||||
Nothing<bool>());
|
||||
}
|
||||
|
||||
// d. Let iterator.[[SegmentIteratorPosition]] be from.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
icu_break_iterator->following(from);
|
||||
return Just(false);
|
||||
}
|
||||
// 4. return AdvanceSegmentIterator(iterator, forward).
|
||||
// 4. .... or if direction is backwards and position is 0, return true.
|
||||
// 4. If direction is forwards and position is the length of string ... return
|
||||
// true.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-preceding
|
||||
Maybe<bool> JSSegmentIterator::Preceding(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
|
||||
Handle<Object> from_obj) {
|
||||
Factory* factory = isolate->factory();
|
||||
icu::BreakIterator* icu_break_iterator =
|
||||
segment_iterator->icu_break_iterator().raw();
|
||||
// 3. If from is not undefined,
|
||||
if (!from_obj->IsUndefined()) {
|
||||
// a. Let from be ? ToIndex(from).
|
||||
uint32_t from;
|
||||
Handle<Object> index;
|
||||
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
|
||||
isolate, index,
|
||||
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
|
||||
Nothing<bool>());
|
||||
|
||||
if (!index->ToArrayIndex(&from)) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->from_string(),
|
||||
factory->NewStringFromStaticChars("preceding"), index),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// b. Let length be the length of iterator.[[SegmentIteratorString]].
|
||||
uint32_t length =
|
||||
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
|
||||
// c. If from > length or from = 0, throw a RangeError exception.
|
||||
if (from > length || from == 0) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->from_string(),
|
||||
factory->NewStringFromStaticChars("preceding"),
|
||||
from_obj),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// d. Let iterator.[[SegmentIteratorIndex]] be from.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
icu_break_iterator->preceding(from);
|
||||
return Just(false);
|
||||
}
|
||||
// 4. return AdvanceSegmentIterator(iterator, backwards).
|
||||
// 4. .... or if direction is backwards and position is 0, return true.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
|
||||
// 10. Return ! CreateIterResultObject(segmentData, false).
|
||||
return factory->NewJSIteratorResult(segment_data, false);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
@ -1,14 +1,13 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
|
||||
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
|
||||
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
|
||||
|
||||
#include "src/base/bit-field.h"
|
||||
#include "src/execution/isolate.h"
|
||||
#include "src/heap/factory.h"
|
||||
@ -34,40 +33,16 @@ class JSSegmentIterator
|
||||
// ecma402 #sec-CreateSegmentIterator
|
||||
V8_WARN_UNUSED_RESULT static MaybeHandle<JSSegmentIterator> Create(
|
||||
Isolate* isolate, icu::BreakIterator* icu_break_iterator,
|
||||
JSSegmenter::Granularity granularity, Handle<String> string);
|
||||
JSSegmenter::Granularity granularity);
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-next
|
||||
V8_WARN_UNUSED_RESULT static MaybeHandle<JSReceiver> Next(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder);
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-following
|
||||
static Maybe<bool> Following(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder,
|
||||
Handle<Object> from);
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-preceding
|
||||
static Maybe<bool> Preceding(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder,
|
||||
Handle<Object> from);
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-index
|
||||
static Handle<Object> Index(
|
||||
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder);
|
||||
|
||||
Handle<String> GranularityAsString() const;
|
||||
|
||||
DECL_BOOLEAN_ACCESSORS(is_break_type_set)
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-breakType
|
||||
Handle<Object> BreakType() const;
|
||||
|
||||
V8_WARN_UNUSED_RESULT MaybeHandle<String> GetSegment(Isolate* isolate,
|
||||
int32_t start,
|
||||
int32_t end) const;
|
||||
Handle<String> GranularityAsString(Isolate* isolate) const;
|
||||
|
||||
// SegmentIterator accessors.
|
||||
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
|
||||
DECL_ACCESSORS(unicode_string, Managed<icu::UnicodeString>)
|
||||
|
||||
DECL_PRINTER(JSSegmentIterator)
|
||||
|
||||
|
@ -1,14 +1,13 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
#ifndef V8_OBJECTS_JS_SEGMENTER_INL_H_
|
||||
#define V8_OBJECTS_JS_SEGMENTER_INL_H_
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#ifndef V8_OBJECTS_JS_SEGMENTER_INL_H_
|
||||
#define V8_OBJECTS_JS_SEGMENTER_INL_H_
|
||||
|
||||
#include "src/objects/js-segmenter.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
|
||||
|
@ -26,36 +26,37 @@ namespace internal {
|
||||
MaybeHandle<JSSegmenter> JSSegmenter::New(Isolate* isolate, Handle<Map> map,
|
||||
Handle<Object> locales,
|
||||
Handle<Object> input_options) {
|
||||
// 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
|
||||
// 4. Let requestedLocales be ? CanonicalizeLocaleList(locales).
|
||||
Maybe<std::vector<std::string>> maybe_requested_locales =
|
||||
Intl::CanonicalizeLocaleList(isolate, locales);
|
||||
MAYBE_RETURN(maybe_requested_locales, Handle<JSSegmenter>());
|
||||
std::vector<std::string> requested_locales =
|
||||
maybe_requested_locales.FromJust();
|
||||
|
||||
// 11. If options is undefined, then
|
||||
// 5. If options is undefined, then
|
||||
Handle<JSReceiver> options;
|
||||
if (input_options->IsUndefined(isolate)) {
|
||||
// 11. a. Let options be ObjectCreate(null).
|
||||
// a. Let options be ObjectCreate(null).
|
||||
options = isolate->factory()->NewJSObjectWithNullProto();
|
||||
// 12. Else
|
||||
} else {
|
||||
// 23. a. Let options be ? ToObject(options).
|
||||
} else { // 6. Else
|
||||
// a. Let options be ? ToObject(options).
|
||||
ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
|
||||
Object::ToObject(isolate, input_options),
|
||||
JSSegmenter);
|
||||
}
|
||||
|
||||
// 4. Let opt be a new Record.
|
||||
// 5. Let matcher be ? GetOption(options, "localeMatcher", "string",
|
||||
// 7. Let opt be a new Record.
|
||||
// 8. Let matcher be ? GetOption(options, "localeMatcher", "string",
|
||||
// « "lookup", "best fit" », "best fit").
|
||||
// 6. Set opt.[[localeMatcher]] to matcher.
|
||||
// 9. Set opt.[[localeMatcher]] to matcher.
|
||||
Maybe<Intl::MatcherOption> maybe_locale_matcher =
|
||||
Intl::GetLocaleMatcher(isolate, options, "Intl.Segmenter");
|
||||
MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
|
||||
Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
|
||||
|
||||
// 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
|
||||
// 10. Let localeData be %Segmenter%.[[LocaleData]].
|
||||
|
||||
// 11. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
|
||||
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
|
||||
Maybe<Intl::ResolvedLocale> maybe_resolve_locale =
|
||||
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
|
||||
@ -66,6 +67,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::New(Isolate* isolate, Handle<Map> map,
|
||||
}
|
||||
Intl::ResolvedLocale r = maybe_resolve_locale.FromJust();
|
||||
|
||||
// 12. Set segmenter.[[Locale]] to the value of r.[[locale]].
|
||||
Handle<String> locale_str =
|
||||
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
|
||||
|
||||
@ -100,32 +102,34 @@ MaybeHandle<JSSegmenter> JSSegmenter::New(Isolate* isolate, Handle<Map> map,
|
||||
break;
|
||||
}
|
||||
|
||||
CHECK(U_SUCCESS(status));
|
||||
CHECK_NOT_NULL(icu_break_iterator.get());
|
||||
DCHECK(U_SUCCESS(status));
|
||||
DCHECK_NOT_NULL(icu_break_iterator.get());
|
||||
|
||||
Handle<Managed<icu::BreakIterator>> managed_break_iterator =
|
||||
Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
|
||||
std::move(icu_break_iterator));
|
||||
|
||||
// Now all properties are ready, so we can allocate the result object.
|
||||
Handle<JSSegmenter> segmenter_holder = Handle<JSSegmenter>::cast(
|
||||
Handle<JSSegmenter> segmenter = Handle<JSSegmenter>::cast(
|
||||
isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
|
||||
DisallowHeapAllocation no_gc;
|
||||
segmenter_holder->set_flags(0);
|
||||
segmenter->set_flags(0);
|
||||
|
||||
// 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
|
||||
segmenter_holder->set_locale(*locale_str);
|
||||
// 12. Set segmenter.[[Locale]] to the value of r.[[Locale]].
|
||||
segmenter->set_locale(*locale_str);
|
||||
|
||||
// 14. Set segmenter.[[SegmenterGranularity]] to granularity.
|
||||
segmenter_holder->set_granularity(granularity_enum);
|
||||
segmenter->set_granularity(granularity_enum);
|
||||
|
||||
segmenter_holder->set_icu_break_iterator(*managed_break_iterator);
|
||||
return segmenter_holder;
|
||||
segmenter->set_icu_break_iterator(*managed_break_iterator);
|
||||
|
||||
// 15. Return segmenter.
|
||||
return segmenter;
|
||||
}
|
||||
|
||||
// ecma402 #sec-Intl.Segmenter.prototype.resolvedOptions
|
||||
Handle<JSObject> JSSegmenter::ResolvedOptions(
|
||||
Isolate* isolate, Handle<JSSegmenter> segmenter_holder) {
|
||||
Handle<JSObject> JSSegmenter::ResolvedOptions(Isolate* isolate,
|
||||
Handle<JSSegmenter> segmenter) {
|
||||
Factory* factory = isolate->factory();
|
||||
// 3. Let options be ! ObjectCreate(%ObjectPrototype%).
|
||||
Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
|
||||
@ -141,23 +145,29 @@ Handle<JSObject> JSSegmenter::ResolvedOptions(
|
||||
// [[Locale]] "locale"
|
||||
// [[SegmenterGranularity]] "granularity"
|
||||
|
||||
Handle<String> locale(segmenter_holder->locale(), isolate);
|
||||
Handle<String> locale(segmenter->locale(), isolate);
|
||||
JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
|
||||
NONE);
|
||||
JSObject::AddProperty(isolate, result, factory->granularity_string(),
|
||||
segmenter_holder->GranularityAsString(), NONE);
|
||||
segmenter->GranularityAsString(isolate), NONE);
|
||||
// 5. Return options.
|
||||
return result;
|
||||
}
|
||||
|
||||
Handle<String> JSSegmenter::GranularityAsString() const {
|
||||
switch (granularity()) {
|
||||
Handle<String> JSSegmenter::GranularityAsString(Isolate* isolate) const {
|
||||
return GetGranularityString(isolate, granularity());
|
||||
}
|
||||
|
||||
Handle<String> JSSegmenter::GetGranularityString(Isolate* isolate,
|
||||
Granularity granularity) {
|
||||
Factory* factory = isolate->factory();
|
||||
switch (granularity) {
|
||||
case Granularity::GRAPHEME:
|
||||
return GetReadOnlyRoots().grapheme_string_handle();
|
||||
return factory->grapheme_string();
|
||||
case Granularity::WORD:
|
||||
return GetReadOnlyRoots().word_string_handle();
|
||||
return factory->word_string();
|
||||
case Granularity::SENTENCE:
|
||||
return GetReadOnlyRoots().sentence_string_handle();
|
||||
return factory->sentence_string();
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -1,14 +1,13 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
#ifndef V8_OBJECTS_JS_SEGMENTER_H_
|
||||
#define V8_OBJECTS_JS_SEGMENTER_H_
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#ifndef V8_OBJECTS_JS_SEGMENTER_H_
|
||||
#define V8_OBJECTS_JS_SEGMENTER_H_
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
@ -42,7 +41,7 @@ class JSSegmenter : public TorqueGeneratedJSSegmenter<JSSegmenter, JSObject> {
|
||||
|
||||
V8_EXPORT_PRIVATE static const std::set<std::string>& GetAvailableLocales();
|
||||
|
||||
Handle<String> GranularityAsString() const;
|
||||
Handle<String> GranularityAsString(Isolate* isolate) const;
|
||||
|
||||
// Segmenter accessors.
|
||||
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
|
||||
@ -58,6 +57,9 @@ class JSSegmenter : public TorqueGeneratedJSSegmenter<JSSegmenter, JSObject> {
|
||||
inline void set_granularity(Granularity granularity);
|
||||
inline Granularity granularity() const;
|
||||
|
||||
Handle<String> static GetGranularityString(Isolate* isolate,
|
||||
Granularity granularity);
|
||||
|
||||
// Bit positions in |flags|.
|
||||
DEFINE_TORQUE_GENERATED_JS_SEGMENTER_FLAGS()
|
||||
|
||||
|
44
src/objects/js-segments-inl.h
Normal file
44
src/objects/js-segments-inl.h
Normal file
@ -0,0 +1,44 @@
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
#ifndef V8_OBJECTS_JS_SEGMENTS_INL_H_
|
||||
#define V8_OBJECTS_JS_SEGMENTS_INL_H_
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#include "src/objects/js-segments.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
|
||||
// Has to be the last include (doesn't have include guards):
|
||||
#include "src/objects/object-macros.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
TQ_OBJECT_CONSTRUCTORS_IMPL(JSSegments)
|
||||
|
||||
// Base segments accessors.
|
||||
ACCESSORS(JSSegments, icu_break_iterator, Managed<icu::BreakIterator>,
|
||||
kIcuBreakIteratorOffset)
|
||||
ACCESSORS(JSSegments, unicode_string, Managed<icu::UnicodeString>,
|
||||
kUnicodeStringOffset)
|
||||
|
||||
// Writes |granularity| into the GranularityBits field of |flags|.
inline void JSSegments::set_granularity(JSSegmenter::Granularity granularity) {
  DCHECK_GE(GranularityBits::kMax, granularity);
  set_flags(GranularityBits::update(flags(), granularity));
}
|
||||
|
||||
// Decodes the granularity stored in the GranularityBits field of |flags|.
inline JSSegmenter::Granularity JSSegments::granularity() const {
  const int hints = flags();
  return GranularityBits::decode(hints);
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#include "src/objects/object-macros-undef.h"
|
||||
|
||||
#endif // V8_OBJECTS_JS_SEGMENTS_INL_H_
|
183
src/objects/js-segments.cc
Normal file
183
src/objects/js-segments.cc
Normal file
@ -0,0 +1,183 @@
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#include "src/objects/js-segments.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "src/execution/isolate.h"
|
||||
#include "src/heap/factory.h"
|
||||
#include "src/objects/intl-objects.h"
|
||||
#include "src/objects/js-segment-iterator.h"
|
||||
#include "src/objects/js-segmenter-inl.h"
|
||||
#include "src/objects/js-segments-inl.h"
|
||||
#include "src/objects/managed.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "unicode/brkiter.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// ecma402 #sec-createsegmentsobject
|
||||
MaybeHandle<JSSegments> JSSegments::Create(Isolate* isolate,
|
||||
Handle<JSSegmenter> segmenter,
|
||||
Handle<String> string) {
|
||||
icu::BreakIterator* break_iterator =
|
||||
segmenter->icu_break_iterator().raw()->clone();
|
||||
DCHECK_NOT_NULL(break_iterator);
|
||||
|
||||
Handle<Managed<icu::UnicodeString>> unicode_string =
|
||||
Intl::SetTextToBreakIterator(isolate, string, break_iterator);
|
||||
Handle<Managed<icu::BreakIterator>> managed_break_iterator =
|
||||
Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
|
||||
|
||||
// 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
|
||||
// 2. Let segments be ! ObjectCreate(%Segments.prototype%, internalSlotsList).
|
||||
Handle<Map> map(isolate->native_context()->intl_segments_map(), isolate);
|
||||
Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
|
||||
|
||||
Handle<JSSegments> segments = Handle<JSSegments>::cast(result);
|
||||
segments->set_flags(0);
|
||||
|
||||
// 3. Set segments.[[SegmentsSegmenter]] to segmenter.
|
||||
segments->set_icu_break_iterator(*managed_break_iterator);
|
||||
segments->set_granularity(segmenter->granularity());
|
||||
|
||||
// 4. Set segments.[[SegmentsString]] to string.
|
||||
segments->set_unicode_string(*unicode_string);
|
||||
|
||||
// 5. Return segments.
|
||||
return segments;
|
||||
}
|
||||
|
||||
// ecma402 #sec-createsegmentiterator
|
||||
MaybeHandle<Object> JSSegments::CreateSegmentIterator(
|
||||
Isolate* isolate, Handle<JSSegments> segments) {
|
||||
return JSSegmentIterator::Create(
|
||||
isolate, segments->icu_break_iterator().raw()->clone(),
|
||||
segments->granularity());
|
||||
}
|
||||
|
||||
// ecma402 #sec-%segmentsprototype%.containing
|
||||
MaybeHandle<Object> JSSegments::Containing(Isolate* isolate,
|
||||
Handle<JSSegments> segments,
|
||||
int32_t n) {
|
||||
// 5. Let len be the length of string.
|
||||
int32_t len = segments->unicode_string().raw()->length();
|
||||
|
||||
// 7. If n < 0 or n ≥ len, return undefined.
|
||||
if (n < 0 || n >= len) {
|
||||
return isolate->factory()->undefined_value();
|
||||
}
|
||||
|
||||
icu::BreakIterator* break_iterator = segments->icu_break_iterator().raw();
|
||||
// 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
|
||||
int32_t start_index =
|
||||
break_iterator->isBoundary(n) ? n : break_iterator->preceding(n);
|
||||
|
||||
// 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
|
||||
int32_t end_index = break_iterator->following(n);
|
||||
|
||||
// 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex,
|
||||
// endIndex).
|
||||
return CreateSegmentDataObject(
|
||||
isolate, segments->granularity(), break_iterator,
|
||||
*(segments->unicode_string().raw()), start_index, end_index);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
bool CurrentSegmentIsWordLike(icu::BreakIterator* break_iterator) {
|
||||
int32_t rule_status = break_iterator->getRuleStatus();
|
||||
return (rule_status >= UBRK_WORD_NUMBER &&
|
||||
rule_status < UBRK_WORD_NUMBER_LIMIT) ||
|
||||
(rule_status >= UBRK_WORD_LETTER &&
|
||||
rule_status < UBRK_WORD_LETTER_LIMIT) ||
|
||||
(rule_status >= UBRK_WORD_KANA &&
|
||||
rule_status < UBRK_WORD_KANA_LIMIT) ||
|
||||
(rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// ecma402 #sec-createsegmentdataobject
|
||||
MaybeHandle<Object> JSSegments::CreateSegmentDataObject(
|
||||
Isolate* isolate, JSSegmenter::Granularity granularity,
|
||||
icu::BreakIterator* break_iterator, const icu::UnicodeString& string,
|
||||
int32_t start_index, int32_t end_index) {
|
||||
Factory* factory = isolate->factory();
|
||||
|
||||
// 1. Let len be the length of string.
|
||||
// 2. Assert: startIndex ≥ 0.
|
||||
DCHECK_GE(start_index, 0);
|
||||
// 3. Assert: endIndex ≤ len.
|
||||
DCHECK_LE(end_index, string.length());
|
||||
// 4. Assert: startIndex < endIndex.
|
||||
DCHECK_LT(start_index, end_index);
|
||||
|
||||
// 5. Let result be ! ObjectCreate(%ObjectPrototype%).
|
||||
Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
|
||||
|
||||
// 6. Let segment be the String value equal to the substring of string
|
||||
// consisting of the code units at indices startIndex (inclusive) through
|
||||
// endIndex (exclusive).
|
||||
Handle<String> segment;
|
||||
ASSIGN_RETURN_ON_EXCEPTION(
|
||||
isolate, segment, Intl::ToString(isolate, string, start_index, end_index),
|
||||
JSObject);
|
||||
|
||||
// 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
|
||||
Maybe<bool> maybe_create_segment = JSReceiver::CreateDataProperty(
|
||||
isolate, result, factory->segment_string(), segment, Just(kDontThrow));
|
||||
DCHECK(maybe_create_segment.FromJust());
|
||||
USE(maybe_create_segment);
|
||||
|
||||
// 8. Perform ! CreateDataPropertyOrThrow(result, "index", startIndex).
|
||||
Maybe<bool> maybe_create_index = JSReceiver::CreateDataProperty(
|
||||
isolate, result, factory->index_string(),
|
||||
factory->NewNumberFromInt(start_index), Just(kDontThrow));
|
||||
DCHECK(maybe_create_index.FromJust());
|
||||
USE(maybe_create_index);
|
||||
|
||||
// 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
|
||||
Handle<String> input_string;
|
||||
ASSIGN_RETURN_ON_EXCEPTION(isolate, input_string,
|
||||
Intl::ToString(isolate, string), JSObject);
|
||||
Maybe<bool> maybe_create_input = JSReceiver::CreateDataProperty(
|
||||
isolate, result, factory->input_string(), input_string, Just(kDontThrow));
|
||||
DCHECK(maybe_create_input.FromJust());
|
||||
USE(maybe_create_input);
|
||||
|
||||
Handle<Object> is_word_like;
|
||||
// 10. Let granularity be segmenter.[[SegmenterGranularity]].
|
||||
// 11. If granularity is "word", then
|
||||
if (granularity == JSSegmenter::Granularity::WORD) {
|
||||
// a. Let isWordLike be a Boolean value indicating whether the word segment
|
||||
// segment in string is "word-like" according to locale
|
||||
// segmenter.[[Locale]].
|
||||
is_word_like = CurrentSegmentIsWordLike(break_iterator)
|
||||
? factory->true_value()
|
||||
: factory->false_value();
|
||||
// b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
|
||||
Maybe<bool> maybe_create_is_word_like = JSReceiver::CreateDataProperty(
|
||||
isolate, result, factory->isWordLike_string(), is_word_like,
|
||||
Just(kDontThrow));
|
||||
DCHECK(maybe_create_is_word_like.FromJust());
|
||||
USE(maybe_create_is_word_like);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the string JSSegmenter::GetGranularityString yields for this
// object's granularity.
Handle<String> JSSegments::GranularityAsString(Isolate* isolate) const {
  const JSSegmenter::Granularity current = granularity();
  return JSSegmenter::GetGranularityString(isolate, current);
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
80
src/objects/js-segments.h
Normal file
80
src/objects/js-segments.h
Normal file
@ -0,0 +1,80 @@
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
#ifndef V8_OBJECTS_JS_SEGMENTS_H_
|
||||
#define V8_OBJECTS_JS_SEGMENTS_H_
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
#error Internationalization is expected to be enabled.
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
#include "src/base/bit-field.h"
|
||||
#include "src/execution/isolate.h"
|
||||
#include "src/heap/factory.h"
|
||||
#include "src/objects/js-segmenter.h"
|
||||
#include "src/objects/managed.h"
|
||||
#include "src/objects/objects.h"
|
||||
#include "unicode/uversion.h"
|
||||
|
||||
// Has to be the last include (doesn't have include guards):
|
||||
#include "src/objects/object-macros.h"
|
||||
|
||||
namespace U_ICU_NAMESPACE {
|
||||
class BreakIterator;
|
||||
class UnicodeString;
|
||||
} // namespace U_ICU_NAMESPACE
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// Object produced by JSSegments::Create (ecma402 #sec-createsegmentsobject).
// It holds an ICU break iterator, the ICU copy of the string being segmented,
// and the segmenter granularity packed into |flags|.
class JSSegments : public TorqueGeneratedJSSegments<JSSegments, JSObject> {
 public:
  // ecma402 #sec-createsegmentsobject
  V8_WARN_UNUSED_RESULT static MaybeHandle<JSSegments> Create(
      Isolate* isolate, Handle<JSSegmenter> segmenter, Handle<String> string);

  // ecma402 #sec-%segmentsprototype%.containing
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Containing(
      Isolate* isolate, Handle<JSSegments> segments_holder, int32_t index);

  // ecma402 #sec-%segmentsprototype%-@@iterator
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> CreateSegmentIterator(
      Isolate* isolate, Handle<JSSegments> segments_holder);

  // ecma402 #sec-get-%segmentsprototype%.string
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetString(
      Isolate* isolate, Handle<JSSegments> segments_holder);

  // ecma402 #sec-createsegmentdataobject
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> CreateSegmentDataObject(
      Isolate* isolate, JSSegmenter::Granularity granularity,
      icu::BreakIterator* break_iterator, const icu::UnicodeString& string,
      int32_t start_index, int32_t end_index);

  // Granularity of this object as a string (see
  // JSSegmenter::GetGranularityString).
  Handle<String> GranularityAsString(Isolate* isolate) const;

  // Segments accessors.
  DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
  DECL_ACCESSORS(unicode_string, Managed<icu::UnicodeString>)

  DECL_PRINTER(JSSegments)

  // The granularity is stored in the GranularityBits field of |flags|.
  inline void set_granularity(JSSegmenter::Granularity granularity);
  inline JSSegmenter::Granularity granularity() const;

  // Bit positions in |flags|.
  // NOTE(review): this uses the JSSegmentIterator flags macro rather than a
  // JSSegments-specific one; presumably both objects share one Torque flags
  // layout. Confirm against the Torque definitions.
  DEFINE_TORQUE_GENERATED_JS_SEGMENT_ITERATOR_FLAGS()

  // Every granularity value must fit in the bit field.
  STATIC_ASSERT(JSSegmenter::Granularity::GRAPHEME <= GranularityBits::kMax);
  STATIC_ASSERT(JSSegmenter::Granularity::WORD <= GranularityBits::kMax);
  STATIC_ASSERT(JSSegmenter::Granularity::SENTENCE <= GranularityBits::kMax);

  TQ_OBJECT_CONSTRUCTORS(JSSegments)
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#include "src/objects/object-macros-undef.h"
|
||||
|
||||
#endif // V8_OBJECTS_JS_SEGMENTS_H_
|
@ -306,6 +306,7 @@ VisitorId Map::GetVisitorId(Map map) {
|
||||
case JS_RELATIVE_TIME_FORMAT_TYPE:
|
||||
case JS_SEGMENT_ITERATOR_TYPE:
|
||||
case JS_SEGMENTER_TYPE:
|
||||
case JS_SEGMENTS_TYPE:
|
||||
#endif // V8_INTL_SUPPORT
|
||||
case WASM_EXCEPTION_OBJECT_TYPE:
|
||||
case WASM_GLOBAL_OBJECT_TYPE:
|
||||
|
@ -252,7 +252,8 @@ class ZoneForwardList;
|
||||
V(JSPluralRules) \
|
||||
V(JSRelativeTimeFormat) \
|
||||
V(JSSegmentIterator) \
|
||||
V(JSSegmenter)
|
||||
V(JSSegmenter) \
|
||||
V(JSSegments)
|
||||
#else
|
||||
#define HEAP_OBJECT_ORDINARY_TYPE_LIST(V) HEAP_OBJECT_ORDINARY_TYPE_LIST_BASE(V)
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
@ -1033,6 +1033,7 @@ ReturnType BodyDescriptorApply(InstanceType type, T1 p1, T2 p2, T3 p3, T4 p4) {
|
||||
case JS_RELATIVE_TIME_FORMAT_TYPE:
|
||||
case JS_SEGMENT_ITERATOR_TYPE:
|
||||
case JS_SEGMENTER_TYPE:
|
||||
case JS_SEGMENTS_TYPE:
|
||||
#endif // V8_INTL_SUPPORT
|
||||
case WASM_EXCEPTION_OBJECT_TYPE:
|
||||
case WASM_GLOBAL_OBJECT_TYPE:
|
||||
|
@ -94,6 +94,7 @@
|
||||
#include "src/objects/js-relative-time-format.h"
|
||||
#include "src/objects/js-segment-iterator.h"
|
||||
#include "src/objects/js-segmenter.h"
|
||||
#include "src/objects/js-segments.h"
|
||||
#endif // V8_INTL_SUPPORT
|
||||
#include "src/codegen/source-position-table.h"
|
||||
#include "src/objects/js-weak-refs-inl.h"
|
||||
|
@ -78,6 +78,7 @@
|
||||
// - JSPluralRules // If V8_INTL_SUPPORT enabled.
|
||||
// - JSRelativeTimeFormat // If V8_INTL_SUPPORT enabled.
|
||||
// - JSSegmenter // If V8_INTL_SUPPORT enabled.
|
||||
// - JSSegments // If V8_INTL_SUPPORT enabled.
|
||||
// - JSSegmentIterator // If V8_INTL_SUPPORT enabled.
|
||||
// - JSV8BreakIterator // If V8_INTL_SUPPORT enabled.
|
||||
// - WasmExceptionObject
|
||||
|
@ -4788,6 +4788,7 @@ void ImplementationVisitor::GenerateExportedMacrosAssembler(
|
||||
cc_contents << "#include \"src/objects/js-relative-time-format.h\"\n";
|
||||
cc_contents << "#include \"src/objects/js-segment-iterator.h\"\n";
|
||||
cc_contents << "#include \"src/objects/js-segmenter.h\"\n";
|
||||
cc_contents << "#include \"src/objects/js-segments.h\"\n";
|
||||
}
|
||||
cc_contents << "#include \"torque-generated/" << file_name << ".h\"\n";
|
||||
|
||||
|
@ -1,38 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.join(""));
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with the result from the one
|
||||
// created in the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assertEquals(undefined, v.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assertFalse(iter.following());
|
||||
assertEquals(iter.breakType, v.breakType);
|
||||
assertEquals(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assertTrue(iter.following());
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assertEquals(undefined, v.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assertEquals("number", typeof v.index);
|
||||
assertTrue(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev)
|
||||
assertEquals(undefined, iter.breakType)
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.reverse().join(""));
|
||||
}
|
@ -23,7 +23,21 @@ for (const text of [
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.index);
|
||||
const segments = seg.segment(text);
|
||||
let results = [];
|
||||
var pos = -1;
|
||||
for (let s of segments) {
|
||||
assertEquals(["segment", "index", "input"], Object.keys(s));
|
||||
assertEquals(typeof s.index, "number");
|
||||
assertEquals(typeof s.segment, "string");
|
||||
assertEquals(typeof s.input, "string");
|
||||
assertEquals(text, s.input);
|
||||
assertEquals(text.substring(s.index, s.index + s.segment.length),
|
||||
s.segment);
|
||||
assertTrue(pos < s.index);
|
||||
pos = s.index;
|
||||
results.push(s.segment);
|
||||
}
|
||||
assertTrue(pos < text.length);
|
||||
assertEquals(text, results.join(""));
|
||||
}
|
||||
|
@ -1,11 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals(undefined, iter.breakType);
|
@ -1,26 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals("function", typeof iter.following);
|
||||
|
||||
// ToNumber("ABC") returns NaN, ToInteger("ABC") returns +0, ToIndex("ABC") returns 0
|
||||
assertDoesNotThrow(() => iter.following("ABC"));
|
||||
// ToNumber(null) returns +0, ToInteger(null) returns +0, ToIndex(null) returns 0
|
||||
assertDoesNotThrow(() => iter.following(null));
|
||||
// ToNumber(1.4) returns 1.4, ToInteger(1.4) returns 1, ToIndex(1.4) returns 1
|
||||
assertDoesNotThrow(() => iter.following(1.4));
|
||||
|
||||
assertThrows(() => iter.following(-3), RangeError);
|
||||
|
||||
// 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] )
|
||||
// 3.b If from >= length of iterator.[[SegmentIteratorString]], throw a RangeError exception.
|
||||
assertDoesNotThrow(() => iter.following(text.length - 1));
|
||||
assertThrows(() => iter.following(text.length), RangeError);
|
||||
assertThrows(() => iter.following(text.length + 1), RangeError);
|
@ -1,11 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals("function", typeof iter.next);
|
@ -11,6 +11,9 @@ assertTrue(descriptor.writable);
|
||||
assertFalse(descriptor.enumerable);
|
||||
assertTrue(descriptor.configurable);
|
||||
|
||||
let segmenterPrototype = Object.getPrototypeOf(seg);
|
||||
assertEquals("Intl.Segmenter", segmenterPrototype[Symbol.toStringTag]);
|
||||
|
||||
// ecma402 #sec-Intl.Segmenter.prototype
|
||||
// Intl.Segmenter.prototype
|
||||
// The value of Intl.Segmenter.prototype is %SegmenterPrototype%.
|
||||
@ -29,10 +32,11 @@ for (let func of ["segment", "resolvedOptions"]) {
|
||||
assertTrue(descriptor.configurable);
|
||||
}
|
||||
|
||||
let segmentIterator = seg.segment('text');
|
||||
let prototype = Object.getPrototypeOf(segmentIterator);
|
||||
for (let func of ["next", "following", "preceding"]) {
|
||||
let descriptor = Object.getOwnPropertyDescriptor(prototype, func);
|
||||
|
||||
let segments = seg.segment('text');
|
||||
let segmentsPrototype = Object.getPrototypeOf(segments);
|
||||
for (let func of ["containing", Symbol.iterator]) {
|
||||
let descriptor = Object.getOwnPropertyDescriptor(segmentsPrototype, func);
|
||||
assertTrue(descriptor.writable);
|
||||
assertFalse(descriptor.enumerable);
|
||||
assertTrue(descriptor.configurable);
|
||||
@ -47,34 +51,44 @@ function checkGetterProperty(prototype, property) {
|
||||
assertTrue(desc.configurable);
|
||||
}
|
||||
|
||||
// Test the descriptor is correct for properties.
|
||||
checkGetterProperty(prototype, 'index');
|
||||
checkGetterProperty(prototype, 'breakType');
|
||||
|
||||
// Test the SegmentIteratorPrototype methods are called with same
|
||||
// Test the SegmentsPrototype methods are called with same
|
||||
// receiver and won't throw.
|
||||
assertDoesNotThrow(() => prototype.next.call(segmentIterator));
|
||||
assertDoesNotThrow(() => prototype.following.call(segmentIterator));
|
||||
assertDoesNotThrow(() => prototype.preceding.call(segmentIterator));
|
||||
assertDoesNotThrow(() => segmentsPrototype.containing.call(segments));
|
||||
assertDoesNotThrow(() => segmentsPrototype[Symbol.iterator].call(segments));
|
||||
|
||||
// Test that the SegmentsPrototype methods, when called with a different
|
||||
// receiver, correctly throw.
|
||||
var otherReceivers = [
|
||||
1, 123.45, undefined, null, "string", true, false,
|
||||
Intl, Intl.Segmenter, Intl.Segmenter.prototype,
|
||||
prototype,
|
||||
segmentsPrototype,
|
||||
new Intl.Segmenter(),
|
||||
new Intl.Collator(),
|
||||
new Intl.DateTimeFormat(),
|
||||
new Intl.NumberFormat(),
|
||||
];
|
||||
for (let rec of otherReceivers) {
|
||||
assertThrows(() => prototype.next.call(rec), TypeError);
|
||||
assertThrows(() => prototype.following.call(rec), TypeError);
|
||||
assertThrows(() => prototype.preceding.call(rec), TypeError);
|
||||
assertThrows(() => segmentsPrototype.containing.call(rec), TypeError);
|
||||
}
|
||||
|
||||
// Check the property of the return object of next()
|
||||
let segmentIterator = segments[Symbol.iterator]();
|
||||
let segmentIteratorPrototype = Object.getPrototypeOf(segmentIterator);
|
||||
for (let func of ["next"]) {
|
||||
let descriptor = Object.getOwnPropertyDescriptor(segmentIteratorPrototype,
|
||||
func);
|
||||
assertTrue(descriptor.writable);
|
||||
assertFalse(descriptor.enumerable);
|
||||
assertTrue(descriptor.configurable);
|
||||
}
|
||||
|
||||
assertEquals("Segmenter String Iterator",
|
||||
segmentIteratorPrototype[Symbol.toStringTag]);
|
||||
let desc = Object.getOwnPropertyDescriptor(
|
||||
segmentIteratorPrototype, Symbol.toStringTag);
|
||||
assertFalse(desc.writable);
|
||||
assertFalse(desc.enumerable);
|
||||
assertTrue(desc.configurable);
|
||||
|
||||
let nextReturn = segmentIterator.next();
|
||||
|
||||
function checkProperty(obj, property) {
|
||||
@ -83,9 +97,3 @@ function checkProperty(obj, property) {
|
||||
assertTrue(desc.enumerable);
|
||||
assertTrue(desc.configurable);
|
||||
}
|
||||
|
||||
checkProperty(nextReturn, 'done');
|
||||
checkProperty(nextReturn, 'value');
|
||||
checkProperty(nextReturn.value, 'segment');
|
||||
checkProperty(nextReturn.value, 'breakType');
|
||||
checkProperty(nextReturn.value, 'index');
|
||||
|
@ -1,11 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals(0, iter.index);
|
@ -1,30 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals("function", typeof iter.preceding);
|
||||
|
||||
// ToNumber("ABC") returns NaN, ToInteger("ABC") returns +0, ToIndex("ABC") returns 0
|
||||
assertThrows(() => iter.preceding("ABC"), RangeError);
|
||||
// ToNumber(null) returns +0, ToInteger(null) returns +0, ToIndex(null) returns 0
|
||||
assertThrows(() => iter.preceding(null), RangeError);
|
||||
assertThrows(() => iter.preceding(-3), RangeError);
|
||||
|
||||
// ToNumber(1.4) returns 1.4, ToInteger(1.4) returns 1, ToIndex(1.4) returns 1
|
||||
assertDoesNotThrow(() => iter.preceding(1.4));
|
||||
|
||||
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
|
||||
// 3.b If ... from = 0, throw a RangeError exception.
|
||||
assertThrows(() => iter.preceding(0), RangeError);
|
||||
|
||||
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
|
||||
// 3.b If from > length of iterator.[[SegmentIteratorString]] ... , throw a RangeError exception.
|
||||
assertDoesNotThrow(() => iter.preceding(text.length - 1));
|
||||
assertDoesNotThrow(() => iter.preceding(text.length));
|
||||
assertThrows(() => iter.preceding(text.length + 1), RangeError);
|
@ -1,15 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
for (const granularity of ["grapheme", "word", "sentence"]) {
|
||||
const segmenter = new Intl.Segmenter("en", { granularity });
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals("number", typeof iter.index);
|
||||
assertEquals(0, iter.index);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.join(""));
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with the result from the one
|
||||
// created in the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assertTrue(["sep", "term"].includes(v.breakType), v.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assertFalse(iter.following());
|
||||
assertEquals(iter.breakType, v.breakType);
|
||||
assertEquals(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assertTrue(iter.following());
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assertEquals("number", typeof v.index);
|
||||
assertTrue(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev);
|
||||
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.reverse().join(""));
|
||||
}
|
@ -23,7 +23,21 @@ for (const text of [
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.index);
|
||||
const segments = seg.segment(text);
|
||||
let results = [];
|
||||
var pos = -1;
|
||||
for (let s of segments) {
|
||||
assertEquals(["segment", "index", "input"], Object.keys(s));
|
||||
assertEquals(typeof s.index, "number");
|
||||
assertEquals(typeof s.segment, "string");
|
||||
assertEquals(typeof s.input, "string");
|
||||
assertEquals(text, s.input);
|
||||
assertEquals(text.substring(s.index, s.index + s.segment.length),
|
||||
s.segment);
|
||||
assertTrue(pos < s.index);
|
||||
pos = s.index;
|
||||
results.push(s.segment);
|
||||
}
|
||||
assertTrue(pos < text.length);
|
||||
assertEquals(text, results.join(""));
|
||||
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.join(""));
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with the result from the one
|
||||
// created in the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assertTrue(["word", "none"].includes(v.breakType), v.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assertFalse(iter.following());
|
||||
assertEquals(iter.breakType, v.breakType);
|
||||
assertEquals(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assertTrue(iter.following());
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assertEquals("number", typeof v.index);
|
||||
assertTrue(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev);
|
||||
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.reverse().join(""));
|
||||
}
|
@ -23,7 +23,22 @@ for (const text of [
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.index);
|
||||
const segments = seg.segment(text);
|
||||
let results = [];
|
||||
var pos = -1;
|
||||
for (let s of segments) {
|
||||
assertEquals(["segment", "index", "input", "isWordLike"], Object.keys(s));
|
||||
assertEquals(typeof s.isWordLike, "boolean");
|
||||
assertEquals(typeof s.index, "number");
|
||||
assertEquals(typeof s.segment, "string");
|
||||
assertEquals(typeof s.input, "string");
|
||||
assertEquals(text, s.input);
|
||||
assertEquals(text.substring(s.index, s.index + s.segment.length),
|
||||
s.segment);
|
||||
assertTrue(pos < s.index);
|
||||
pos = s.index;
|
||||
results.push(s.segment);
|
||||
}
|
||||
assertTrue(pos < text.length);
|
||||
assertEquals(text, results.join(""));
|
||||
}
|
||||
|
@ -8,32 +8,34 @@ assertEquals("function", typeof Intl.Segmenter.prototype.segment);
|
||||
assertEquals(1, Intl.Segmenter.prototype.segment.length);
|
||||
|
||||
let seg = new Intl.Segmenter("en", {granularity: "word"})
|
||||
let res;
|
||||
let segments;
|
||||
|
||||
// test with 0 args
|
||||
assertDoesNotThrow(() => res = seg.segment())
|
||||
assertDoesNotThrow(() => segments = seg.segment())
|
||||
// test with 1 arg
|
||||
assertDoesNotThrow(() => res = seg.segment("hello"))
|
||||
assertEquals("hello", res.next().value.segment);
|
||||
assertDoesNotThrow(() => segments = seg.segment("hello"))
|
||||
assertEquals("hello", segments.containing(0).input);
|
||||
// test with 2 args
|
||||
assertDoesNotThrow(() => res = seg.segment("hello world"))
|
||||
assertEquals("hello", res.next().value.segment);
|
||||
assertDoesNotThrow(() => segments = seg.segment("hello world"))
|
||||
assertEquals("hello world", segments.containing(0).input);
|
||||
|
||||
// test with other types
|
||||
assertDoesNotThrow(() => res = seg.segment(undefined))
|
||||
assertEquals("undefined", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment(null))
|
||||
assertEquals("null", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment(true))
|
||||
assertEquals("true", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment(false))
|
||||
assertEquals("false", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment(1234))
|
||||
assertEquals("1234", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment(3.1415926))
|
||||
assertEquals("3.1415926", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment(["hello","world"]))
|
||||
assertEquals("hello", res.next().value.segment);
|
||||
assertDoesNotThrow(() => res = seg.segment({k: 'v'}))
|
||||
assertEquals("[", res.next().value.segment);
|
||||
assertThrows(() => res = seg.segment(Symbol()), TypeError)
|
||||
assertDoesNotThrow(() => segments = seg.segment(undefined))
|
||||
assertEquals("undefined", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(null))
|
||||
assertEquals("null", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(true))
|
||||
assertEquals("true", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(false))
|
||||
assertEquals("false", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(1234))
|
||||
assertEquals("1234", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(3.1415926))
|
||||
assertEquals("3.1415926", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(98765432109876543210987654321n))
|
||||
assertEquals("98765432109876543210987654321", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment(["hello","world"]))
|
||||
assertEquals("hello,world", segments.containing(0).input);
|
||||
assertDoesNotThrow(() => segments = seg.segment({k: 'v'}))
|
||||
assertEquals("[object Object]", segments.containing(0).input);
|
||||
assertThrows(() => segments = seg.segment(Symbol()), TypeError)
|
||||
|
@ -516,8 +516,10 @@
|
||||
'intl402/Locale/constructor-options-region-valid': [FAIL],
|
||||
|
||||
# http://crbug/v8/6891
|
||||
'intl402/Segmenter/prototype/segment/segment-grapheme': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-tostring': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word-iterable': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word': [FAIL],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=9818
|
||||
'built-ins/AsyncFunction/proto-from-ctor-realm': [FAIL],
|
||||
|
@ -60,6 +60,7 @@ export const CATEGORIES = new Map([
|
||||
'JS_RELATIVE_TIME_FORMAT_TYPE',
|
||||
'JS_SEGMENT_ITERATOR_TYPE',
|
||||
'JS_SEGMENTER_TYPE',
|
||||
'JS_SEGMENTS_TYPE',
|
||||
'JS_V8_BREAK_ITERATOR_TYPE',
|
||||
'JS_MAP_KEY_ITERATOR_TYPE',
|
||||
'JS_MAP_KEY_VALUE_ITERATOR_TYPE',
|
||||
|
@ -190,17 +190,18 @@ INSTANCE_TYPES = {
|
||||
1078: "JS_RELATIVE_TIME_FORMAT_TYPE",
|
||||
1079: "JS_SEGMENT_ITERATOR_TYPE",
|
||||
1080: "JS_SEGMENTER_TYPE",
|
||||
1081: "JS_STRING_ITERATOR_TYPE",
|
||||
1082: "JS_V8_BREAK_ITERATOR_TYPE",
|
||||
1083: "JS_WEAK_REF_TYPE",
|
||||
1084: "WASM_EXCEPTION_OBJECT_TYPE",
|
||||
1085: "WASM_GLOBAL_OBJECT_TYPE",
|
||||
1086: "WASM_INSTANCE_OBJECT_TYPE",
|
||||
1087: "WASM_MEMORY_OBJECT_TYPE",
|
||||
1088: "WASM_MODULE_OBJECT_TYPE",
|
||||
1089: "WASM_TABLE_OBJECT_TYPE",
|
||||
1090: "JS_BOUND_FUNCTION_TYPE",
|
||||
1091: "JS_FUNCTION_TYPE",
|
||||
1081: "JS_SEGMENTS_TYPE",
|
||||
1082: "JS_STRING_ITERATOR_TYPE",
|
||||
1083: "JS_V8_BREAK_ITERATOR_TYPE",
|
||||
1084: "JS_WEAK_REF_TYPE",
|
||||
1085: "WASM_EXCEPTION_OBJECT_TYPE",
|
||||
1086: "WASM_GLOBAL_OBJECT_TYPE",
|
||||
1087: "WASM_INSTANCE_OBJECT_TYPE",
|
||||
1088: "WASM_MEMORY_OBJECT_TYPE",
|
||||
1089: "WASM_MODULE_OBJECT_TYPE",
|
||||
1090: "WASM_TABLE_OBJECT_TYPE",
|
||||
1091: "JS_BOUND_FUNCTION_TYPE",
|
||||
1092: "JS_FUNCTION_TYPE",
|
||||
}
|
||||
|
||||
# List of known V8 maps.
|
||||
@ -297,65 +298,65 @@ KNOWN_MAPS = {
|
||||
("read_only_space", 0x0319d): (96, "EnumCacheMap"),
|
||||
("read_only_space", 0x031ed): (87, "ArrayBoilerplateDescriptionMap"),
|
||||
("read_only_space", 0x032d9): (99, "InterceptorInfoMap"),
|
||||
("read_only_space", 0x0538d): (72, "PromiseFulfillReactionJobTaskMap"),
|
||||
("read_only_space", 0x053b5): (73, "PromiseRejectReactionJobTaskMap"),
|
||||
("read_only_space", 0x053dd): (74, "CallableTaskMap"),
|
||||
("read_only_space", 0x05405): (75, "CallbackTaskMap"),
|
||||
("read_only_space", 0x0542d): (76, "PromiseResolveThenableJobTaskMap"),
|
||||
("read_only_space", 0x05455): (79, "FunctionTemplateInfoMap"),
|
||||
("read_only_space", 0x0547d): (80, "ObjectTemplateInfoMap"),
|
||||
("read_only_space", 0x054a5): (81, "AccessCheckInfoMap"),
|
||||
("read_only_space", 0x054cd): (82, "AccessorInfoMap"),
|
||||
("read_only_space", 0x054f5): (83, "AccessorPairMap"),
|
||||
("read_only_space", 0x0551d): (84, "AliasedArgumentsEntryMap"),
|
||||
("read_only_space", 0x05545): (85, "AllocationMementoMap"),
|
||||
("read_only_space", 0x0556d): (88, "AsmWasmDataMap"),
|
||||
("read_only_space", 0x05595): (89, "AsyncGeneratorRequestMap"),
|
||||
("read_only_space", 0x055bd): (90, "BreakPointMap"),
|
||||
("read_only_space", 0x055e5): (91, "BreakPointInfoMap"),
|
||||
("read_only_space", 0x0560d): (92, "CachedTemplateObjectMap"),
|
||||
("read_only_space", 0x05635): (94, "ClassPositionsMap"),
|
||||
("read_only_space", 0x0565d): (95, "DebugInfoMap"),
|
||||
("read_only_space", 0x05685): (98, "FunctionTemplateRareDataMap"),
|
||||
("read_only_space", 0x056ad): (100, "InterpreterDataMap"),
|
||||
("read_only_space", 0x056d5): (101, "PromiseCapabilityMap"),
|
||||
("read_only_space", 0x056fd): (102, "PromiseReactionMap"),
|
||||
("read_only_space", 0x05725): (103, "PropertyDescriptorObjectMap"),
|
||||
("read_only_space", 0x0574d): (104, "PrototypeInfoMap"),
|
||||
("read_only_space", 0x05775): (105, "ScriptMap"),
|
||||
("read_only_space", 0x0579d): (106, "SourceTextModuleInfoEntryMap"),
|
||||
("read_only_space", 0x057c5): (107, "StackFrameInfoMap"),
|
||||
("read_only_space", 0x057ed): (108, "StackTraceFrameMap"),
|
||||
("read_only_space", 0x05815): (109, "TemplateObjectDescriptionMap"),
|
||||
("read_only_space", 0x0583d): (110, "Tuple2Map"),
|
||||
("read_only_space", 0x05865): (111, "WasmCapiFunctionDataMap"),
|
||||
("read_only_space", 0x0588d): (112, "WasmExceptionTagMap"),
|
||||
("read_only_space", 0x058b5): (113, "WasmExportedFunctionDataMap"),
|
||||
("read_only_space", 0x058dd): (114, "WasmIndirectFunctionTableMap"),
|
||||
("read_only_space", 0x05905): (115, "WasmJSFunctionDataMap"),
|
||||
("read_only_space", 0x0592d): (116, "WasmValueMap"),
|
||||
("read_only_space", 0x05955): (136, "SloppyArgumentsElementsMap"),
|
||||
("read_only_space", 0x0597d): (172, "OnHeapBasicBlockProfilerDataMap"),
|
||||
("read_only_space", 0x059a5): (169, "InternalClassMap"),
|
||||
("read_only_space", 0x059cd): (178, "SmiPairMap"),
|
||||
("read_only_space", 0x059f5): (177, "SmiBoxMap"),
|
||||
("read_only_space", 0x05a1d): (147, "ExportedSubClassBaseMap"),
|
||||
("read_only_space", 0x05a45): (148, "ExportedSubClassMap"),
|
||||
("read_only_space", 0x05a6d): (68, "AbstractInternalClassSubclass1Map"),
|
||||
("read_only_space", 0x05a95): (69, "AbstractInternalClassSubclass2Map"),
|
||||
("read_only_space", 0x05abd): (135, "InternalClassWithSmiElementsMap"),
|
||||
("read_only_space", 0x05ae5): (170, "InternalClassWithStructElementsMap"),
|
||||
("read_only_space", 0x05b0d): (149, "ExportedSubClass2Map"),
|
||||
("read_only_space", 0x05b35): (179, "SortStateMap"),
|
||||
("read_only_space", 0x05b5d): (86, "AllocationSiteWithWeakNextMap"),
|
||||
("read_only_space", 0x05b85): (86, "AllocationSiteWithoutWeakNextMap"),
|
||||
("read_only_space", 0x05bad): (77, "LoadHandler1Map"),
|
||||
("read_only_space", 0x05bd5): (77, "LoadHandler2Map"),
|
||||
("read_only_space", 0x05bfd): (77, "LoadHandler3Map"),
|
||||
("read_only_space", 0x05c25): (78, "StoreHandler0Map"),
|
||||
("read_only_space", 0x05c4d): (78, "StoreHandler1Map"),
|
||||
("read_only_space", 0x05c75): (78, "StoreHandler2Map"),
|
||||
("read_only_space", 0x05c9d): (78, "StoreHandler3Map"),
|
||||
("read_only_space", 0x053b9): (72, "PromiseFulfillReactionJobTaskMap"),
|
||||
("read_only_space", 0x053e1): (73, "PromiseRejectReactionJobTaskMap"),
|
||||
("read_only_space", 0x05409): (74, "CallableTaskMap"),
|
||||
("read_only_space", 0x05431): (75, "CallbackTaskMap"),
|
||||
("read_only_space", 0x05459): (76, "PromiseResolveThenableJobTaskMap"),
|
||||
("read_only_space", 0x05481): (79, "FunctionTemplateInfoMap"),
|
||||
("read_only_space", 0x054a9): (80, "ObjectTemplateInfoMap"),
|
||||
("read_only_space", 0x054d1): (81, "AccessCheckInfoMap"),
|
||||
("read_only_space", 0x054f9): (82, "AccessorInfoMap"),
|
||||
("read_only_space", 0x05521): (83, "AccessorPairMap"),
|
||||
("read_only_space", 0x05549): (84, "AliasedArgumentsEntryMap"),
|
||||
("read_only_space", 0x05571): (85, "AllocationMementoMap"),
|
||||
("read_only_space", 0x05599): (88, "AsmWasmDataMap"),
|
||||
("read_only_space", 0x055c1): (89, "AsyncGeneratorRequestMap"),
|
||||
("read_only_space", 0x055e9): (90, "BreakPointMap"),
|
||||
("read_only_space", 0x05611): (91, "BreakPointInfoMap"),
|
||||
("read_only_space", 0x05639): (92, "CachedTemplateObjectMap"),
|
||||
("read_only_space", 0x05661): (94, "ClassPositionsMap"),
|
||||
("read_only_space", 0x05689): (95, "DebugInfoMap"),
|
||||
("read_only_space", 0x056b1): (98, "FunctionTemplateRareDataMap"),
|
||||
("read_only_space", 0x056d9): (100, "InterpreterDataMap"),
|
||||
("read_only_space", 0x05701): (101, "PromiseCapabilityMap"),
|
||||
("read_only_space", 0x05729): (102, "PromiseReactionMap"),
|
||||
("read_only_space", 0x05751): (103, "PropertyDescriptorObjectMap"),
|
||||
("read_only_space", 0x05779): (104, "PrototypeInfoMap"),
|
||||
("read_only_space", 0x057a1): (105, "ScriptMap"),
|
||||
("read_only_space", 0x057c9): (106, "SourceTextModuleInfoEntryMap"),
|
||||
("read_only_space", 0x057f1): (107, "StackFrameInfoMap"),
|
||||
("read_only_space", 0x05819): (108, "StackTraceFrameMap"),
|
||||
("read_only_space", 0x05841): (109, "TemplateObjectDescriptionMap"),
|
||||
("read_only_space", 0x05869): (110, "Tuple2Map"),
|
||||
("read_only_space", 0x05891): (111, "WasmCapiFunctionDataMap"),
|
||||
("read_only_space", 0x058b9): (112, "WasmExceptionTagMap"),
|
||||
("read_only_space", 0x058e1): (113, "WasmExportedFunctionDataMap"),
|
||||
("read_only_space", 0x05909): (114, "WasmIndirectFunctionTableMap"),
|
||||
("read_only_space", 0x05931): (115, "WasmJSFunctionDataMap"),
|
||||
("read_only_space", 0x05959): (116, "WasmValueMap"),
|
||||
("read_only_space", 0x05981): (136, "SloppyArgumentsElementsMap"),
|
||||
("read_only_space", 0x059a9): (172, "OnHeapBasicBlockProfilerDataMap"),
|
||||
("read_only_space", 0x059d1): (169, "InternalClassMap"),
|
||||
("read_only_space", 0x059f9): (178, "SmiPairMap"),
|
||||
("read_only_space", 0x05a21): (177, "SmiBoxMap"),
|
||||
("read_only_space", 0x05a49): (147, "ExportedSubClassBaseMap"),
|
||||
("read_only_space", 0x05a71): (148, "ExportedSubClassMap"),
|
||||
("read_only_space", 0x05a99): (68, "AbstractInternalClassSubclass1Map"),
|
||||
("read_only_space", 0x05ac1): (69, "AbstractInternalClassSubclass2Map"),
|
||||
("read_only_space", 0x05ae9): (135, "InternalClassWithSmiElementsMap"),
|
||||
("read_only_space", 0x05b11): (170, "InternalClassWithStructElementsMap"),
|
||||
("read_only_space", 0x05b39): (149, "ExportedSubClass2Map"),
|
||||
("read_only_space", 0x05b61): (179, "SortStateMap"),
|
||||
("read_only_space", 0x05b89): (86, "AllocationSiteWithWeakNextMap"),
|
||||
("read_only_space", 0x05bb1): (86, "AllocationSiteWithoutWeakNextMap"),
|
||||
("read_only_space", 0x05bd9): (77, "LoadHandler1Map"),
|
||||
("read_only_space", 0x05c01): (77, "LoadHandler2Map"),
|
||||
("read_only_space", 0x05c29): (77, "LoadHandler3Map"),
|
||||
("read_only_space", 0x05c51): (78, "StoreHandler0Map"),
|
||||
("read_only_space", 0x05c79): (78, "StoreHandler1Map"),
|
||||
("read_only_space", 0x05ca1): (78, "StoreHandler2Map"),
|
||||
("read_only_space", 0x05cc9): (78, "StoreHandler3Map"),
|
||||
("map_space", 0x0211d): (1057, "ExternalMap"),
|
||||
("map_space", 0x02145): (1072, "JSMessageObjectMap"),
|
||||
("map_space", 0x0216d): (181, "WasmRttEqrefMap"),
|
||||
|
Loading…
Reference in New Issue
Block a user