Reland "[Intl] Sync Intl.Segmenter to latest version"

This is a reland of 482c3bbf1e

Original change's description:
> [Intl] Sync Intl.Segmenter to latest version
> 
> https://tc39.es/proposal-intl-segmenter/
> 
> TC39 advanced Intl.Segmenter to Stage 3 on July 21.
> This CL moves our earlier prototype to the current spec.
> 
> Bug: v8:6891
> Change-Id: I07234beed54f671c26bdbfb3983c5bc2fa5a29b0
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2219413
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Frank Tang <ftang@chromium.org>
> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
> Commit-Queue: Frank Tang <ftang@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#69080}

Bug: v8:6891
Change-Id: Ie3a02d8ddf6f95f0632f97b38b613b185abeb592
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2321118
Reviewed-by: Frank Tang <ftang@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Frank Tang <ftang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69153}
This commit is contained in:
Frank Tang 2020-07-29 08:06:31 -07:00 committed by Commit Bot
parent 87563c3865
commit 4f87e1a045
55 changed files with 707 additions and 1125 deletions

View File

@ -2874,6 +2874,9 @@ v8_source_set("v8_base_without_compiler") {
"src/objects/js-segmenter-inl.h",
"src/objects/js-segmenter.cc",
"src/objects/js-segmenter.h",
"src/objects/js-segments-inl.h",
"src/objects/js-segments.cc",
"src/objects/js-segments.h",
"src/objects/js-weak-refs-inl.h",
"src/objects/js-weak-refs.h",
"src/objects/keys.cc",
@ -3724,6 +3727,9 @@ v8_source_set("v8_base_without_compiler") {
"src/objects/js-segmenter-inl.h",
"src/objects/js-segmenter.cc",
"src/objects/js-segmenter.h",
"src/objects/js-segments-inl.h",
"src/objects/js-segments.cc",
"src/objects/js-segments.h",
"src/runtime/runtime-intl.cc",
"src/strings/char-predicates.cc",
]

View File

@ -1035,16 +1035,12 @@ namespace internal {
CPP(SegmenterPrototypeSegment) \
/* ecma402 #sec-Intl.Segmenter.supportedLocalesOf */ \
CPP(SegmenterSupportedLocalesOf) \
/* ecma402 #sec-segment-iterator-prototype-breakType */ \
CPP(SegmentIteratorPrototypeBreakType) \
/* ecma402 #sec-segment-iterator-prototype-following */ \
CPP(SegmentIteratorPrototypeFollowing) \
/* ecma402 #sec-segment-iterator-prototype-preceding */ \
CPP(SegmentIteratorPrototypePreceding) \
/* ecma402 #sec-segment-iterator-prototype-index */ \
CPP(SegmentIteratorPrototypeIndex) \
/* ecma402 #sec-segment-iterator-prototype-next */ \
CPP(SegmentIteratorPrototypeNext) \
/* ecma402 #sec-%segmentsprototype%.containing */ \
CPP(SegmentsPrototypeContaining) \
/* ecma402 #sec-%segmentsprototype%-@@iterator */ \
CPP(SegmentsPrototypeIterator) \
/* ES #sec-string.prototype.normalize */ \
CPP(StringPrototypeNormalizeIntl) \
/* ecma402 #sup-string.prototype.tolocalelowercase */ \

View File

@ -28,10 +28,10 @@
#include "src/objects/js-relative-time-format-inl.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/js-segments-inl.h"
#include "src/objects/objects-inl.h"
#include "src/objects/property-descriptor.h"
#include "src/objects/smi.h"
#include "unicode/brkiter.h"
namespace v8 {
@ -968,32 +968,9 @@ BUILTIN(CollatorInternalCompare) {
return *Intl::CompareStrings(isolate, *icu_collator, string_x, string_y);
}
// ecma402 #sec-segment-iterator-prototype-breakType
BUILTIN(SegmentIteratorPrototypeBreakType) {
const char* const method = "get %SegmentIteratorPrototype%.breakType";
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
return *segment_iterator->BreakType();
}
// ecma402 #sec-segment-iterator-prototype-following
BUILTIN(SegmentIteratorPrototypeFollowing) {
const char* const method = "%SegmentIteratorPrototype%.following";
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
Handle<Object> from = args.atOrUndefined(isolate, 1);
Maybe<bool> success =
JSSegmentIterator::Following(isolate, segment_iterator, from);
MAYBE_RETURN(success, ReadOnlyRoots(isolate).exception());
return *isolate->factory()->ToBoolean(success.FromJust());
}
// ecma402 #sec-segment-iterator-prototype-next
// ecma402 #sec-%segmentiteratorprototype%.next
BUILTIN(SegmentIteratorPrototypeNext) {
const char* const method = "%SegmentIteratorPrototype%.next";
const char* const method = "%SegmentIterator.prototype%.next";
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
@ -1001,29 +978,7 @@ BUILTIN(SegmentIteratorPrototypeNext) {
JSSegmentIterator::Next(isolate, segment_iterator));
}
// ecma402 #sec-segment-iterator-prototype-preceding
BUILTIN(SegmentIteratorPrototypePreceding) {
const char* const method = "%SegmentIteratorPrototype%.preceding";
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
Handle<Object> from = args.atOrUndefined(isolate, 1);
Maybe<bool> success =
JSSegmentIterator::Preceding(isolate, segment_iterator, from);
MAYBE_RETURN(success, ReadOnlyRoots(isolate).exception());
return *isolate->factory()->ToBoolean(success.FromJust());
}
// ecma402 #sec-segment-iterator-prototype-index
BUILTIN(SegmentIteratorPrototypeIndex) {
const char* const method = "get %SegmentIteratorPrototype%.index";
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmentIterator, segment_iterator, method);
return *JSSegmentIterator::Index(isolate, segment_iterator);
}
// ecma402 #sec-intl.segmenter
BUILTIN(SegmenterConstructor) {
HandleScope scope(isolate);
@ -1032,6 +987,7 @@ BUILTIN(SegmenterConstructor) {
"Intl.Segmenter");
}
// ecma402 #sec-intl.segmenter.supportedlocalesof
BUILTIN(SegmenterSupportedLocalesOf) {
HandleScope scope(isolate);
Handle<Object> locales = args.atOrUndefined(isolate, 1);
@ -1043,30 +999,52 @@ BUILTIN(SegmenterSupportedLocalesOf) {
JSSegmenter::GetAvailableLocales(), locales, options));
}
// ecma402 #sec-intl.segmenter.prototype.resolvedoptions
BUILTIN(SegmenterPrototypeResolvedOptions) {
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmenter, segmenter_holder,
CHECK_RECEIVER(JSSegmenter, segmenter,
"Intl.Segmenter.prototype.resolvedOptions");
return *JSSegmenter::ResolvedOptions(isolate, segmenter_holder);
return *JSSegmenter::ResolvedOptions(isolate, segmenter);
}
// ecma402 #sec-Intl.Segmenter.prototype.segment
// ecma402 #sec-intl.segmenter.prototype.segment
BUILTIN(SegmenterPrototypeSegment) {
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegmenter, segmenter_holder,
"Intl.Segmenter.prototype.segment");
CHECK_RECEIVER(JSSegmenter, segmenter, "Intl.Segmenter.prototype.segment");
Handle<Object> input_text = args.atOrUndefined(isolate, 1);
// 3. Let string be ? ToString(string).
Handle<String> text;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, text,
Handle<String> string;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
Object::ToString(isolate, input_text));
// 4. Return ? CreateSegmentIterator(segment, string).
// 4. Return ? CreateSegmentsObject(segmenter, string).
RETURN_RESULT_OR_FAILURE(isolate,
JSSegments::Create(isolate, segmenter, string));
}
// ecma402 #sec-%segmentsprototype%.containing
BUILTIN(SegmentsPrototypeContaining) {
const char* const method = "%Segments.prototype%.containing";
HandleScope scope(isolate);
CHECK_RECEIVER(JSSegments, segments, method);
Handle<Object> index = args.atOrUndefined(isolate, 1);
// 6. Let n be ? ToInteger(index).
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, index,
Object::ToInteger(isolate, index));
double const n = index->Number();
RETURN_RESULT_OR_FAILURE(
isolate,
JSSegmentIterator::Create(
isolate, segmenter_holder->icu_break_iterator().raw()->clone(),
segmenter_holder->granularity(), text));
isolate, JSSegments::Containing(isolate, segments, static_cast<int>(n)));
}
// ecma402 #sec-%segmentsprototype%-@@iterator
// Builtin behind %Segments.prototype%[@@iterator]: checks that the receiver is
// a JSSegments object and returns the result of
// JSSegments::CreateSegmentIterator for it (or the failure value when that
// call does not produce a result).
BUILTIN(SegmentsPrototypeIterator) {
  // Method name handed to CHECK_RECEIVER (presumably used in the error raised
  // for an incompatible receiver). Spelled consistently with
  // "%Segments.prototype%.containing"; the previous "%SegmentIsPrototype%"
  // spelling was a typo.
  const char* const method = "%Segments.prototype%[@@iterator]";
  HandleScope scope(isolate);
  CHECK_RECEIVER(JSSegments, segments, method);
  RETURN_RESULT_OR_FAILURE(
      isolate, JSSegments::CreateSegmentIterator(isolate, segments));
}
BUILTIN(V8BreakIteratorConstructor) {

View File

@ -62,6 +62,7 @@ class JSRegExpStringIterator;
class JSRelativeTimeFormat;
class JSSegmentIterator;
class JSSegmenter;
class JSSegments;
class JSV8BreakIterator;
class JSWeakCollection;
class JSFinalizationRegistry;

View File

@ -218,6 +218,7 @@ Type::bitset BitsetType::Lub(const MapRefLike& map) {
case JS_RELATIVE_TIME_FORMAT_TYPE:
case JS_SEGMENT_ITERATOR_TYPE:
case JS_SEGMENTER_TYPE:
case JS_SEGMENTS_TYPE:
#endif // V8_INTL_SUPPORT
case JS_CONTEXT_EXTENSION_OBJECT_TYPE:
case JS_GENERATOR_OBJECT_TYPE:

View File

@ -54,6 +54,7 @@
#include "src/objects/js-relative-time-format-inl.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/js-segments-inl.h"
#endif // V8_INTL_SUPPORT
#include "src/objects/js-weak-refs-inl.h"
#include "src/objects/literal-objects-inl.h"

View File

@ -50,6 +50,7 @@
#include "src/objects/js-relative-time-format-inl.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/js-segments-inl.h"
#endif // V8_INTL_SUPPORT
#include "src/compiler/node.h"
#include "src/objects/js-weak-refs-inl.h"
@ -2107,18 +2108,25 @@ void JSRelativeTimeFormat::JSRelativeTimeFormatPrint(
void JSSegmentIterator::JSSegmentIteratorPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, *this, "JSSegmentIterator");
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
os << "\n - unicode string: " << Brief(unicode_string());
os << "\n - granularity: " << GranularityAsString();
os << "\n - granularity: " << GranularityAsString(GetIsolate());
os << "\n";
}
void JSSegmenter::JSSegmenterPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, *this, "JSSegmenter");
os << "\n - locale: " << Brief(locale());
os << "\n - granularity: " << GranularityAsString();
os << "\n - granularity: " << GranularityAsString(GetIsolate());
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
JSObjectPrintBody(os, *this);
}
// Debug printer for JSSegments: writes the object header, then the ICU break
// iterator, the unicode string and the granularity fields, and finally the
// generic JSObject body.
void JSSegments::JSSegmentsPrint(std::ostream& os) {  // NOLINT
  JSObjectPrintHeader(os, *this, "JSSegments");
  // Emit all field lines as one chained insertion; the output order is the
  // same as writing each field with its own statement.
  os << "\n - icu break iterator: " << Brief(icu_break_iterator())
     << "\n - unicode string: " << Brief(unicode_string())
     << "\n - granularity: " << GranularityAsString(GetIsolate());
  JSObjectPrintBody(os, *this);
}
#endif // V8_INTL_SUPPORT
namespace {

View File

@ -53,6 +53,7 @@
#include "src/objects/js-relative-time-format.h"
#include "src/objects/js-segment-iterator.h"
#include "src/objects/js-segmenter.h"
#include "src/objects/js-segments.h"
#endif // V8_INTL_SUPPORT
#include "src/objects/js-weak-refs.h"
#include "src/objects/ordered-hash-table.h"
@ -4315,6 +4316,12 @@ void Genesis::InitializeGlobal_harmony_intl_segmenter() {
Handle<JSObject> prototype(
JSObject::cast(segmenter_fun->instance_prototype()), isolate());
// #sec-intl.segmenter.prototype-@@tostringtag
//
// Intl.Segmenter.prototype [ @@toStringTag ]
//
// The initial value of the @@toStringTag property is the String value
// "Intl.Segmenter".
InstallToStringTag(isolate(), prototype, "Intl.Segmenter");
SimpleInstallFunction(isolate(), prototype, "resolvedOptions",
@ -4325,6 +4332,32 @@ void Genesis::InitializeGlobal_harmony_intl_segmenter() {
Builtins::kSegmenterPrototypeSegment, 1, false);
}
{
// Setup %SegmentsPrototype%.
Handle<JSObject> prototype = factory()->NewJSObject(
isolate()->object_function(), AllocationType::kOld);
Handle<String> name_string =
Name::ToFunctionName(isolate(), isolate()->factory()->Segments_string())
.ToHandleChecked();
Handle<JSFunction> segments_fun = CreateFunction(
isolate(), name_string, JS_SEGMENTS_TYPE, JSSegments::kHeaderSize, 0,
prototype, Builtins::kIllegal);
segments_fun->shared().set_native(false);
segments_fun->shared().set_length(0);
segments_fun->shared().DontAdaptArguments();
SimpleInstallFunction(isolate(), prototype, "containing",
Builtins::kSegmentsPrototypeContaining, 1, false);
InstallFunctionAtSymbol(
isolate_, prototype, factory()->iterator_symbol(), "[Symbol.iterator]",
Builtins::kSegmentsPrototypeIterator, 0, true, DONT_ENUM);
Handle<Map> segments_map(segments_fun->initial_map(), isolate());
native_context()->set_intl_segments_map(*segments_map);
}
{
// Setup %SegmentIteratorPrototype%.
Handle<JSObject> iterator_prototype(
@ -4334,26 +4367,17 @@ void Genesis::InitializeGlobal_harmony_intl_segmenter() {
isolate()->object_function(), AllocationType::kOld);
JSObject::ForceSetPrototype(prototype, iterator_prototype);
InstallToStringTag(isolate(), prototype,
factory()->SegmentIterator_string());
// #sec-%segmentiteratorprototype%.@@tostringtag
//
// %SegmentIteratorPrototype% [ @@toStringTag ]
//
// The initial value of the @@toStringTag property is the String value
// "Segmenter String Iterator".
InstallToStringTag(isolate(), prototype, "Segmenter String Iterator");
SimpleInstallFunction(isolate(), prototype, "next",
Builtins::kSegmentIteratorPrototypeNext, 0, false);
SimpleInstallFunction(isolate(), prototype, "following",
Builtins::kSegmentIteratorPrototypeFollowing, 0,
false);
SimpleInstallFunction(isolate(), prototype, "preceding",
Builtins::kSegmentIteratorPrototypePreceding, 0,
false);
SimpleInstallGetter(isolate(), prototype, factory()->index_string(),
Builtins::kSegmentIteratorPrototypeIndex, false);
SimpleInstallGetter(isolate(), prototype, factory()->breakType_string(),
Builtins::kSegmentIteratorPrototypeBreakType, false);
// Setup SegmentIterator constructor.
Handle<String> name_string =
Name::ToFunctionName(isolate(),

View File

@ -54,6 +54,7 @@
V(_, ignorePunctuation_string, "ignorePunctuation") \
V(_, Invalid_Date_string, "Invalid Date") \
V(_, integer_string, "integer") \
V(_, isWordLike_string, "isWordLike") \
V(_, kana_string, "kana") \
V(_, language_string, "language") \
V(_, letter_string, "letter") \
@ -88,6 +89,7 @@
V(_, second_string, "second") \
V(_, segment_string, "segment") \
V(_, SegmentIterator_string, "Segment Iterator") \
V(_, Segments_string, "Segments") \
V(_, sensitivity_string, "sensitivity") \
V(_, sep_string, "sep") \
V(_, shared_string, "shared") \

View File

@ -38,6 +38,7 @@
#include "src/objects/js-relative-time-format-inl.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/js-segments-inl.h"
#endif
#endif // V8_OBJECTS_CLASS_DEFINITIONS_TQ_DEPS_INL_H_

View File

@ -153,6 +153,7 @@ enum ContextLookupFlags {
V(INTL_RELATIVE_TIME_FORMAT_FUNCTION_INDEX, JSFunction, \
intl_relative_time_format_function) \
V(INTL_SEGMENTER_FUNCTION_INDEX, JSFunction, intl_segmenter_function) \
V(INTL_SEGMENTS_MAP_INDEX, Map, intl_segments_map) \
V(INTL_SEGMENT_ITERATOR_MAP_INDEX, Map, intl_segment_iterator_map) \
V(ITERATOR_RESULT_MAP_INDEX, Map, iterator_result_map) \
V(JS_ARRAY_PACKED_SMI_ELEMENTS_MAP_INDEX, Map, \

View File

@ -14,6 +14,7 @@
#include 'src/objects/js-relative-time-format.h'
#include 'src/objects/js-segment-iterator.h'
#include 'src/objects/js-segmenter.h'
#include 'src/objects/js-segments.h'
type DateTimeStyle extends int32 constexpr 'JSDateTimeFormat::DateTimeStyle';
type HourCycle extends int32 constexpr 'JSDateTimeFormat::HourCycle';
@ -115,15 +116,24 @@ extern class JSSegmenter extends JSObject {
flags: SmiTagged<JSSegmenterFlags>;
}
// Bit layout of the JSSegments `flags` field.
bitfield struct JSSegmentsFlags extends uint31 {
// The segmenter granularity, stored in 2 bits.
granularity: JSSegmenterGranularity: 2 bit;
}
// Torque declaration of JSSegments, a JSObject subclass that carries an ICU
// break iterator, an ICU unicode string, and Smi-tagged flags described by
// JSSegmentsFlags.
@generateCppClass
extern class JSSegments extends JSObject {
icu_break_iterator: Foreign; // Managed<icu::BreakIterator>
unicode_string: Foreign; // Managed<icu::UnicodeString>
flags: SmiTagged<JSSegmentsFlags>;
}
bitfield struct JSSegmentIteratorFlags extends uint31 {
granularity: JSSegmenterGranularity: 2 bit;
break_type_set: bool: 1 bit;
}
@generateCppClass
extern class JSSegmentIterator extends JSObject {
icu_break_iterator: Foreign; // Managed<icu::BreakIterator>
unicode_string: Foreign; // Managed<icu::UnicodeString>
flags: SmiTagged<JSSegmentIteratorFlags>;
}

View File

@ -668,6 +668,7 @@ bool CanSubclassHaveInobjectProperties(InstanceType instance_type) {
case JS_RELATIVE_TIME_FORMAT_TYPE:
case JS_SEGMENT_ITERATOR_TYPE:
case JS_SEGMENTER_TYPE:
case JS_SEGMENTS_TYPE:
case JS_V8_BREAK_ITERATOR_TYPE:
#endif
case JS_ASYNC_FUNCTION_OBJECT_TYPE:

View File

@ -53,6 +53,7 @@
#include "src/objects/js-relative-time-format.h"
#include "src/objects/js-segment-iterator.h"
#include "src/objects/js-segmenter.h"
#include "src/objects/js-segments.h"
#endif // V8_INTL_SUPPORT
#include "src/objects/js-weak-refs.h"
#include "src/objects/map-inl.h"
@ -2173,6 +2174,8 @@ int JSObject::GetHeaderSize(InstanceType type,
return JSSegmentIterator::kHeaderSize;
case JS_SEGMENTER_TYPE:
return JSSegmenter::kHeaderSize;
case JS_SEGMENTS_TYPE:
return JSSegments::kHeaderSize;
#endif // V8_INTL_SUPPORT
case WASM_GLOBAL_OBJECT_TYPE:
return WasmGlobalObject::kHeaderSize;

View File

@ -1,14 +1,13 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_INL_H_
#include "src/objects/js-segment-iterator.h"
#include "src/objects/objects-inl.h"
@ -23,11 +22,6 @@ TQ_OBJECT_CONSTRUCTORS_IMPL(JSSegmentIterator)
// Base segment iterator accessors.
ACCESSORS(JSSegmentIterator, icu_break_iterator, Managed<icu::BreakIterator>,
kIcuBreakIteratorOffset)
ACCESSORS(JSSegmentIterator, unicode_string, Managed<icu::UnicodeString>,
kUnicodeStringOffset)
BIT_FIELD_ACCESSORS(JSSegmentIterator, flags, is_break_type_set,
JSSegmentIterator::BreakTypeSetBit)
inline void JSSegmentIterator::set_granularity(
JSSegmenter::Granularity granularity) {

View File

@ -16,6 +16,7 @@
#include "src/heap/factory.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segments.h"
#include "src/objects/managed.h"
#include "src/objects/objects-inl.h"
#include "unicode/brkiter.h"
@ -23,36 +24,22 @@
namespace v8 {
namespace internal {
MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
int32_t start,
int32_t end) const {
return Intl::ToString(isolate, *(unicode_string().raw()), start, end);
}
Handle<String> JSSegmentIterator::GranularityAsString() const {
switch (granularity()) {
case JSSegmenter::Granularity::GRAPHEME:
return GetReadOnlyRoots().grapheme_string_handle();
case JSSegmenter::Granularity::WORD:
return GetReadOnlyRoots().word_string_handle();
case JSSegmenter::Granularity::SENTENCE:
return GetReadOnlyRoots().sentence_string_handle();
}
UNREACHABLE();
Handle<String> JSSegmentIterator::GranularityAsString(Isolate* isolate) const {
return JSSegmenter::GetGranularityString(isolate, granularity());
}
// ecma402 #sec-createsegmentiterator
MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
Isolate* isolate, icu::BreakIterator* break_iterator,
JSSegmenter::Granularity granularity, Handle<String> text) {
CHECK_NOT_NULL(break_iterator);
// 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
JSSegmenter::Granularity granularity) {
DCHECK_NOT_NULL(break_iterator);
Handle<Map> map = Handle<Map>(
isolate->native_context()->intl_segment_iterator_map(), isolate);
// 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0.
break_iterator->first();
Handle<Managed<icu::BreakIterator>> managed_break_iterator =
Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
Handle<Managed<icu::UnicodeString>> unicode_string =
Intl::SetTextToBreakIterator(isolate, text, break_iterator);
// Now all properties are ready, so we can allocate the result object.
Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
@ -62,230 +49,47 @@ MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
segment_iterator->set_flags(0);
segment_iterator->set_granularity(granularity);
// 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
segment_iterator->set_icu_break_iterator(*managed_break_iterator);
// 3. Let iterator.[[SegmentIteratorString]] be string.
segment_iterator->set_unicode_string(*unicode_string);
// 4. Let iterator.[[SegmentIteratorIndex]] be 0.
// step 4 is stored inside break_iterator.
// 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
segment_iterator->set_is_break_type_set(false);
return segment_iterator;
}
// ecma402 #sec-segment-iterator-prototype-breakType
Handle<Object> JSSegmentIterator::BreakType() const {
if (!is_break_type_set()) {
return GetReadOnlyRoots().undefined_value_handle();
}
icu::BreakIterator* break_iterator = icu_break_iterator().raw();
int32_t rule_status = break_iterator->getRuleStatus();
switch (granularity()) {
case JSSegmenter::Granularity::GRAPHEME:
return GetReadOnlyRoots().undefined_value_handle();
case JSSegmenter::Granularity::WORD:
if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
// "words" that do not fit into any of other categories. Includes spaces
// and most punctuation.
return GetReadOnlyRoots().none_string_handle();
}
if ((rule_status >= UBRK_WORD_NUMBER &&
rule_status < UBRK_WORD_NUMBER_LIMIT) ||
(rule_status >= UBRK_WORD_LETTER &&
rule_status < UBRK_WORD_LETTER_LIMIT) ||
(rule_status >= UBRK_WORD_KANA &&
rule_status < UBRK_WORD_KANA_LIMIT) ||
(rule_status >= UBRK_WORD_IDEO &&
rule_status < UBRK_WORD_IDEO_LIMIT)) {
// words that appear to be numbers, letters, kana characters,
// ideographic characters, etc
return GetReadOnlyRoots().word_string_handle();
}
return GetReadOnlyRoots().undefined_value_handle();
case JSSegmenter::Granularity::SENTENCE:
if (rule_status >= UBRK_SENTENCE_TERM &&
rule_status < UBRK_SENTENCE_TERM_LIMIT) {
// sentences ending with a sentence terminator ('.', '?', '!', etc.)
// character, possibly followed by a hard separator (CR, LF, PS, etc.)
return GetReadOnlyRoots().term_string_handle();
}
if ((rule_status >= UBRK_SENTENCE_SEP &&
rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
// sentences that do not contain an ending sentence terminator ('.',
// '?', '!', etc.) character, but are ended only by a hard separator
// (CR, LF, PS, etc.) hard, or mandatory line breaks
return GetReadOnlyRoots().sep_string_handle();
}
return GetReadOnlyRoots().undefined_value_handle();
}
UNREACHABLE();
}
// ecma402 #sec-segment-iterator-prototype-index
Handle<Object> JSSegmentIterator::Index(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
icu::BreakIterator* icu_break_iterator =
segment_iterator->icu_break_iterator().raw();
CHECK_NOT_NULL(icu_break_iterator);
return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
}
// ecma402 #sec-segment-iterator-prototype-next
// ecma402 #sec-%segmentiteratorprototype%.next
MaybeHandle<JSReceiver> JSSegmentIterator::Next(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
Factory* factory = isolate->factory();
icu::BreakIterator* icu_break_iterator =
segment_iterator->icu_break_iterator().raw();
// 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
int32_t prev = icu_break_iterator->current();
// 4. Let done be AdvanceSegmentIterator(iterator, forwards).
int32_t index = icu_break_iterator->next();
segment_iterator->set_is_break_type_set(true);
if (index == icu::BreakIterator::DONE) {
// 5. If done is true, return CreateIterResultObject(undefined, true).
// 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]].
int32_t start_index = icu_break_iterator->current();
// 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after).
int32_t end_index = icu_break_iterator->next();
// 7. If endIndex is not finite, then
if (end_index == icu::BreakIterator::DONE) {
// a. Return ! CreateIterResultObject(undefined, true).
return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
true);
}
// 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
Handle<Object> new_index = factory->NewNumberFromInt(index);
// 8. Let segment be the substring of string from previousIndex to
// newIndex, inclusive of previousIndex and exclusive of newIndex.
Handle<String> segment;
ASSIGN_RETURN_ON_EXCEPTION(isolate, segment,
segment_iterator->GetSegment(isolate, prev, index),
// 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
// 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string,
// startIndex, endIndex).
icu::UnicodeString string;
icu_break_iterator->getText().getText(string);
Handle<Object> segment_data;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, segment_data,
JSSegments::CreateSegmentDataObject(
isolate, segment_iterator->granularity(), icu_break_iterator, string,
start_index, end_index),
JSReceiver);
// 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
Handle<Object> break_type = segment_iterator->BreakType();
// 10. Let result be ! ObjectCreate(%ObjectPrototype%).
Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
// 11. Perform ! CreateDataProperty(result "segment", segment).
CHECK(JSReceiver::CreateDataProperty(isolate, result,
factory->segment_string(), segment,
Just(kDontThrow))
.FromJust());
// 12. Perform ! CreateDataProperty(result, "breakType", breakType).
CHECK(JSReceiver::CreateDataProperty(isolate, result,
factory->breakType_string(), break_type,
Just(kDontThrow))
.FromJust());
// 13. Perform ! CreateDataProperty(result, "index", newIndex).
CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(),
new_index, Just(kDontThrow))
.FromJust());
// 14. Return CreateIterResultObject(result, false).
return factory->NewJSIteratorResult(result, false);
}
// ecma402 #sec-segment-iterator-prototype-following
Maybe<bool> JSSegmentIterator::Following(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
Handle<Object> from_obj) {
Factory* factory = isolate->factory();
icu::BreakIterator* icu_break_iterator =
segment_iterator->icu_break_iterator().raw();
// 3. If from is not undefined,
if (!from_obj->IsUndefined()) {
// a. Let from be ? ToIndex(from).
uint32_t from;
Handle<Object> index;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
isolate, index,
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
Nothing<bool>());
if (!index->ToArrayIndex(&from)) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->from_string(),
factory->NewStringFromStaticChars("following"), index),
Nothing<bool>());
}
// b. Let length be the length of iterator.[[SegmentIteratorString]].
uint32_t length =
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
// c. If from ≥ length, throw a RangeError exception.
if (from >= length) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->from_string(),
factory->NewStringFromStaticChars("following"),
from_obj),
Nothing<bool>());
}
// d. Let iterator.[[SegmentIteratorPosition]] be from.
segment_iterator->set_is_break_type_set(true);
icu_break_iterator->following(from);
return Just(false);
}
// 4. return AdvanceSegmentIterator(iterator, forward).
// 4. .... or if direction is backwards and position is 0, return true.
// 4. If direction is forwards and position is the length of string ... return
// true.
segment_iterator->set_is_break_type_set(true);
return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
}
// ecma402 #sec-segment-iterator-prototype-preceding
Maybe<bool> JSSegmentIterator::Preceding(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
Handle<Object> from_obj) {
Factory* factory = isolate->factory();
icu::BreakIterator* icu_break_iterator =
segment_iterator->icu_break_iterator().raw();
// 3. If from is not undefined,
if (!from_obj->IsUndefined()) {
// a. Let from be ? ToIndex(from).
uint32_t from;
Handle<Object> index;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
isolate, index,
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
Nothing<bool>());
if (!index->ToArrayIndex(&from)) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->from_string(),
factory->NewStringFromStaticChars("preceding"), index),
Nothing<bool>());
}
// b. Let length be the length of iterator.[[SegmentIteratorString]].
uint32_t length =
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
// c. If from > length or from = 0, throw a RangeError exception.
if (from > length || from == 0) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->from_string(),
factory->NewStringFromStaticChars("preceding"),
from_obj),
Nothing<bool>());
}
// d. Let iterator.[[SegmentIteratorIndex]] be from.
segment_iterator->set_is_break_type_set(true);
icu_break_iterator->preceding(from);
return Just(false);
}
// 4. return AdvanceSegmentIterator(iterator, backwards).
// 4. .... or if direction is backwards and position is 0, return true.
segment_iterator->set_is_break_type_set(true);
return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
// 10. Return ! CreateIterResultObject(segmentData, false).
return factory->NewJSIteratorResult(segment_data, false);
}
} // namespace internal

View File

@ -1,14 +1,13 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#ifndef V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
#define V8_OBJECTS_JS_SEGMENT_ITERATOR_H_
#include "src/base/bit-field.h"
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
@ -34,40 +33,16 @@ class JSSegmentIterator
// ecma402 #sec-CreateSegmentIterator
V8_WARN_UNUSED_RESULT static MaybeHandle<JSSegmentIterator> Create(
Isolate* isolate, icu::BreakIterator* icu_break_iterator,
JSSegmenter::Granularity granularity, Handle<String> string);
JSSegmenter::Granularity granularity);
// ecma402 #sec-segment-iterator-prototype-next
V8_WARN_UNUSED_RESULT static MaybeHandle<JSReceiver> Next(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder);
// ecma402 #sec-segment-iterator-prototype-following
static Maybe<bool> Following(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder,
Handle<Object> from);
// ecma402 #sec-segment-iterator-prototype-preceding
static Maybe<bool> Preceding(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder,
Handle<Object> from);
// ecma402 #sec-segment-iterator-prototype-index
static Handle<Object> Index(
Isolate* isolate, Handle<JSSegmentIterator> segment_iterator_holder);
Handle<String> GranularityAsString() const;
DECL_BOOLEAN_ACCESSORS(is_break_type_set)
// ecma402 #sec-segment-iterator-prototype-breakType
Handle<Object> BreakType() const;
V8_WARN_UNUSED_RESULT MaybeHandle<String> GetSegment(Isolate* isolate,
int32_t start,
int32_t end) const;
Handle<String> GranularityAsString(Isolate* isolate) const;
// SegmentIterator accessors.
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
DECL_ACCESSORS(unicode_string, Managed<icu::UnicodeString>)
DECL_PRINTER(JSSegmentIterator)

View File

@ -1,14 +1,13 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_SEGMENTER_INL_H_
#define V8_OBJECTS_JS_SEGMENTER_INL_H_
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#ifndef V8_OBJECTS_JS_SEGMENTER_INL_H_
#define V8_OBJECTS_JS_SEGMENTER_INL_H_
#include "src/objects/js-segmenter.h"
#include "src/objects/objects-inl.h"

View File

@ -26,36 +26,37 @@ namespace internal {
MaybeHandle<JSSegmenter> JSSegmenter::New(Isolate* isolate, Handle<Map> map,
Handle<Object> locales,
Handle<Object> input_options) {
// 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
// 4. Let requestedLocales be ? CanonicalizeLocaleList(locales).
Maybe<std::vector<std::string>> maybe_requested_locales =
Intl::CanonicalizeLocaleList(isolate, locales);
MAYBE_RETURN(maybe_requested_locales, Handle<JSSegmenter>());
std::vector<std::string> requested_locales =
maybe_requested_locales.FromJust();
// 11. If options is undefined, then
// 5. If options is undefined, then
Handle<JSReceiver> options;
if (input_options->IsUndefined(isolate)) {
// 11. a. Let options be ObjectCreate(null).
// a. Let options be ObjectCreate(null).
options = isolate->factory()->NewJSObjectWithNullProto();
// 12. Else
} else {
// 23. a. Let options be ? ToObject(options).
} else { // 6. Else
// a. Let options be ? ToObject(options).
ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
Object::ToObject(isolate, input_options),
JSSegmenter);
}
// 4. Let opt be a new Record.
// 5. Let matcher be ? GetOption(options, "localeMatcher", "string",
// 7. Let opt be a new Record.
// 8. Let matcher be ? GetOption(options, "localeMatcher", "string",
// « "lookup", "best fit" », "best fit").
// 6. Set opt.[[localeMatcher]] to matcher.
// 9. Set opt.[[localeMatcher]] to matcher.
Maybe<Intl::MatcherOption> maybe_locale_matcher =
Intl::GetLocaleMatcher(isolate, options, "Intl.Segmenter");
MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
// 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
// 10. Let localeData be %Segmenter%.[[LocaleData]].
// 11. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
Maybe<Intl::ResolvedLocale> maybe_resolve_locale =
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
@ -66,6 +67,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::New(Isolate* isolate, Handle<Map> map,
}
Intl::ResolvedLocale r = maybe_resolve_locale.FromJust();
// 12. Set segmenter.[[Locale]] to the value of r.[[locale]].
Handle<String> locale_str =
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
@ -100,32 +102,34 @@ MaybeHandle<JSSegmenter> JSSegmenter::New(Isolate* isolate, Handle<Map> map,
break;
}
CHECK(U_SUCCESS(status));
CHECK_NOT_NULL(icu_break_iterator.get());
DCHECK(U_SUCCESS(status));
DCHECK_NOT_NULL(icu_break_iterator.get());
Handle<Managed<icu::BreakIterator>> managed_break_iterator =
Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
std::move(icu_break_iterator));
// Now all properties are ready, so we can allocate the result object.
Handle<JSSegmenter> segmenter_holder = Handle<JSSegmenter>::cast(
Handle<JSSegmenter> segmenter = Handle<JSSegmenter>::cast(
isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
DisallowHeapAllocation no_gc;
segmenter_holder->set_flags(0);
segmenter->set_flags(0);
// 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
segmenter_holder->set_locale(*locale_str);
// 12. Set segmenter.[[Locale]] to the value of r.[[Locale]].
segmenter->set_locale(*locale_str);
// 14. Set segmenter.[[SegmenterGranularity]] to granularity.
segmenter_holder->set_granularity(granularity_enum);
segmenter->set_granularity(granularity_enum);
segmenter_holder->set_icu_break_iterator(*managed_break_iterator);
return segmenter_holder;
segmenter->set_icu_break_iterator(*managed_break_iterator);
// 15. Return segmenter.
return segmenter;
}
// ecma402 #sec-Intl.Segmenter.prototype.resolvedOptions
Handle<JSObject> JSSegmenter::ResolvedOptions(
Isolate* isolate, Handle<JSSegmenter> segmenter_holder) {
Handle<JSObject> JSSegmenter::ResolvedOptions(Isolate* isolate,
Handle<JSSegmenter> segmenter) {
Factory* factory = isolate->factory();
// 3. Let options be ! ObjectCreate(%ObjectPrototype%).
Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
@ -141,23 +145,29 @@ Handle<JSObject> JSSegmenter::ResolvedOptions(
// [[Locale]] "locale"
// [[SegmenterGranularity]] "granularity"
Handle<String> locale(segmenter_holder->locale(), isolate);
Handle<String> locale(segmenter->locale(), isolate);
JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
NONE);
JSObject::AddProperty(isolate, result, factory->granularity_string(),
segmenter_holder->GranularityAsString(), NONE);
segmenter->GranularityAsString(isolate), NONE);
// 5. Return options.
return result;
}
Handle<String> JSSegmenter::GranularityAsString() const {
switch (granularity()) {
Handle<String> JSSegmenter::GranularityAsString(Isolate* isolate) const {
return GetGranularityString(isolate, granularity());
}
Handle<String> JSSegmenter::GetGranularityString(Isolate* isolate,
Granularity granularity) {
Factory* factory = isolate->factory();
switch (granularity) {
case Granularity::GRAPHEME:
return GetReadOnlyRoots().grapheme_string_handle();
return factory->grapheme_string();
case Granularity::WORD:
return GetReadOnlyRoots().word_string_handle();
return factory->word_string();
case Granularity::SENTENCE:
return GetReadOnlyRoots().sentence_string_handle();
return factory->sentence_string();
}
UNREACHABLE();
}

View File

@ -1,14 +1,13 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_SEGMENTER_H_
#define V8_OBJECTS_JS_SEGMENTER_H_
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#ifndef V8_OBJECTS_JS_SEGMENTER_H_
#define V8_OBJECTS_JS_SEGMENTER_H_
#include <set>
#include <string>
@ -42,7 +41,7 @@ class JSSegmenter : public TorqueGeneratedJSSegmenter<JSSegmenter, JSObject> {
V8_EXPORT_PRIVATE static const std::set<std::string>& GetAvailableLocales();
Handle<String> GranularityAsString() const;
Handle<String> GranularityAsString(Isolate* isolate) const;
// Segmenter accessors.
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
@ -58,6 +57,9 @@ class JSSegmenter : public TorqueGeneratedJSSegmenter<JSSegmenter, JSObject> {
inline void set_granularity(Granularity granularity);
inline Granularity granularity() const;
Handle<String> static GetGranularityString(Isolate* isolate,
Granularity granularity);
// Bit positions in |flags|.
DEFINE_TORQUE_GENERATED_JS_SEGMENTER_FLAGS()

View File

@ -0,0 +1,44 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_SEGMENTS_INL_H_
#define V8_OBJECTS_JS_SEGMENTS_INL_H_
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-segments.h"
#include "src/objects/objects-inl.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
namespace v8 {
namespace internal {
TQ_OBJECT_CONSTRUCTORS_IMPL(JSSegments)
// Base segments accessors.
ACCESSORS(JSSegments, icu_break_iterator, Managed<icu::BreakIterator>,
kIcuBreakIteratorOffset)
ACCESSORS(JSSegments, unicode_string, Managed<icu::UnicodeString>,
kUnicodeStringOffset)
// Encodes |granularity| into the current |flags| value through
// GranularityBits::update and writes the result back with set_flags().
inline void JSSegments::set_granularity(JSSegmenter::Granularity granularity) {
  DCHECK_GE(GranularityBits::kMax, granularity);
  const int updated_flags = GranularityBits::update(flags(), granularity);
  set_flags(updated_flags);
}
// Reads |flags| and decodes the granularity stored in its GranularityBits.
inline JSSegmenter::Granularity JSSegments::granularity() const {
  const int current_flags = flags();
  return GranularityBits::decode(current_flags);
}
} // namespace internal
} // namespace v8
#include "src/objects/object-macros-undef.h"
#endif // V8_OBJECTS_JS_SEGMENTS_INL_H_

183
src/objects/js-segments.cc Normal file
View File

@ -0,0 +1,183 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-segments.h"
#include <map>
#include <memory>
#include <string>
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-segment-iterator.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/js-segments-inl.h"
#include "src/objects/managed.h"
#include "src/objects/objects-inl.h"
#include "unicode/brkiter.h"
namespace v8 {
namespace internal {
// ecma402 #sec-createsegmentsobject
// Creates the JSSegments object for |string|. The text is set on a clone of
// |segmenter|'s ICU break iterator rather than on the segmenter's own
// iterator; the clone, the ICU copy of the text and the segmenter's
// granularity are then stored on the new object.
MaybeHandle<JSSegments> JSSegments::Create(Isolate* isolate,
                                           Handle<JSSegmenter> segmenter,
                                           Handle<String> string) {
  icu::BreakIterator* cloned_iterator =
      segmenter->icu_break_iterator().raw()->clone();
  DCHECK_NOT_NULL(cloned_iterator);

  // Attach the text first, then wrap the clone in a Managed<> holder.
  Handle<Managed<icu::UnicodeString>> managed_text =
      Intl::SetTextToBreakIterator(isolate, string, cloned_iterator);
  Handle<Managed<icu::BreakIterator>> managed_iterator =
      Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, cloned_iterator);

  // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
  // 2. Let segments be ! ObjectCreate(%Segments.prototype%, internalSlotsList).
  Handle<Map> segments_map(isolate->native_context()->intl_segments_map(),
                           isolate);
  Handle<JSSegments> segments = Handle<JSSegments>::cast(
      isolate->factory()->NewJSObjectFromMap(segments_map));
  segments->set_flags(0);

  // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
  // (Represented here by the cloned break iterator plus the granularity.)
  segments->set_icu_break_iterator(*managed_iterator);
  segments->set_granularity(segmenter->granularity());

  // 4. Set segments.[[SegmentsString]] to string.
  segments->set_unicode_string(*managed_text);

  // 5. Return segments.
  return segments;
}
// ecma402 #sec-createsegmentiterator
// Passes a fresh clone of |segments|' break iterator, together with the
// stored granularity, to JSSegmentIterator::Create and returns its result.
MaybeHandle<Object> JSSegments::CreateSegmentIterator(
    Isolate* isolate, Handle<JSSegments> segments) {
  icu::BreakIterator* iterator_copy =
      segments->icu_break_iterator().raw()->clone();
  const JSSegmenter::Granularity granularity = segments->granularity();
  return JSSegmentIterator::Create(isolate, iterator_copy, granularity);
}
// ecma402 #sec-%segmentsprototype%.containing
// Returns the segment data object describing the segment that contains the
// code unit at index |n|, or undefined when |n| is negative or not smaller
// than the length of the stored string.
//
// |n| is a UTF-16 code unit index into the ICU copy of the string held by
// |segments|. The break iterator stored on |segments| is repositioned by this
// call.
MaybeHandle<Object> JSSegments::Containing(Isolate* isolate,
                                           Handle<JSSegments> segments,
                                           int32_t n) {
  const icu::UnicodeString* text = segments->unicode_string().raw();

  // 5. Let len be the length of string.
  int32_t len = text->length();

  // 7. If n < 0 or n ≥ len, return undefined.
  if (n < 0 || n >= len) {
    return isolate->factory()->undefined_value();
  }

  // |n| may address the trail surrogate of a surrogate pair. At such an
  // offset isBoundary() is false and preceding() first snaps to the lead
  // surrogate and then returns the boundary strictly before it, which is the
  // start of the *previous* segment when the pair begins a segment. Move |n|
  // back to the lead surrogate so that start and end describe one segment.
  n = text->getChar32Start(n);

  icu::BreakIterator* break_iterator = segments->icu_break_iterator().raw();

  // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
  int32_t start_index =
      break_iterator->isBoundary(n) ? n : break_iterator->preceding(n);

  // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
  // Leaves the iterator positioned at |end_index|; CreateSegmentDataObject
  // queries the iterator's rule status for word granularity.
  int32_t end_index = break_iterator->following(n);

  // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex,
  // endIndex).
  return CreateSegmentDataObject(isolate, segments->granularity(),
                                 break_iterator, *text, start_index,
                                 end_index);
}
namespace {
// Returns true when the rule status reported by |break_iterator| falls in one
// of the half-open UBRK_WORD_{NUMBER,LETTER,KANA,IDEO} status ranges.
bool CurrentSegmentIsWordLike(icu::BreakIterator* break_iterator) {
  const int32_t status = break_iterator->getRuleStatus();
  // True when first <= status < limit.
  const auto status_in = [status](int32_t first, int32_t limit) {
    return status >= first && status < limit;
  };
  if (status_in(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT)) return true;
  if (status_in(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT)) return true;
  if (status_in(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT)) return true;
  return status_in(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT);
}
} // namespace
// ecma402 #sec-createsegmentdataobject
// Builds the plain object describing one segment of |string|:
//   "segment"    - the substring [start_index, end_index)
//   "index"      - start_index
//   "input"      - the whole of |string|
//   "isWordLike" - only for word granularity; derived from the current rule
//                  status of |break_iterator|.
// Returns an empty MaybeHandle if converting the ICU string fails.
MaybeHandle<Object> JSSegments::CreateSegmentDataObject(
    Isolate* isolate, JSSegmenter::Granularity granularity,
    icu::BreakIterator* break_iterator, const icu::UnicodeString& string,
    int32_t start_index, int32_t end_index) {
  Factory* factory = isolate->factory();

  // 1. Let len be the length of string.
  // 2. Assert: startIndex ≥ 0.
  DCHECK_GE(start_index, 0);
  // 3. Assert: endIndex ≤ len.
  DCHECK_LE(end_index, string.length());
  // 4. Assert: startIndex < endIndex.
  DCHECK_LT(start_index, end_index);

  // 5. Let result be ! ObjectCreate(%ObjectPrototype%).
  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());

  // Defines |key| on |result|; the definition is asserted to succeed in debug
  // builds.
  const auto define_property = [&](Handle<String> key, Handle<Object> value) {
    Maybe<bool> created = JSReceiver::CreateDataProperty(
        isolate, result, key, value, Just(kDontThrow));
    DCHECK(created.FromJust());
    USE(created);
  };

  // 6. Let segment be the String value equal to the substring of string
  // consisting of the code units at indices startIndex (inclusive) through
  // endIndex (exclusive).
  Handle<String> segment;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, segment, Intl::ToString(isolate, string, start_index, end_index),
      JSObject);

  // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
  define_property(factory->segment_string(), segment);

  // 8. Perform ! CreateDataPropertyOrThrow(result, "index", startIndex).
  define_property(factory->index_string(),
                  factory->NewNumberFromInt(start_index));

  // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
  Handle<String> input_string;
  ASSIGN_RETURN_ON_EXCEPTION(isolate, input_string,
                             Intl::ToString(isolate, string), JSObject);
  define_property(factory->input_string(), input_string);

  // 10. Let granularity be segmenter.[[SegmenterGranularity]].
  // 11. If granularity is "word", then
  if (granularity == JSSegmenter::Granularity::WORD) {
    // a. Let isWordLike be a Boolean value indicating whether the word segment
    //    segment in string is "word-like" according to locale
    //    segmenter.[[Locale]].
    Handle<Object> is_word_like = CurrentSegmentIsWordLike(break_iterator)
                                      ? factory->true_value()
                                      : factory->false_value();
    // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
    define_property(factory->isWordLike_string(), is_word_like);
  }
  return result;
}
// Returns the string form of this object's granularity, as produced by
// JSSegmenter::GetGranularityString.
Handle<String> JSSegments::GranularityAsString(Isolate* isolate) const {
  const JSSegmenter::Granularity current = granularity();
  return JSSegmenter::GetGranularityString(isolate, current);
}
} // namespace internal
} // namespace v8

80
src/objects/js-segments.h Normal file
View File

@ -0,0 +1,80 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_SEGMENTS_H_
#define V8_OBJECTS_JS_SEGMENTS_H_
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/base/bit-field.h"
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
#include "src/objects/js-segmenter.h"
#include "src/objects/managed.h"
#include "src/objects/objects.h"
#include "unicode/uversion.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
namespace U_ICU_NAMESPACE {
class BreakIterator;
class UnicodeString;
} // namespace U_ICU_NAMESPACE
namespace v8 {
namespace internal {
// Heap object behind the Segments objects of Intl.Segmenter. It carries an ICU
// break iterator, an ICU copy of the string being segmented, and the
// granularity (packed into |flags|).
class JSSegments : public TorqueGeneratedJSSegments<JSSegments, JSObject> {
public:
// ecma402 #sec-createsegmentsobject
// Creates a JSSegments for |string| using |segmenter|'s break iterator and
// granularity.
V8_WARN_UNUSED_RESULT static MaybeHandle<JSSegments> Create(
Isolate* isolate, Handle<JSSegmenter> segmenter, Handle<String> string);
// ecma402 #sec-%segmentsprototype%.containing
// Returns the segment data object for the segment containing code unit
// |index|, or undefined when |index| is out of range.
V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Containing(
Isolate* isolate, Handle<JSSegments> segments_holder, int32_t index);
// ecma402 #sec-%segmentsprototype%-@@iterator
// Creates a segment iterator over |segments_holder|.
V8_WARN_UNUSED_RESULT static MaybeHandle<Object> CreateSegmentIterator(
Isolate* isolate, Handle<JSSegments> segments_holder);
// ecma402 #sec-get-%segmentsprototype%.string
// NOTE(review): no definition is visible alongside the other members in
// js-segments.cc — confirm that this declaration is still needed.
V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetString(
Isolate* isolate, Handle<JSSegments> segments_holder);
// ecma402 #sec-createsegmentdataobject
// Builds the { segment, index, input [, isWordLike] } object for the range
// [start_index, end_index) of |string|. "isWordLike" is only added for word
// granularity and is read from |break_iterator|'s current rule status.
V8_WARN_UNUSED_RESULT static MaybeHandle<Object> CreateSegmentDataObject(
Isolate* isolate, JSSegmenter::Granularity granularity,
icu::BreakIterator* break_iterator, const icu::UnicodeString& string,
int32_t start_index, int32_t end_index);
// Returns the granularity of this object as a string.
Handle<String> GranularityAsString(Isolate* isolate) const;
// Segments accessors.
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
DECL_ACCESSORS(unicode_string, Managed<icu::UnicodeString>)
DECL_PRINTER(JSSegments)
// Granularity is stored in the GranularityBits of |flags|.
inline void set_granularity(JSSegmenter::Granularity granularity);
inline JSSegmenter::Granularity granularity() const;
// Bit positions in |flags|.
// NOTE(review): this uses the JS_SEGMENT_ITERATOR flags macro rather than a
// JSSegments-specific one — presumably the flag layout is shared; confirm
// against the Torque definition.
DEFINE_TORQUE_GENERATED_JS_SEGMENT_ITERATOR_FLAGS()
// Every granularity value must fit into GranularityBits.
STATIC_ASSERT(JSSegmenter::Granularity::GRAPHEME <= GranularityBits::kMax);
STATIC_ASSERT(JSSegmenter::Granularity::WORD <= GranularityBits::kMax);
STATIC_ASSERT(JSSegmenter::Granularity::SENTENCE <= GranularityBits::kMax);
TQ_OBJECT_CONSTRUCTORS(JSSegments)
};
} // namespace internal
} // namespace v8
#include "src/objects/object-macros-undef.h"
#endif // V8_OBJECTS_JS_SEGMENTS_H_

View File

@ -306,6 +306,7 @@ VisitorId Map::GetVisitorId(Map map) {
case JS_RELATIVE_TIME_FORMAT_TYPE:
case JS_SEGMENT_ITERATOR_TYPE:
case JS_SEGMENTER_TYPE:
case JS_SEGMENTS_TYPE:
#endif // V8_INTL_SUPPORT
case WASM_EXCEPTION_OBJECT_TYPE:
case WASM_GLOBAL_OBJECT_TYPE:

View File

@ -252,7 +252,8 @@ class ZoneForwardList;
V(JSPluralRules) \
V(JSRelativeTimeFormat) \
V(JSSegmentIterator) \
V(JSSegmenter)
V(JSSegmenter) \
V(JSSegments)
#else
#define HEAP_OBJECT_ORDINARY_TYPE_LIST(V) HEAP_OBJECT_ORDINARY_TYPE_LIST_BASE(V)
#endif // V8_INTL_SUPPORT

View File

@ -1033,6 +1033,7 @@ ReturnType BodyDescriptorApply(InstanceType type, T1 p1, T2 p2, T3 p3, T4 p4) {
case JS_RELATIVE_TIME_FORMAT_TYPE:
case JS_SEGMENT_ITERATOR_TYPE:
case JS_SEGMENTER_TYPE:
case JS_SEGMENTS_TYPE:
#endif // V8_INTL_SUPPORT
case WASM_EXCEPTION_OBJECT_TYPE:
case WASM_GLOBAL_OBJECT_TYPE:

View File

@ -94,6 +94,7 @@
#include "src/objects/js-relative-time-format.h"
#include "src/objects/js-segment-iterator.h"
#include "src/objects/js-segmenter.h"
#include "src/objects/js-segments.h"
#endif // V8_INTL_SUPPORT
#include "src/codegen/source-position-table.h"
#include "src/objects/js-weak-refs-inl.h"

View File

@ -78,6 +78,7 @@
// - JSPluralRules // If V8_INTL_SUPPORT enabled.
// - JSRelativeTimeFormat // If V8_INTL_SUPPORT enabled.
// - JSSegmenter // If V8_INTL_SUPPORT enabled.
// - JSSegments // If V8_INTL_SUPPORT enabled.
// - JSSegmentIterator // If V8_INTL_SUPPORT enabled.
// - JSV8BreakIterator // If V8_INTL_SUPPORT enabled.
// - WasmExceptionObject

View File

@ -4788,6 +4788,7 @@ void ImplementationVisitor::GenerateExportedMacrosAssembler(
cc_contents << "#include \"src/objects/js-relative-time-format.h\"\n";
cc_contents << "#include \"src/objects/js-segment-iterator.h\"\n";
cc_contents << "#include \"src/objects/js-segmenter.h\"\n";
cc_contents << "#include \"src/objects/js-segments.h\"\n";
}
cc_contents << "#include \"torque-generated/" << file_name << ".h\"\n";

View File

@ -1,38 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
let prev = 0;
let segments = [];
while (!iter.following()) {
assertEquals(undefined, iter.breakType);
assertTrue(iter.index >= 0);
assertTrue(iter.index <= text.length);
assertTrue(iter.index > prev);
segments.push(text.substring(prev, iter.index));
prev = iter.index;
}
assertEquals(text, segments.join(""));
}

View File

@ -1,45 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
let segments = [];
// Create another %SegmentIterator% to compare with result from the one that
// created in the for of loop.
let iter = seg.segment(text);
let prev = 0;
for (const v of seg.segment(text)) {
assertEquals(undefined, v.breakType);
assertEquals("string", typeof v.segment);
assertTrue(v.segment.length > 0);
segments.push(v.segment);
// manually advance the iter.
assertFalse(iter.following());
assertEquals(iter.breakType, v.breakType);
assertEquals(text.substring(prev, iter.index), v.segment);
prev = iter.index;
}
assertTrue(iter.following());
assertEquals(text, segments.join(''));
}

View File

@ -1,40 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
let segments = [];
let oldPos = -1;
for (let result = iter.next(); !result.done; result = iter.next()) {
const v = result.value;
assertEquals(undefined, v.breakType);
assertEquals("string", typeof v.segment);
assertTrue(v.segment.length > 0);
segments.push(v.segment);
assertEquals("number", typeof v.index);
assertTrue(oldPos < v.index);
oldPos = v.index;
}
assertEquals(text, segments.join(''));
}

View File

@ -1,44 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
let prev = text.length;
let segments = [];
iter.preceding(prev)
assertEquals(undefined, iter.breakType)
assertTrue(iter.index >= 0);
assertTrue(iter.index < prev);
segments.push(text.substring(iter.index, prev));
prev = iter.index;
while (!iter.preceding()) {
assertEquals(undefined, iter.breakType);
assertTrue(iter.index >= 0);
assertTrue(iter.index <= text.length);
assertTrue(iter.index < prev);
segments.push(text.substring(iter.index, prev));
prev = iter.index;
}
assertEquals(text, segments.reverse().join(""));
}

View File

@ -23,7 +23,21 @@ for (const text of [
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assertEquals(undefined, iter.breakType);
assertEquals(0, iter.index);
const segments = seg.segment(text);
let results = [];
var pos = -1;
for (let s of segments) {
assertEquals(["segment", "index", "input"], Object.keys(s));
assertEquals(typeof s.index, "number");
assertEquals(typeof s.segment, "string");
assertEquals(typeof s.input, "string");
assertEquals(text, s.input);
assertEquals(text.substring(s.index, s.index + s.segment.length),
s.segment);
assertTrue(pos < s.index);
pos = s.index;
results.push(s.segment);
}
assertTrue(pos < text.length);
assertEquals(text, results.join(""));
}

View File

@ -1,11 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const segmenter = new Intl.Segmenter();
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = segmenter.segment(text);
assertEquals(undefined, iter.breakType);

View File

@ -1,26 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const segmenter = new Intl.Segmenter();
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = segmenter.segment(text);
assertEquals("function", typeof iter.following);
// ToNumber("ABC") return NaN, ToInteger("ABC") return +0, ToIndex("ABC") return 0
assertDoesNotThrow(() => iter.following("ABC"));
// ToNumber(null) return +0, ToInteger(null) return +0, ToIndex(null) return 0
assertDoesNotThrow(() => iter.following(null));
// ToNumber(1.4) return 1.4, ToInteger(1.4) return 1, ToIndex(1.4) return 1
assertDoesNotThrow(() => iter.following(1.4));
assertThrows(() => iter.following(-3), RangeError);
// 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] )
// 3.b If from >= iterator.[[SegmentIteratorString]], throw a RangeError exception.
assertDoesNotThrow(() => iter.following(text.length - 1));
assertThrows(() => iter.following(text.length), RangeError);
assertThrows(() => iter.following(text.length + 1), RangeError);

View File

@ -1,11 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const segmenter = new Intl.Segmenter();
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = segmenter.segment(text);
assertEquals("function", typeof iter.next);

View File

@ -11,6 +11,9 @@ assertTrue(descriptor.writable);
assertFalse(descriptor.enumerable);
assertTrue(descriptor.configurable);
let segmenterPrototype = Object.getPrototypeOf(seg);
assertEquals("Intl.Segmenter", segmenterPrototype[Symbol.toStringTag]);
// ecma402 #sec-Intl.Segmenter.prototype
// Intl.Segmenter.prototype
// The value of Intl.Segmenter.prototype is %SegmenterPrototype%.
@ -29,10 +32,11 @@ for (let func of ["segment", "resolvedOptions"]) {
assertTrue(descriptor.configurable);
}
let segmentIterator = seg.segment('text');
let prototype = Object.getPrototypeOf(segmentIterator);
for (let func of ["next", "following", "preceding"]) {
let descriptor = Object.getOwnPropertyDescriptor(prototype, func);
let segments = seg.segment('text');
let segmentsPrototype = Object.getPrototypeOf(segments);
for (let func of ["containing", Symbol.iterator]) {
let descriptor = Object.getOwnPropertyDescriptor(segmentsPrototype, func);
assertTrue(descriptor.writable);
assertFalse(descriptor.enumerable);
assertTrue(descriptor.configurable);
@ -47,34 +51,44 @@ function checkGetterProperty(prototype, property) {
assertTrue(desc.configurable);
}
// Test the descriptor is correct for properties.
checkGetterProperty(prototype, 'index');
checkGetterProperty(prototype, 'breakType');
// Test the SegmentIteratorPrototype methods are called with same
// Test the SegmentsPrototype methods are called with same
// receiver and won't throw.
assertDoesNotThrow(() => prototype.next.call(segmentIterator));
assertDoesNotThrow(() => prototype.following.call(segmentIterator));
assertDoesNotThrow(() => prototype.preceding.call(segmentIterator));
assertDoesNotThrow(() => segmentsPrototype.containing.call(segments));
assertDoesNotThrow(() => segmentsPrototype[Symbol.iterator].call(segments));
// Test the SegmentsPrototype methods are called with a different
// receiver and correctly throw.
var otherReceivers = [
1, 123.45, undefined, null, "string", true, false,
Intl, Intl.Segmenter, Intl.Segmenter.prototype,
prototype,
segmentsPrototype,
new Intl.Segmenter(),
new Intl.Collator(),
new Intl.DateTimeFormat(),
new Intl.NumberFormat(),
];
for (let rec of otherReceivers) {
assertThrows(() => prototype.next.call(rec), TypeError);
assertThrows(() => prototype.following.call(rec), TypeError);
assertThrows(() => prototype.preceding.call(rec), TypeError);
assertThrows(() => segmentsPrototype.containing.call(rec), TypeError);
}
// Check the property of the return object of next()
let segmentIterator = segments[Symbol.iterator]();
let segmentIteratorPrototype = Object.getPrototypeOf(segmentIterator);
for (let func of ["next"]) {
let descriptor = Object.getOwnPropertyDescriptor(segmentIteratorPrototype,
func);
assertTrue(descriptor.writable);
assertFalse(descriptor.enumerable);
assertTrue(descriptor.configurable);
}
assertEquals("Segmenter String Iterator",
segmentIteratorPrototype[Symbol.toStringTag]);
let desc = Object.getOwnPropertyDescriptor(
segmentIteratorPrototype, Symbol.toStringTag);
assertFalse(desc.writable);
assertFalse(desc.enumerable);
assertTrue(desc.configurable);
let nextReturn = segmentIterator.next();
function checkProperty(obj, property) {
@ -83,9 +97,3 @@ function checkProperty(obj, property) {
assertTrue(desc.enumerable);
assertTrue(desc.configurable);
}
checkProperty(nextReturn, 'done');
checkProperty(nextReturn, 'value');
checkProperty(nextReturn.value, 'segment');
checkProperty(nextReturn.value, 'breakType');
checkProperty(nextReturn.value, 'index');

View File

@ -1,11 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// A segment iterator that has not been advanced yet must report index 0.
const input = "Hello World, Test 123! Foo Bar. How are you?";
const iterator = new Intl.Segmenter().segment(input);
assertEquals(0, iterator.index);

View File

@ -1,30 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Exercises the argument validation of the segment iterator's
// preceding(from) method.
const input = "Hello World, Test 123! Foo Bar. How are you?";
const iterator = new Intl.Segmenter().segment(input);

assertEquals("function", typeof iterator.preceding);

// Each entry is [from, whether a RangeError is expected]. The entries are run
// in order against the same iterator.
const cases = [
  // ToNumber("ABC") returns NaN, ToInteger("ABC") returns +0, so
  // ToIndex("ABC") returns 0.
  ["ABC", true],
  // ToNumber(null) returns +0, ToInteger(null) returns +0, so ToIndex(null)
  // returns 0.
  [null, true],
  [-3, true],
  // ToNumber(1.4) returns 1.4, ToInteger(1.4) returns 1, so ToIndex(1.4)
  // returns 1.
  [1.4, false],
  // 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
  // 3.b If ... from = 0, throw a RangeError exception.
  [0, true],
  // 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
  // 3.b If from > iterator.[[SegmentIteratorString]] ... , throw a RangeError
  // exception.
  [input.length - 1, false],
  [input.length, false],
  [input.length + 1, true],
];

for (const [from, expectRangeError] of cases) {
  if (expectRangeError) {
    assertThrows(() => iterator.preceding(from), RangeError);
  } else {
    assertDoesNotThrow(() => iterator.preceding(from));
  }
}

View File

@ -1,15 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// A freshly created segment iterator reports a numeric index of 0 and an
// undefined breakType for every granularity listed below.
const input = "Hello World, Test 123! Foo Bar. How are you?";
["grapheme", "word", "sentence"].forEach((granularity) => {
  const iterator = new Intl.Segmenter("en", { granularity }).segment(input);
  assertEquals("number", typeof iterator.index);
  assertEquals(0, iterator.index);
  assertEquals(undefined, iterator.breakType);
});

View File

@ -1,38 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Walks every sample text forward with following() and checks that the pieces
// between consecutive break positions reassemble into the original text.
const seg = new Intl.Segmenter([], {granularity: "sentence"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
// End position of the previously collected piece (0 before the first break).
let prev = 0;
let segments = [];
// Keep advancing until following() returns a truthy value (presumably the
// signal that the end of the text was reached).
while (!iter.following()) {
// A sentence break must be reported as either "sep" or "term".
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
// The break position stays inside the text and strictly moves forward.
assertTrue(iter.index >= 0);
assertTrue(iter.index <= text.length);
assertTrue(iter.index > prev);
segments.push(text.substring(prev, iter.index));
prev = iter.index;
}
// The collected pieces must cover the whole text without gaps or overlap.
assertEquals(text, segments.join(""));
}

View File

@ -1,45 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Iterates each sample text with for-of and, in lock step, advances a second
// iterator by hand with following(); both must report the same segments.
const seg = new Intl.Segmenter([], {granularity: "sentence"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
let segments = [];
// Create another %SegmentIterator% to compare with the results from the one
// that is created by the for-of loop.
let iter = seg.segment(text);
// End position of the previous segment as seen by the manual iterator.
let prev = 0;
for (const v of seg.segment(text)) {
// A sentence break must be reported as either "sep" or "term".
assertTrue(["sep", "term"].includes(v.breakType), v.breakType);
assertEquals("string", typeof v.segment);
assertTrue(v.segment.length > 0);
segments.push(v.segment);
// Manually advance the iter; it must not report completion while the for-of
// loop is still producing values.
assertFalse(iter.following());
assertEquals(iter.breakType, v.breakType);
assertEquals(text.substring(prev, iter.index), v.segment);
prev = iter.index;
}
// Once the for-of loop is exhausted, one more manual step must return true.
assertTrue(iter.following());
// The collected segments must reassemble into the original text.
assertEquals(text, segments.join(''));
}

View File

@ -1,40 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Drains each sample text through explicit next() calls and checks the shape
// of every returned value as well as the reassembled text.
const seg = new Intl.Segmenter([], {granularity: "sentence"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
let segments = [];
// Index of the previously returned value; -1 so that any first index passes
// the strictly-increasing check below.
let oldPos = -1;
for (let result = iter.next(); !result.done; result = iter.next()) {
const v = result.value;
// The break type is read from the iterator itself here, not from the value.
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
assertEquals("string", typeof v.segment);
assertTrue(v.segment.length > 0);
segments.push(v.segment);
assertEquals("number", typeof v.index);
// Indices must strictly increase from one value to the next.
assertTrue(oldPos < v.index);
oldPos = v.index;
}
// The collected segments must reassemble into the original text.
assertEquals(text, segments.join(''));
}

View File

@ -1,44 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Walks every sample text backwards with preceding(), starting from the end of
// the text, and checks that the pieces reassemble into the original text.
const seg = new Intl.Segmenter([], {granularity: "sentence"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
// Start position of the most recently collected piece; begins at the end of
// the text because the walk goes backwards.
let prev = text.length;
let segments = [];
// First step: ask for the break preceding the end of the text explicitly.
iter.preceding(prev);
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
assertTrue(iter.index >= 0);
assertTrue(iter.index < prev);
segments.push(text.substring(iter.index, prev));
prev = iter.index;
// Remaining steps: keep moving backwards until preceding() returns a truthy
// value (presumably the signal that the start of the text was reached).
while (!iter.preceding()) {
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
// The break position stays inside the text and strictly moves backwards.
assertTrue(iter.index >= 0);
assertTrue(iter.index <= text.length);
assertTrue(iter.index < prev);
segments.push(text.substring(iter.index, prev));
prev = iter.index;
}
// Pieces were collected back to front, so reverse them before joining.
assertEquals(text, segments.reverse().join(""));
}

View File

@ -23,7 +23,21 @@ for (const text of [
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assertEquals(undefined, iter.breakType);
assertEquals(0, iter.index);
const segments = seg.segment(text);
let results = [];
var pos = -1;
for (let s of segments) {
assertEquals(["segment", "index", "input"], Object.keys(s));
assertEquals(typeof s.index, "number");
assertEquals(typeof s.segment, "string");
assertEquals(typeof s.input, "string");
assertEquals(text, s.input);
assertEquals(text.substring(s.index, s.index + s.segment.length),
s.segment);
assertTrue(pos < s.index);
pos = s.index;
results.push(s.segment);
}
assertTrue(pos < text.length);
assertEquals(text, results.join(""));
}

View File

@ -1,38 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Walks every sample text forward with following() at word granularity and
// checks that the pieces between consecutive break positions reassemble into
// the original text.
const seg = new Intl.Segmenter([], {granularity: "word"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
// End position of the previously collected piece (0 before the first break).
let prev = 0;
let segments = [];
// Keep advancing until following() returns a truthy value (presumably the
// signal that the end of the text was reached).
while (!iter.following()) {
// A word break must be reported as either "word" or "none".
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
// The break position stays inside the text and strictly moves forward.
assertTrue(iter.index >= 0);
assertTrue(iter.index <= text.length);
assertTrue(iter.index > prev);
segments.push(text.substring(prev, iter.index));
prev = iter.index;
}
// The collected pieces must cover the whole text without gaps or overlap.
assertEquals(text, segments.join(""));
}

View File

@ -1,45 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Iterates each sample text with for-of at word granularity and, in lock step,
// advances a second iterator by hand with following(); both must report the
// same segments.
const seg = new Intl.Segmenter([], {granularity: "word"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
let segments = [];
// Create another %SegmentIterator% to compare with the results from the one
// that is created by the for-of loop.
let iter = seg.segment(text);
// End position of the previous segment as seen by the manual iterator.
let prev = 0;
for (const v of seg.segment(text)) {
// A word break must be reported as either "word" or "none".
assertTrue(["word", "none"].includes(v.breakType), v.breakType);
assertEquals("string", typeof v.segment);
assertTrue(v.segment.length > 0);
segments.push(v.segment);
// Manually advance the iter; it must not report completion while the for-of
// loop is still producing values.
assertFalse(iter.following());
assertEquals(iter.breakType, v.breakType);
assertEquals(text.substring(prev, iter.index), v.segment);
prev = iter.index;
}
// Once the for-of loop is exhausted, one more manual step must return true.
assertTrue(iter.following());
// The collected segments must reassemble into the original text.
assertEquals(text, segments.join(''));
}

View File

@ -1,40 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Drains each sample text through explicit next() calls at word granularity
// and checks the shape of every returned value as well as the reassembled
// text.
const seg = new Intl.Segmenter([], {granularity: "word"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
let segments = [];
// Index of the previously returned value; -1 so that any first index passes
// the strictly-increasing check below.
let oldPos = -1;
for (let result = iter.next(); !result.done; result = iter.next()) {
const v = result.value;
// The break type is read from the iterator itself here, not from the value.
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
assertEquals("string", typeof v.segment);
assertTrue(v.segment.length > 0);
segments.push(v.segment);
assertEquals("number", typeof v.index);
// Indices must strictly increase from one value to the next.
assertTrue(oldPos < v.index);
oldPos = v.index;
}
// The collected segments must reassemble into the original text.
assertEquals(text, segments.join(''));
}

View File

@ -1,44 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Walks every sample text backwards with preceding() at word granularity,
// starting from the end of the text, and checks that the pieces reassemble
// into the original text.
const seg = new Intl.Segmenter([], {granularity: "word"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
// Start position of the most recently collected piece; begins at the end of
// the text because the walk goes backwards.
let prev = text.length;
let segments = [];
// First step: ask for the break preceding the end of the text explicitly.
iter.preceding(prev);
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
assertTrue(iter.index >= 0);
assertTrue(iter.index < prev);
segments.push(text.substring(iter.index, prev));
prev = iter.index;
// Remaining steps: keep moving backwards until preceding() returns a truthy
// value (presumably the signal that the start of the text was reached).
while (!iter.preceding()) {
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
// The break position stays inside the text and strictly moves backwards.
assertTrue(iter.index >= 0);
assertTrue(iter.index <= text.length);
assertTrue(iter.index < prev);
segments.push(text.substring(iter.index, prev));
prev = iter.index;
}
// Pieces were collected back to front, so reverse them before joining.
assertEquals(text, segments.reverse().join(""));
}

View File

@ -23,7 +23,22 @@ for (const text of [
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assertEquals(undefined, iter.breakType);
assertEquals(0, iter.index);
const segments = seg.segment(text);
let results = [];
var pos = -1;
for (let s of segments) {
assertEquals(["segment", "index", "input", "isWordLike"], Object.keys(s));
assertEquals(typeof s.isWordLike, "boolean");
assertEquals(typeof s.index, "number");
assertEquals(typeof s.segment, "string");
assertEquals(typeof s.input, "string");
assertEquals(text, s.input);
assertEquals(text.substring(s.index, s.index + s.segment.length),
s.segment);
assertTrue(pos < s.index);
pos = s.index;
results.push(s.segment);
}
assertTrue(pos < text.length);
assertEquals(text, results.join(""));
}

View File

@ -8,32 +8,34 @@ assertEquals("function", typeof Intl.Segmenter.prototype.segment);
assertEquals(1, Intl.Segmenter.prototype.segment.length);
let seg = new Intl.Segmenter("en", {granularity: "word"})
let res;
let segments;
// test with 0 args
assertDoesNotThrow(() => res = seg.segment())
assertDoesNotThrow(() => segments = seg.segment())
// test with 1 arg
assertDoesNotThrow(() => res = seg.segment("hello"))
assertEquals("hello", res.next().value.segment);
assertDoesNotThrow(() => segments = seg.segment("hello"))
assertEquals("hello", segments.containing(0).input);
// test with 1 arg containing two words
assertDoesNotThrow(() => res = seg.segment("hello world"))
assertEquals("hello", res.next().value.segment);
assertDoesNotThrow(() => segments = seg.segment("hello world"))
assertEquals("hello world", segments.containing(0).input);
// test with other types
assertDoesNotThrow(() => res = seg.segment(undefined))
assertEquals("undefined", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment(null))
assertEquals("null", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment(true))
assertEquals("true", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment(false))
assertEquals("false", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment(1234))
assertEquals("1234", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment(3.1415926))
assertEquals("3.1415926", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment(["hello","world"]))
assertEquals("hello", res.next().value.segment);
assertDoesNotThrow(() => res = seg.segment({k: 'v'}))
assertEquals("[", res.next().value.segment);
assertThrows(() => res = seg.segment(Symbol()), TypeError)
assertDoesNotThrow(() => segments = seg.segment(undefined))
assertEquals("undefined", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(null))
assertEquals("null", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(true))
assertEquals("true", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(false))
assertEquals("false", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(1234))
assertEquals("1234", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(3.1415926))
assertEquals("3.1415926", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(98765432109876543210987654321n))
assertEquals("98765432109876543210987654321", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment(["hello","world"]))
assertEquals("hello,world", segments.containing(0).input);
assertDoesNotThrow(() => segments = seg.segment({k: 'v'}))
assertEquals("[object Object]", segments.containing(0).input);
assertThrows(() => segments = seg.segment(Symbol()), TypeError)

View File

@ -516,8 +516,10 @@
'intl402/Locale/constructor-options-region-valid': [FAIL],
# http://crbug/v8/6891
'intl402/Segmenter/prototype/segment/segment-grapheme': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL],
'intl402/Segmenter/prototype/segment/segment-tostring': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word-iterable': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=9818
'built-ins/AsyncFunction/proto-from-ctor-realm': [FAIL],

View File

@ -60,6 +60,7 @@ export const CATEGORIES = new Map([
'JS_RELATIVE_TIME_FORMAT_TYPE',
'JS_SEGMENT_ITERATOR_TYPE',
'JS_SEGMENTER_TYPE',
'JS_SEGMENTS_TYPE',
'JS_V8_BREAK_ITERATOR_TYPE',
'JS_MAP_KEY_ITERATOR_TYPE',
'JS_MAP_KEY_VALUE_ITERATOR_TYPE',

View File

@ -190,17 +190,18 @@ INSTANCE_TYPES = {
1078: "JS_RELATIVE_TIME_FORMAT_TYPE",
1079: "JS_SEGMENT_ITERATOR_TYPE",
1080: "JS_SEGMENTER_TYPE",
1081: "JS_STRING_ITERATOR_TYPE",
1082: "JS_V8_BREAK_ITERATOR_TYPE",
1083: "JS_WEAK_REF_TYPE",
1084: "WASM_EXCEPTION_OBJECT_TYPE",
1085: "WASM_GLOBAL_OBJECT_TYPE",
1086: "WASM_INSTANCE_OBJECT_TYPE",
1087: "WASM_MEMORY_OBJECT_TYPE",
1088: "WASM_MODULE_OBJECT_TYPE",
1089: "WASM_TABLE_OBJECT_TYPE",
1090: "JS_BOUND_FUNCTION_TYPE",
1091: "JS_FUNCTION_TYPE",
1081: "JS_SEGMENTS_TYPE",
1082: "JS_STRING_ITERATOR_TYPE",
1083: "JS_V8_BREAK_ITERATOR_TYPE",
1084: "JS_WEAK_REF_TYPE",
1085: "WASM_EXCEPTION_OBJECT_TYPE",
1086: "WASM_GLOBAL_OBJECT_TYPE",
1087: "WASM_INSTANCE_OBJECT_TYPE",
1088: "WASM_MEMORY_OBJECT_TYPE",
1089: "WASM_MODULE_OBJECT_TYPE",
1090: "WASM_TABLE_OBJECT_TYPE",
1091: "JS_BOUND_FUNCTION_TYPE",
1092: "JS_FUNCTION_TYPE",
}
# List of known V8 maps.
@ -297,65 +298,65 @@ KNOWN_MAPS = {
("read_only_space", 0x0319d): (96, "EnumCacheMap"),
("read_only_space", 0x031ed): (87, "ArrayBoilerplateDescriptionMap"),
("read_only_space", 0x032d9): (99, "InterceptorInfoMap"),
("read_only_space", 0x0538d): (72, "PromiseFulfillReactionJobTaskMap"),
("read_only_space", 0x053b5): (73, "PromiseRejectReactionJobTaskMap"),
("read_only_space", 0x053dd): (74, "CallableTaskMap"),
("read_only_space", 0x05405): (75, "CallbackTaskMap"),
("read_only_space", 0x0542d): (76, "PromiseResolveThenableJobTaskMap"),
("read_only_space", 0x05455): (79, "FunctionTemplateInfoMap"),
("read_only_space", 0x0547d): (80, "ObjectTemplateInfoMap"),
("read_only_space", 0x054a5): (81, "AccessCheckInfoMap"),
("read_only_space", 0x054cd): (82, "AccessorInfoMap"),
("read_only_space", 0x054f5): (83, "AccessorPairMap"),
("read_only_space", 0x0551d): (84, "AliasedArgumentsEntryMap"),
("read_only_space", 0x05545): (85, "AllocationMementoMap"),
("read_only_space", 0x0556d): (88, "AsmWasmDataMap"),
("read_only_space", 0x05595): (89, "AsyncGeneratorRequestMap"),
("read_only_space", 0x055bd): (90, "BreakPointMap"),
("read_only_space", 0x055e5): (91, "BreakPointInfoMap"),
("read_only_space", 0x0560d): (92, "CachedTemplateObjectMap"),
("read_only_space", 0x05635): (94, "ClassPositionsMap"),
("read_only_space", 0x0565d): (95, "DebugInfoMap"),
("read_only_space", 0x05685): (98, "FunctionTemplateRareDataMap"),
("read_only_space", 0x056ad): (100, "InterpreterDataMap"),
("read_only_space", 0x056d5): (101, "PromiseCapabilityMap"),
("read_only_space", 0x056fd): (102, "PromiseReactionMap"),
("read_only_space", 0x05725): (103, "PropertyDescriptorObjectMap"),
("read_only_space", 0x0574d): (104, "PrototypeInfoMap"),
("read_only_space", 0x05775): (105, "ScriptMap"),
("read_only_space", 0x0579d): (106, "SourceTextModuleInfoEntryMap"),
("read_only_space", 0x057c5): (107, "StackFrameInfoMap"),
("read_only_space", 0x057ed): (108, "StackTraceFrameMap"),
("read_only_space", 0x05815): (109, "TemplateObjectDescriptionMap"),
("read_only_space", 0x0583d): (110, "Tuple2Map"),
("read_only_space", 0x05865): (111, "WasmCapiFunctionDataMap"),
("read_only_space", 0x0588d): (112, "WasmExceptionTagMap"),
("read_only_space", 0x058b5): (113, "WasmExportedFunctionDataMap"),
("read_only_space", 0x058dd): (114, "WasmIndirectFunctionTableMap"),
("read_only_space", 0x05905): (115, "WasmJSFunctionDataMap"),
("read_only_space", 0x0592d): (116, "WasmValueMap"),
("read_only_space", 0x05955): (136, "SloppyArgumentsElementsMap"),
("read_only_space", 0x0597d): (172, "OnHeapBasicBlockProfilerDataMap"),
("read_only_space", 0x059a5): (169, "InternalClassMap"),
("read_only_space", 0x059cd): (178, "SmiPairMap"),
("read_only_space", 0x059f5): (177, "SmiBoxMap"),
("read_only_space", 0x05a1d): (147, "ExportedSubClassBaseMap"),
("read_only_space", 0x05a45): (148, "ExportedSubClassMap"),
("read_only_space", 0x05a6d): (68, "AbstractInternalClassSubclass1Map"),
("read_only_space", 0x05a95): (69, "AbstractInternalClassSubclass2Map"),
("read_only_space", 0x05abd): (135, "InternalClassWithSmiElementsMap"),
("read_only_space", 0x05ae5): (170, "InternalClassWithStructElementsMap"),
("read_only_space", 0x05b0d): (149, "ExportedSubClass2Map"),
("read_only_space", 0x05b35): (179, "SortStateMap"),
("read_only_space", 0x05b5d): (86, "AllocationSiteWithWeakNextMap"),
("read_only_space", 0x05b85): (86, "AllocationSiteWithoutWeakNextMap"),
("read_only_space", 0x05bad): (77, "LoadHandler1Map"),
("read_only_space", 0x05bd5): (77, "LoadHandler2Map"),
("read_only_space", 0x05bfd): (77, "LoadHandler3Map"),
("read_only_space", 0x05c25): (78, "StoreHandler0Map"),
("read_only_space", 0x05c4d): (78, "StoreHandler1Map"),
("read_only_space", 0x05c75): (78, "StoreHandler2Map"),
("read_only_space", 0x05c9d): (78, "StoreHandler3Map"),
("read_only_space", 0x053b9): (72, "PromiseFulfillReactionJobTaskMap"),
("read_only_space", 0x053e1): (73, "PromiseRejectReactionJobTaskMap"),
("read_only_space", 0x05409): (74, "CallableTaskMap"),
("read_only_space", 0x05431): (75, "CallbackTaskMap"),
("read_only_space", 0x05459): (76, "PromiseResolveThenableJobTaskMap"),
("read_only_space", 0x05481): (79, "FunctionTemplateInfoMap"),
("read_only_space", 0x054a9): (80, "ObjectTemplateInfoMap"),
("read_only_space", 0x054d1): (81, "AccessCheckInfoMap"),
("read_only_space", 0x054f9): (82, "AccessorInfoMap"),
("read_only_space", 0x05521): (83, "AccessorPairMap"),
("read_only_space", 0x05549): (84, "AliasedArgumentsEntryMap"),
("read_only_space", 0x05571): (85, "AllocationMementoMap"),
("read_only_space", 0x05599): (88, "AsmWasmDataMap"),
("read_only_space", 0x055c1): (89, "AsyncGeneratorRequestMap"),
("read_only_space", 0x055e9): (90, "BreakPointMap"),
("read_only_space", 0x05611): (91, "BreakPointInfoMap"),
("read_only_space", 0x05639): (92, "CachedTemplateObjectMap"),
("read_only_space", 0x05661): (94, "ClassPositionsMap"),
("read_only_space", 0x05689): (95, "DebugInfoMap"),
("read_only_space", 0x056b1): (98, "FunctionTemplateRareDataMap"),
("read_only_space", 0x056d9): (100, "InterpreterDataMap"),
("read_only_space", 0x05701): (101, "PromiseCapabilityMap"),
("read_only_space", 0x05729): (102, "PromiseReactionMap"),
("read_only_space", 0x05751): (103, "PropertyDescriptorObjectMap"),
("read_only_space", 0x05779): (104, "PrototypeInfoMap"),
("read_only_space", 0x057a1): (105, "ScriptMap"),
("read_only_space", 0x057c9): (106, "SourceTextModuleInfoEntryMap"),
("read_only_space", 0x057f1): (107, "StackFrameInfoMap"),
("read_only_space", 0x05819): (108, "StackTraceFrameMap"),
("read_only_space", 0x05841): (109, "TemplateObjectDescriptionMap"),
("read_only_space", 0x05869): (110, "Tuple2Map"),
("read_only_space", 0x05891): (111, "WasmCapiFunctionDataMap"),
("read_only_space", 0x058b9): (112, "WasmExceptionTagMap"),
("read_only_space", 0x058e1): (113, "WasmExportedFunctionDataMap"),
("read_only_space", 0x05909): (114, "WasmIndirectFunctionTableMap"),
("read_only_space", 0x05931): (115, "WasmJSFunctionDataMap"),
("read_only_space", 0x05959): (116, "WasmValueMap"),
("read_only_space", 0x05981): (136, "SloppyArgumentsElementsMap"),
("read_only_space", 0x059a9): (172, "OnHeapBasicBlockProfilerDataMap"),
("read_only_space", 0x059d1): (169, "InternalClassMap"),
("read_only_space", 0x059f9): (178, "SmiPairMap"),
("read_only_space", 0x05a21): (177, "SmiBoxMap"),
("read_only_space", 0x05a49): (147, "ExportedSubClassBaseMap"),
("read_only_space", 0x05a71): (148, "ExportedSubClassMap"),
("read_only_space", 0x05a99): (68, "AbstractInternalClassSubclass1Map"),
("read_only_space", 0x05ac1): (69, "AbstractInternalClassSubclass2Map"),
("read_only_space", 0x05ae9): (135, "InternalClassWithSmiElementsMap"),
("read_only_space", 0x05b11): (170, "InternalClassWithStructElementsMap"),
("read_only_space", 0x05b39): (149, "ExportedSubClass2Map"),
("read_only_space", 0x05b61): (179, "SortStateMap"),
("read_only_space", 0x05b89): (86, "AllocationSiteWithWeakNextMap"),
("read_only_space", 0x05bb1): (86, "AllocationSiteWithoutWeakNextMap"),
("read_only_space", 0x05bd9): (77, "LoadHandler1Map"),
("read_only_space", 0x05c01): (77, "LoadHandler2Map"),
("read_only_space", 0x05c29): (77, "LoadHandler3Map"),
("read_only_space", 0x05c51): (78, "StoreHandler0Map"),
("read_only_space", 0x05c79): (78, "StoreHandler1Map"),
("read_only_space", 0x05ca1): (78, "StoreHandler2Map"),
("read_only_space", 0x05cc9): (78, "StoreHandler3Map"),
("map_space", 0x0211d): (1057, "ExternalMap"),
("map_space", 0x02145): (1072, "JSMessageObjectMap"),
("map_space", 0x0216d): (181, "WasmRttEqrefMap"),