[Intl] Sync Intl.Segmenter w/ latest spec
Change the initial value of breakType to undefined Store break type into bits Change the algorithm Bug: v8:6891 Change-Id: Id2cc1e90c28d92364318928fc8a377f172ebb339 Reviewed-on: https://chromium-review.googlesource.com/c/1374996 Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org> Commit-Queue: Frank Tang <ftang@chromium.org> Cr-Commit-Position: refs/heads/master@{#58298}
This commit is contained in:
parent
f97022cff7
commit
cc494cd3f9
@ -26,6 +26,9 @@ ACCESSORS2(JSSegmentIterator, icu_break_iterator, Managed<icu::BreakIterator>,
|
||||
ACCESSORS2(JSSegmentIterator, unicode_string, Managed<icu::UnicodeString>,
|
||||
kUnicodeStringOffset)
|
||||
|
||||
BIT_FIELD_ACCESSORS(JSSegmentIterator, flags, is_break_type_set,
|
||||
JSSegmentIterator::BreakTypeSetBits)
|
||||
|
||||
SMI_ACCESSORS(JSSegmentIterator, flags, kFlagsOffset)
|
||||
|
||||
CAST_ACCESSOR2(JSSegmentIterator);
|
||||
|
@ -69,15 +69,19 @@ MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
|
||||
segment_iterator->set_unicode_string(unicode_string);
|
||||
|
||||
// 4. Let iterator.[[SegmentIteratorPosition]] be 0.
|
||||
// 5. Let iterator.[[SegmentIteratorBreakType]] be an implementation-dependent
|
||||
// string representing a break at the edge of a string.
|
||||
// step 4 and 5 are stored inside break_iterator.
|
||||
// step 4 is stored inside break_iterator.
|
||||
|
||||
// 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
|
||||
segment_iterator->set_is_break_type_set(false);
|
||||
|
||||
return segment_iterator;
|
||||
}
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-breakType
|
||||
Handle<Object> JSSegmentIterator::BreakType() const {
|
||||
if (!is_break_type_set()) {
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
}
|
||||
icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
|
||||
int32_t rule_status = break_iterator->getRuleStatus();
|
||||
switch (granularity()) {
|
||||
@ -153,6 +157,7 @@ MaybeHandle<JSReceiver> JSSegmentIterator::Next(
|
||||
int32_t prev = icu_break_iterator->current();
|
||||
// 4. Let done be AdvanceSegmentIterator(iterator, forwards).
|
||||
int32_t position = icu_break_iterator->next();
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
if (position == icu::BreakIterator::DONE) {
|
||||
// 5. If done is true, return CreateIterResultObject(undefined, true).
|
||||
return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
|
||||
@ -206,19 +211,25 @@ Maybe<bool> JSSegmentIterator::Following(
|
||||
if (!from_obj->IsUndefined()) {
|
||||
// a. Let from be ? ToIndex(from).
|
||||
uint32_t from;
|
||||
if (!from_obj->ToArrayIndex(&from)) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->NewStringFromStaticChars("from"),
|
||||
factory->NewStringFromStaticChars("following"),
|
||||
from_obj),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// b. If from ≥ iterator.[[SegmentIteratorString]], throw a RangeError
|
||||
// exception.
|
||||
// c. Let iterator.[[SegmentIteratorPosition]] be from.
|
||||
if (icu_break_iterator->following(from) == icu::BreakIterator::DONE) {
|
||||
Handle<Object> index;
|
||||
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
|
||||
isolate, index,
|
||||
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
|
||||
Nothing<bool>());
|
||||
if (!index->ToArrayIndex(&from)) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->NewStringFromStaticChars("from"),
|
||||
factory->NewStringFromStaticChars("following"), index),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// b. Let length be the length of iterator.[[SegmentIteratorString]].
|
||||
uint32_t length =
|
||||
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
|
||||
|
||||
// c. If from ≥ length, throw a RangeError exception.
|
||||
if (from >= length) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
@ -227,12 +238,17 @@ Maybe<bool> JSSegmentIterator::Following(
|
||||
from_obj),
|
||||
Nothing<bool>());
|
||||
}
|
||||
|
||||
// d. Let iterator.[[SegmentIteratorPosition]] be from.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
icu_break_iterator->following(from);
|
||||
return Just(false);
|
||||
}
|
||||
// 4. return AdvanceSegmentIterator(iterator, forward).
|
||||
// 4. .... or if direction is backwards and position is 0, return true.
|
||||
// 4. If direction is forwards and position is the length of string ... return
|
||||
// true.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
|
||||
}
|
||||
|
||||
@ -247,22 +263,25 @@ Maybe<bool> JSSegmentIterator::Preceding(
|
||||
if (!from_obj->IsUndefined()) {
|
||||
// a. Let from be ? ToIndex(from).
|
||||
uint32_t from;
|
||||
if (!from_obj->ToArrayIndex(&from)) {
|
||||
Handle<Object> index;
|
||||
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
|
||||
isolate, index,
|
||||
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
|
||||
Nothing<bool>());
|
||||
|
||||
if (!index->ToArrayIndex(&from)) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
factory->NewStringFromStaticChars("from"),
|
||||
factory->NewStringFromStaticChars("following"),
|
||||
from_obj),
|
||||
factory->NewStringFromStaticChars("preceding"), index),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// b. If from > iterator.[[SegmentIteratorString]] or from = 0, throw a
|
||||
// RangeError exception.
|
||||
// c. Let iterator.[[SegmentIteratorPosition]] be from.
|
||||
uint32_t text_len =
|
||||
// b. Let length be the length of iterator.[[SegmentIteratorString]].
|
||||
uint32_t length =
|
||||
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
|
||||
if (from > text_len ||
|
||||
icu_break_iterator->preceding(from) == icu::BreakIterator::DONE) {
|
||||
// c. If from > length or from = 0, throw a RangeError exception.
|
||||
if (from > length || from == 0) {
|
||||
THROW_NEW_ERROR_RETURN_VALUE(
|
||||
isolate,
|
||||
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
|
||||
@ -271,10 +290,14 @@ Maybe<bool> JSSegmentIterator::Preceding(
|
||||
from_obj),
|
||||
Nothing<bool>());
|
||||
}
|
||||
// d. Let iterator.[[SegmentIteratorIndex]] be from.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
icu_break_iterator->preceding(from);
|
||||
return Just(false);
|
||||
}
|
||||
// 4. return AdvanceSegmentIterator(iterator, backwards).
|
||||
// 4. .... or if direction is backwards and position is 0, return true.
|
||||
segment_iterator->set_is_break_type_set(true);
|
||||
return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,8 @@ class JSSegmentIterator : public JSObject {
|
||||
|
||||
Handle<String> GranularityAsString() const;
|
||||
|
||||
DECL_BOOLEAN_ACCESSORS(is_break_type_set)
|
||||
|
||||
// ecma402 #sec-segment-iterator-prototype-breakType
|
||||
Handle<Object> BreakType() const;
|
||||
|
||||
@ -74,8 +76,9 @@ class JSSegmentIterator : public JSObject {
|
||||
inline JSSegmenter::Granularity granularity() const;
|
||||
|
||||
// Bit positions in |flags|.
|
||||
#define FLAGS_BIT_FIELDS(V, _) \
|
||||
V(GranularityBits, JSSegmenter::Granularity, 3, _)
|
||||
#define FLAGS_BIT_FIELDS(V, _) \
|
||||
V(GranularityBits, JSSegmenter::Granularity, 3, _) \
|
||||
V(BreakTypeSetBits, bool, 1, _)
|
||||
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
|
||||
#undef FLAGS_BIT_FIELDS
|
||||
|
||||
|
@ -10,9 +10,13 @@ const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals("function", typeof iter.following);
|
||||
|
||||
assertThrows(() => iter.following("ABC"), RangeError);
|
||||
assertThrows(() => iter.following(null), RangeError);
|
||||
assertThrows(() => iter.following(1.4), RangeError);
|
||||
// ToNumber("ABC") return NaN, ToInteger("ABC") return +0, ToIndex("ABC") return 0
|
||||
assertDoesNotThrow(() => iter.following("ABC"));
|
||||
// ToNumber(null) return +0, ToInteger(null) return +0, ToIndex(null) return 0
|
||||
assertDoesNotThrow(() => iter.following(null));
|
||||
// ToNumber(1.4) return 1.4, ToInteger(1.4) return 1, ToIndex(1.4) return 1
|
||||
assertDoesNotThrow(() => iter.following(1.4));
|
||||
|
||||
assertThrows(() => iter.following(-3), RangeError);
|
||||
|
||||
// 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] )
|
||||
|
@ -10,11 +10,15 @@ const iter = segmenter.segment(text);
|
||||
|
||||
assertEquals("function", typeof iter.preceding);
|
||||
|
||||
// ToNumber("ABC") return NaN, ToInteger("ABC") return +0, ToIndex("ABC") return 0
|
||||
assertThrows(() => iter.preceding("ABC"), RangeError);
|
||||
// ToNumber(null) return +0, ToInteger(null) return +0, ToIndex(null) return 0
|
||||
assertThrows(() => iter.preceding(null), RangeError);
|
||||
assertThrows(() => iter.preceding(1.4), RangeError);
|
||||
assertThrows(() => iter.preceding(-3), RangeError);
|
||||
|
||||
// ToNumber(1.4) return 1.4, ToInteger(1.4) return 1, ToIndex(1.4) return 1
|
||||
assertDoesNotThrow(() => iter.preceding(1.4));
|
||||
|
||||
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
|
||||
// 3.b If ... from = 0, throw a RangeError exception.
|
||||
assertThrows(() => iter.preceding(0), RangeError);
|
||||
|
@ -11,9 +11,5 @@ for (const granularity of ["grapheme", "word", "sentence", "line"]) {
|
||||
|
||||
assertEquals("number", typeof iter.position);
|
||||
assertEquals(0, iter.position);
|
||||
if (granularity === "grapheme") {
|
||||
assertEquals(undefined, iter.breakType);
|
||||
} else {
|
||||
assertEquals("string", typeof iter.breakType);
|
||||
}
|
||||
assertEquals(undefined, iter.breakType);
|
||||
}
|
||||
|
@ -24,6 +24,6 @@ for (const text of [
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.position);
|
||||
}
|
||||
|
@ -24,6 +24,6 @@ for (const text of [
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.position);
|
||||
}
|
||||
|
@ -24,6 +24,6 @@ for (const text of [
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.position);
|
||||
}
|
||||
|
@ -661,31 +661,29 @@
|
||||
'language/expressions/await/for-await-of-interleaved': ['--harmony-await-optimization'],
|
||||
'language/expressions/await/async-await-interleaved': ['--harmony-await-optimization'],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=6891
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=8588
|
||||
'intl402/Segmenter/iterator/following': [FAIL],
|
||||
'intl402/Segmenter/iterator/granularity': [FAIL],
|
||||
'intl402/Segmenter/iterator/preceding': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word': [FAIL],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=8588
|
||||
'intl402/Segmenter/iterator/position': [FAIL],
|
||||
'intl402/Segmenter/iterator/preceding': [FAIL],
|
||||
'intl402/Segmenter/iterator/prototype': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-grapheme': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-grapheme-following': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-grapheme-iterable': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-grapheme-next': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-grapheme-preceding': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-following': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-iterable': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-next': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-preceding': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence-following': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence-iterable': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence-next': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-sentence-preceding': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-tostring': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word-following': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word-iterable': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-word-next': [FAIL],
|
||||
|
Loading…
Reference in New Issue
Block a user