[Intl] Sync Intl.Segmenter w/ latest spec

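Change the initial value of breakType to undefined.
Store whether the break type has been set in a bit of the flags field.
Change the following()/preceding() algorithm to use ToIndex and explicit range checks.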

Bug: v8:6891
Change-Id: Id2cc1e90c28d92364318928fc8a377f172ebb339
Reviewed-on: https://chromium-review.googlesource.com/c/1374996
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Commit-Queue: Frank Tang <ftang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#58298}
This commit is contained in:
Frank Tang 2018-12-17 14:11:14 -08:00 committed by Commit Bot
parent f97022cff7
commit cc494cd3f9
10 changed files with 77 additions and 46 deletions

View File

@ -26,6 +26,9 @@ ACCESSORS2(JSSegmentIterator, icu_break_iterator, Managed<icu::BreakIterator>,
ACCESSORS2(JSSegmentIterator, unicode_string, Managed<icu::UnicodeString>,
kUnicodeStringOffset)
BIT_FIELD_ACCESSORS(JSSegmentIterator, flags, is_break_type_set,
JSSegmentIterator::BreakTypeSetBits)
SMI_ACCESSORS(JSSegmentIterator, flags, kFlagsOffset)
CAST_ACCESSOR2(JSSegmentIterator);

View File

@ -69,15 +69,19 @@ MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
segment_iterator->set_unicode_string(unicode_string);
// 4. Let iterator.[[SegmentIteratorPosition]] be 0.
// 5. Let iterator.[[SegmentIteratorBreakType]] be an implementation-dependent
// string representing a break at the edge of a string.
// step 4 and 5 are stored inside break_iterator.
// step 4 is stored inside break_iterator.
// 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
segment_iterator->set_is_break_type_set(false);
return segment_iterator;
}
// ecma402 #sec-segment-iterator-prototype-breakType
Handle<Object> JSSegmentIterator::BreakType() const {
if (!is_break_type_set()) {
return GetReadOnlyRoots().undefined_value_handle();
}
icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
int32_t rule_status = break_iterator->getRuleStatus();
switch (granularity()) {
@ -153,6 +157,7 @@ MaybeHandle<JSReceiver> JSSegmentIterator::Next(
int32_t prev = icu_break_iterator->current();
// 4. Let done be AdvanceSegmentIterator(iterator, forwards).
int32_t position = icu_break_iterator->next();
segment_iterator->set_is_break_type_set(true);
if (position == icu::BreakIterator::DONE) {
// 5. If done is true, return CreateIterResultObject(undefined, true).
return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
@ -206,19 +211,25 @@ Maybe<bool> JSSegmentIterator::Following(
if (!from_obj->IsUndefined()) {
// a. Let from be ? ToIndex(from).
uint32_t from;
if (!from_obj->ToArrayIndex(&from)) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->NewStringFromStaticChars("from"),
factory->NewStringFromStaticChars("following"),
from_obj),
Nothing<bool>());
}
// b. If from ≥ iterator.[[SegmentIteratorString]], throw a RangeError
// exception.
// c. Let iterator.[[SegmentIteratorPosition]] be from.
if (icu_break_iterator->following(from) == icu::BreakIterator::DONE) {
Handle<Object> index;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
isolate, index,
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
Nothing<bool>());
if (!index->ToArrayIndex(&from)) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->NewStringFromStaticChars("from"),
factory->NewStringFromStaticChars("following"), index),
Nothing<bool>());
}
// b. Let length be the length of iterator.[[SegmentIteratorString]].
uint32_t length =
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
// c. If from ≥ length, throw a RangeError exception.
if (from >= length) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
@ -227,12 +238,17 @@ Maybe<bool> JSSegmentIterator::Following(
from_obj),
Nothing<bool>());
}
// d. Let iterator.[[SegmentIteratorPosition]] be from.
segment_iterator->set_is_break_type_set(true);
icu_break_iterator->following(from);
return Just(false);
}
// 4. return AdvanceSegmentIterator(iterator, forward).
// 4. .... or if direction is backwards and position is 0, return true.
// 4. If direction is forwards and position is the length of string ... return
// true.
segment_iterator->set_is_break_type_set(true);
return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
}
@ -247,22 +263,25 @@ Maybe<bool> JSSegmentIterator::Preceding(
if (!from_obj->IsUndefined()) {
// a. Let from be ? ToIndex(from).
uint32_t from;
if (!from_obj->ToArrayIndex(&from)) {
Handle<Object> index;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
isolate, index,
Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
Nothing<bool>());
if (!index->ToArrayIndex(&from)) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
factory->NewStringFromStaticChars("from"),
factory->NewStringFromStaticChars("following"),
from_obj),
factory->NewStringFromStaticChars("preceding"), index),
Nothing<bool>());
}
// b. If from > iterator.[[SegmentIteratorString]] or from = 0, throw a
// RangeError exception.
// c. Let iterator.[[SegmentIteratorPosition]] be from.
uint32_t text_len =
// b. Let length be the length of iterator.[[SegmentIteratorString]].
uint32_t length =
static_cast<uint32_t>(icu_break_iterator->getText().getLength());
if (from > text_len ||
icu_break_iterator->preceding(from) == icu::BreakIterator::DONE) {
// c. If from > length or from = 0, throw a RangeError exception.
if (from > length || from == 0) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
@ -271,10 +290,14 @@ Maybe<bool> JSSegmentIterator::Preceding(
from_obj),
Nothing<bool>());
}
// d. Let iterator.[[SegmentIteratorPosition]] be from.
segment_iterator->set_is_break_type_set(true);
icu_break_iterator->preceding(from);
return Just(false);
}
// 4. return AdvanceSegmentIterator(iterator, backwards).
// 4. .... or if direction is backwards and position is 0, return true.
segment_iterator->set_is_break_type_set(true);
return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
}

View File

@ -54,6 +54,8 @@ class JSSegmentIterator : public JSObject {
Handle<String> GranularityAsString() const;
DECL_BOOLEAN_ACCESSORS(is_break_type_set)
// ecma402 #sec-segment-iterator-prototype-breakType
Handle<Object> BreakType() const;
@ -74,8 +76,9 @@ class JSSegmentIterator : public JSObject {
inline JSSegmenter::Granularity granularity() const;
// Bit positions in |flags|.
#define FLAGS_BIT_FIELDS(V, _) \
V(GranularityBits, JSSegmenter::Granularity, 3, _)
#define FLAGS_BIT_FIELDS(V, _) \
V(GranularityBits, JSSegmenter::Granularity, 3, _) \
V(BreakTypeSetBits, bool, 1, _)
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
#undef FLAGS_BIT_FIELDS

View File

@ -10,9 +10,13 @@ const iter = segmenter.segment(text);
assertEquals("function", typeof iter.following);
assertThrows(() => iter.following("ABC"), RangeError);
assertThrows(() => iter.following(null), RangeError);
assertThrows(() => iter.following(1.4), RangeError);
// ToNumber("ABC") returns NaN, ToInteger("ABC") returns +0, ToIndex("ABC") returns 0
assertDoesNotThrow(() => iter.following("ABC"));
// ToNumber(null) returns +0, ToInteger(null) returns +0, ToIndex(null) returns 0
assertDoesNotThrow(() => iter.following(null));
// ToNumber(1.4) returns 1.4, ToInteger(1.4) returns 1, ToIndex(1.4) returns 1
assertDoesNotThrow(() => iter.following(1.4));
assertThrows(() => iter.following(-3), RangeError);
// 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] )

View File

@ -10,11 +10,15 @@ const iter = segmenter.segment(text);
assertEquals("function", typeof iter.preceding);
// ToNumber("ABC") returns NaN, ToInteger("ABC") returns +0, ToIndex("ABC") returns 0
assertThrows(() => iter.preceding("ABC"), RangeError);
// ToNumber(null) returns +0, ToInteger(null) returns +0, ToIndex(null) returns 0
assertThrows(() => iter.preceding(null), RangeError);
assertThrows(() => iter.preceding(1.4), RangeError);
assertThrows(() => iter.preceding(-3), RangeError);
// ToNumber(1.4) returns 1.4, ToInteger(1.4) returns 1, ToIndex(1.4) returns 1
assertDoesNotThrow(() => iter.preceding(1.4));
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
// 3.b If ... from = 0, throw a RangeError exception.
assertThrows(() => iter.preceding(0), RangeError);

View File

@ -11,9 +11,5 @@ for (const granularity of ["grapheme", "word", "sentence", "line"]) {
assertEquals("number", typeof iter.position);
assertEquals(0, iter.position);
if (granularity === "grapheme") {
assertEquals(undefined, iter.breakType);
} else {
assertEquals("string", typeof iter.breakType);
}
assertEquals(undefined, iter.breakType);
}

View File

@ -24,6 +24,6 @@ for (const text of [
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType);
assertEquals(undefined, iter.breakType);
assertEquals(0, iter.position);
}

View File

@ -24,6 +24,6 @@ for (const text of [
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType);
assertEquals(undefined, iter.breakType);
assertEquals(0, iter.position);
}

View File

@ -24,6 +24,6 @@ for (const text of [
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assertTrue(["word", "none"].includes(iter.breakType), iter.breakType);
assertEquals(undefined, iter.breakType);
assertEquals(0, iter.position);
}

View File

@ -661,31 +661,29 @@
'language/expressions/await/for-await-of-interleaved': ['--harmony-await-optimization'],
'language/expressions/await/async-await-interleaved': ['--harmony-await-optimization'],
# https://bugs.chromium.org/p/v8/issues/detail?id=6891
# https://bugs.chromium.org/p/v8/issues/detail?id=8588
'intl402/Segmenter/iterator/following': [FAIL],
'intl402/Segmenter/iterator/granularity': [FAIL],
'intl402/Segmenter/iterator/preceding': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=8588
'intl402/Segmenter/iterator/position': [FAIL],
'intl402/Segmenter/iterator/preceding': [FAIL],
'intl402/Segmenter/iterator/prototype': [FAIL],
'intl402/Segmenter/prototype/segment/segment-grapheme': [FAIL],
'intl402/Segmenter/prototype/segment/segment-grapheme-following': [FAIL],
'intl402/Segmenter/prototype/segment/segment-grapheme-iterable': [FAIL],
'intl402/Segmenter/prototype/segment/segment-grapheme-next': [FAIL],
'intl402/Segmenter/prototype/segment/segment-grapheme-preceding': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-following': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-iterable': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-next': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-preceding': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence-following': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence-iterable': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence-next': [FAIL],
'intl402/Segmenter/prototype/segment/segment-sentence-preceding': [FAIL],
'intl402/Segmenter/prototype/segment/segment-tostring': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word-following': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word-iterable': [FAIL],
'intl402/Segmenter/prototype/segment/segment-word-next': [FAIL],