From cc494cd3f96f92aff60ace1af9a3ffd3f9f22051 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Mon, 17 Dec 2018 14:11:14 -0800 Subject: [PATCH] [Intl] Sync Intl.Segmenter w/ latest spec Change the initial value of breakType to undefined Store break type into bits Change the algorithm Bug: v8:6891 Change-Id: Id2cc1e90c28d92364318928fc8a377f172ebb339 Reviewed-on: https://chromium-review.googlesource.com/c/1374996 Reviewed-by: Sathya Gunasekaran Commit-Queue: Frank Tang Cr-Commit-Position: refs/heads/master@{#58298} --- src/objects/js-segment-iterator-inl.h | 3 + src/objects/js-segment-iterator.cc | 73 ++++++++++++------- src/objects/js-segment-iterator.h | 7 +- .../segmenter/segment-iterator-following.js | 10 ++- .../segmenter/segment-iterator-preceding.js | 6 +- test/intl/segmenter/segment-iterator.js | 6 +- test/intl/segmenter/segment-line.js | 2 +- test/intl/segmenter/segment-sentence.js | 2 +- test/intl/segmenter/segment-word.js | 2 +- test/test262/test262.status | 12 ++- 10 files changed, 77 insertions(+), 46 deletions(-) diff --git a/src/objects/js-segment-iterator-inl.h b/src/objects/js-segment-iterator-inl.h index 4c7875af22..c7c7e01b01 100644 --- a/src/objects/js-segment-iterator-inl.h +++ b/src/objects/js-segment-iterator-inl.h @@ -26,6 +26,9 @@ ACCESSORS2(JSSegmentIterator, icu_break_iterator, Managed, ACCESSORS2(JSSegmentIterator, unicode_string, Managed, kUnicodeStringOffset) +BIT_FIELD_ACCESSORS(JSSegmentIterator, flags, is_break_type_set, + JSSegmentIterator::BreakTypeSetBits) + SMI_ACCESSORS(JSSegmentIterator, flags, kFlagsOffset) CAST_ACCESSOR2(JSSegmentIterator); diff --git a/src/objects/js-segment-iterator.cc b/src/objects/js-segment-iterator.cc index 17c06d5637..0ad71ac768 100644 --- a/src/objects/js-segment-iterator.cc +++ b/src/objects/js-segment-iterator.cc @@ -69,15 +69,19 @@ MaybeHandle JSSegmentIterator::Create( segment_iterator->set_unicode_string(unicode_string); // 4. Let iterator.[[SegmentIteratorPosition]] be 0. - // 5. Let iterator.[[SegmentIteratorBreakType]] be an implementation-dependent - // string representing a break at the edge of a string. - // step 4 and 5 are stored inside break_iterator. + // step 4 is stored inside break_iterator. + + // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined. + segment_iterator->set_is_break_type_set(false); return segment_iterator; } // ecma402 #sec-segment-iterator-prototype-breakType Handle JSSegmentIterator::BreakType() const { + if (!is_break_type_set()) { + return GetReadOnlyRoots().undefined_value_handle(); + } icu::BreakIterator* break_iterator = icu_break_iterator()->raw(); int32_t rule_status = break_iterator->getRuleStatus(); switch (granularity()) { @@ -153,6 +157,7 @@ MaybeHandle JSSegmentIterator::Next( int32_t prev = icu_break_iterator->current(); // 4. Let done be AdvanceSegmentIterator(iterator, forwards). int32_t position = icu_break_iterator->next(); + segment_iterator->set_is_break_type_set(true); if (position == icu::BreakIterator::DONE) { // 5. If done is true, return CreateIterResultObject(undefined, true). return factory->NewJSIteratorResult(isolate->factory()->undefined_value(), @@ -206,19 +211,25 @@ Maybe JSSegmentIterator::Following( if (!from_obj->IsUndefined()) { // a. Let from be ? ToIndex(from). uint32_t from; - if (!from_obj->ToArrayIndex(&from)) { - THROW_NEW_ERROR_RETURN_VALUE( - isolate, - NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, - factory->NewStringFromStaticChars("from"), - factory->NewStringFromStaticChars("following"), - from_obj), - Nothing()); - } - // b. If from ≥ iterator.[[SegmentIteratorString]], throw a RangeError - // exception. - // c. Let iterator.[[SegmentIteratorPosition]] be from. - if (icu_break_iterator->following(from) == icu::BreakIterator::DONE) { + Handle index; + ASSIGN_RETURN_ON_EXCEPTION_VALUE( + isolate, index, + Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex), + Nothing()); + if (!index->ToArrayIndex(&from)) { + THROW_NEW_ERROR_RETURN_VALUE( + isolate, + NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, + factory->NewStringFromStaticChars("from"), + factory->NewStringFromStaticChars("following"), index), + Nothing()); + } + // b. Let length be the length of iterator.[[SegmentIteratorString]]. + uint32_t length = + static_cast(icu_break_iterator->getText().getLength()); + + // c. If from ≥ length, throw a RangeError exception. + if (from >= length) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, @@ -227,12 +238,17 @@ Maybe JSSegmentIterator::Following( from_obj), Nothing()); } + + // d. Let iterator.[[SegmentIteratorPosition]] be from. + segment_iterator->set_is_break_type_set(true); + icu_break_iterator->following(from); return Just(false); } // 4. return AdvanceSegmentIterator(iterator, forward). // 4. .... or if direction is backwards and position is 0, return true. // 4. If direction is forwards and position is the length of string ... return // true. + segment_iterator->set_is_break_type_set(true); return Just(icu_break_iterator->next() == icu::BreakIterator::DONE); } @@ -247,22 +263,25 @@ Maybe JSSegmentIterator::Preceding( if (!from_obj->IsUndefined()) { // a. Let from be ? ToIndex(from). uint32_t from; - if (!from_obj->ToArrayIndex(&from)) { + Handle index; + ASSIGN_RETURN_ON_EXCEPTION_VALUE( + isolate, index, + Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex), + Nothing()); + + if (!index->ToArrayIndex(&from)) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, factory->NewStringFromStaticChars("from"), - factory->NewStringFromStaticChars("following"), - from_obj), + factory->NewStringFromStaticChars("preceding"), index), Nothing()); } - // b. If from > iterator.[[SegmentIteratorString]] or from = 0, throw a - // RangeError exception. - // c. Let iterator.[[SegmentIteratorPosition]] be from. - uint32_t text_len = + // b. Let length be the length of iterator.[[SegmentIteratorString]]. + uint32_t length = static_cast(icu_break_iterator->getText().getLength()); - if (from > text_len || - icu_break_iterator->preceding(from) == icu::BreakIterator::DONE) { + // c. If from > length or from = 0, throw a RangeError exception. + if (from > length || from == 0) { THROW_NEW_ERROR_RETURN_VALUE( isolate, NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, @@ -271,10 +290,14 @@ Maybe JSSegmentIterator::Preceding( from_obj), Nothing()); } + // d. Let iterator.[[SegmentIteratorIndex]] be from. + segment_iterator->set_is_break_type_set(true); + icu_break_iterator->preceding(from); return Just(false); } // 4. return AdvanceSegmentIterator(iterator, backwards). // 4. .... or if direction is backwards and position is 0, return true. + segment_iterator->set_is_break_type_set(true); return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE); } diff --git a/src/objects/js-segment-iterator.h b/src/objects/js-segment-iterator.h index 5cea4f6cb7..9405db6ed5 100644 --- a/src/objects/js-segment-iterator.h +++ b/src/objects/js-segment-iterator.h @@ -54,6 +54,8 @@ class JSSegmentIterator : public JSObject { Handle GranularityAsString() const; + DECL_BOOLEAN_ACCESSORS(is_break_type_set) + // ecma402 #sec-segment-iterator-prototype-breakType Handle BreakType() const; @@ -74,8 +76,9 @@ class JSSegmentIterator : public JSObject { inline JSSegmenter::Granularity granularity() const; // Bit positions in |flags|. -#define FLAGS_BIT_FIELDS(V, _) \ - V(GranularityBits, JSSegmenter::Granularity, 3, _) +#define FLAGS_BIT_FIELDS(V, _) \ + V(GranularityBits, JSSegmenter::Granularity, 3, _) \ + V(BreakTypeSetBits, bool, 1, _) DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS) #undef FLAGS_BIT_FIELDS diff --git a/test/intl/segmenter/segment-iterator-following.js b/test/intl/segmenter/segment-iterator-following.js index c92cbaac35..14f6dd16c4 100644 --- a/test/intl/segmenter/segment-iterator-following.js +++ b/test/intl/segmenter/segment-iterator-following.js @@ -10,9 +10,13 @@ const iter = segmenter.segment(text); assertEquals("function", typeof iter.following); -assertThrows(() => iter.following("ABC"), RangeError); -assertThrows(() => iter.following(null), RangeError); -assertThrows(() => iter.following(1.4), RangeError); +// ToNumber("ABC") return NaN, ToInteger("ABC") return +0, ToIndex("ABC") return 0 +assertDoesNotThrow(() => iter.following("ABC")); +// ToNumber(null) return +0, ToInteger(null) return +0, ToIndex(null) return 0 +assertDoesNotThrow(() => iter.following(null)); +// ToNumber(1.4) return 1.4, ToInteger(1.4) return 1, ToIndex(1.4) return 1 +assertDoesNotThrow(() => iter.following(1.4)); + assertThrows(() => iter.following(-3), RangeError); // 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] ) diff --git a/test/intl/segmenter/segment-iterator-preceding.js b/test/intl/segmenter/segment-iterator-preceding.js index 0130a4d229..09ba2847cc 100644 --- a/test/intl/segmenter/segment-iterator-preceding.js +++ b/test/intl/segmenter/segment-iterator-preceding.js @@ -10,11 +10,15 @@ const iter = segmenter.segment(text); assertEquals("function", typeof iter.preceding); +// ToNumber("ABC") return NaN, ToInteger("ABC") return +0, ToIndex("ABC") return 0 assertThrows(() => iter.preceding("ABC"), RangeError); +// ToNumber(null) return +0, ToInteger(null) return +0, ToIndex(null) return 0 assertThrows(() => iter.preceding(null), RangeError); -assertThrows(() => iter.preceding(1.4), RangeError); assertThrows(() => iter.preceding(-3), RangeError); +// ToNumber(1.4) return 1.4, ToInteger(1.4) return 1, ToIndex(1.4) return 1 +assertDoesNotThrow(() => iter.preceding(1.4)); + // 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] ) // 3.b If ... from = 0, throw a RangeError exception. assertThrows(() => iter.preceding(0), RangeError); diff --git a/test/intl/segmenter/segment-iterator.js b/test/intl/segmenter/segment-iterator.js index 2821c8faaa..816de6b0bc 100644 --- a/test/intl/segmenter/segment-iterator.js +++ b/test/intl/segmenter/segment-iterator.js @@ -11,9 +11,5 @@ for (const granularity of ["grapheme", "word", "sentence", "line"]) { assertEquals("number", typeof iter.position); assertEquals(0, iter.position); - if (granularity === "grapheme") { - assertEquals(undefined, iter.breakType); - } else { - assertEquals("string", typeof iter.breakType); - } + assertEquals(undefined, iter.breakType); } diff --git a/test/intl/segmenter/segment-line.js b/test/intl/segmenter/segment-line.js index cbaa44356f..109122d9e6 100644 --- a/test/intl/segmenter/segment-line.js +++ b/test/intl/segmenter/segment-line.js @@ -24,6 +24,6 @@ for (const text of [ "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean ]) { const iter = seg.segment(text); - assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType); + assertEquals(undefined, iter.breakType); assertEquals(0, iter.position); } diff --git a/test/intl/segmenter/segment-sentence.js b/test/intl/segmenter/segment-sentence.js index b258d61998..e7942f5da8 100644 --- a/test/intl/segmenter/segment-sentence.js +++ b/test/intl/segmenter/segment-sentence.js @@ -24,6 +24,6 @@ for (const text of [ "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean ]) { const iter = seg.segment(text); - assertTrue(["sep", "term"].includes(iter.breakType), iter.breakType); + assertEquals(undefined, iter.breakType); assertEquals(0, iter.position); } diff --git a/test/intl/segmenter/segment-word.js b/test/intl/segmenter/segment-word.js index 4c74fbee3c..21820a4be3 100644 --- a/test/intl/segmenter/segment-word.js +++ b/test/intl/segmenter/segment-word.js @@ -24,6 +24,6 @@ for (const text of [ "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean ]) { const iter = seg.segment(text); - assertTrue(["word", "none"].includes(iter.breakType), iter.breakType); + assertEquals(undefined, iter.breakType); assertEquals(0, iter.position); } diff --git a/test/test262/test262.status b/test/test262/test262.status index 599abe2ee9..29953e262a 100644 --- a/test/test262/test262.status +++ b/test/test262/test262.status @@ -661,31 +661,29 @@ 'language/expressions/await/for-await-of-interleaved': ['--harmony-await-optimization'], 'language/expressions/await/async-await-interleaved': ['--harmony-await-optimization'], - # https://bugs.chromium.org/p/v8/issues/detail?id=6891 + # https://bugs.chromium.org/p/v8/issues/detail?id=8588 'intl402/Segmenter/iterator/following': [FAIL], 'intl402/Segmenter/iterator/granularity': [FAIL], - 'intl402/Segmenter/iterator/preceding': [FAIL], - 'intl402/Segmenter/prototype/segment/segment-line': [FAIL], - 'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL], - 'intl402/Segmenter/prototype/segment/segment-word': [FAIL], - - # https://bugs.chromium.org/p/v8/issues/detail?id=8588 'intl402/Segmenter/iterator/position': [FAIL], + 'intl402/Segmenter/iterator/preceding': [FAIL], 'intl402/Segmenter/iterator/prototype': [FAIL], 'intl402/Segmenter/prototype/segment/segment-grapheme': [FAIL], 'intl402/Segmenter/prototype/segment/segment-grapheme-following': [FAIL], 'intl402/Segmenter/prototype/segment/segment-grapheme-iterable': [FAIL], 'intl402/Segmenter/prototype/segment/segment-grapheme-next': [FAIL], 'intl402/Segmenter/prototype/segment/segment-grapheme-preceding': [FAIL], + 'intl402/Segmenter/prototype/segment/segment-line': [FAIL], 'intl402/Segmenter/prototype/segment/segment-line-following': [FAIL], 'intl402/Segmenter/prototype/segment/segment-line-iterable': [FAIL], 'intl402/Segmenter/prototype/segment/segment-line-next': [FAIL], 'intl402/Segmenter/prototype/segment/segment-line-preceding': [FAIL], + 'intl402/Segmenter/prototype/segment/segment-sentence': [FAIL], 'intl402/Segmenter/prototype/segment/segment-sentence-following': [FAIL], 'intl402/Segmenter/prototype/segment/segment-sentence-iterable': [FAIL], 'intl402/Segmenter/prototype/segment/segment-sentence-next': [FAIL], 'intl402/Segmenter/prototype/segment/segment-sentence-preceding': [FAIL], 'intl402/Segmenter/prototype/segment/segment-tostring': [FAIL], + 'intl402/Segmenter/prototype/segment/segment-word': [FAIL], 'intl402/Segmenter/prototype/segment/segment-word-following': [FAIL], 'intl402/Segmenter/prototype/segment/segment-word-iterable': [FAIL], 'intl402/Segmenter/prototype/segment/segment-word-next': [FAIL],