[Intl] Remove linebreak from Segmenter
Sync w/ https://github.com/tc39/proposal-intl-segmenter/pull/60

Bug: v8:8717
Change-Id: I98fe9e88367a611c14c82195222c8fe8a52e4bc8
Reviewed-on: https://chromium-review.googlesource.com/c/1422749
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Commit-Queue: Frank Tang <ftang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59016}
This commit is contained in:
parent
437e134acc
commit
45fe356e24
@ -23,7 +23,6 @@
|
||||
V(_, h12_string, "h12") \
|
||||
V(_, h23_string, "h23") \
|
||||
V(_, h24_string, "h24") \
|
||||
V(_, hard_string, "hard") \
|
||||
V(_, hour_string, "hour") \
|
||||
V(_, hour12_string, "hour12") \
|
||||
V(_, hourCycle_string, "hourCycle") \
|
||||
@ -36,7 +35,6 @@
|
||||
V(_, integer_string, "integer") \
|
||||
V(_, kana_string, "kana") \
|
||||
V(_, letter_string, "letter") \
|
||||
V(_, lineBreakStyle_string, "lineBreakStyle") \
|
||||
V(_, list_string, "list") \
|
||||
V(_, literal_string, "literal") \
|
||||
V(_, locale_string, "locale") \
|
||||
@ -64,7 +62,6 @@
|
||||
V(_, SegmentIterator_string, "Segment Iterator") \
|
||||
V(_, sensitivity_string, "sensitivity") \
|
||||
V(_, sep_string, "sep") \
|
||||
V(_, soft_string, "soft") \
|
||||
V(_, strict_string, "strict") \
|
||||
V(_, style_string, "style") \
|
||||
V(_, term_string, "term") \
|
||||
|
@ -2103,7 +2103,6 @@ void JSSegmenter::JSSegmenterPrint(std::ostream& os) { // NOLINT
|
||||
JSObjectPrintHeader(os, *this, "JSSegmenter");
|
||||
os << "\n - locale: " << Brief(locale());
|
||||
os << "\n - granularity: " << GranularityAsString();
|
||||
os << "\n - lineBreakStyle: " << LineBreakStyleAsString();
|
||||
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
|
||||
JSObjectPrintBody(os, *this);
|
||||
}
|
||||
|
@ -37,8 +37,6 @@ Handle<String> JSSegmentIterator::GranularityAsString() const {
|
||||
return GetReadOnlyRoots().word_string_handle();
|
||||
case JSSegmenter::Granularity::SENTENCE:
|
||||
return GetReadOnlyRoots().sentence_string_handle();
|
||||
case JSSegmenter::Granularity::LINE:
|
||||
return GetReadOnlyRoots().line_string_handle();
|
||||
case JSSegmenter::Granularity::COUNT:
|
||||
UNREACHABLE();
|
||||
}
|
||||
@ -106,18 +104,6 @@ Handle<Object> JSSegmentIterator::BreakType() const {
|
||||
return GetReadOnlyRoots().word_string_handle();
|
||||
}
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
case JSSegmenter::Granularity::LINE:
|
||||
if (rule_status >= UBRK_LINE_SOFT && rule_status < UBRK_LINE_SOFT_LIMIT) {
|
||||
// soft line breaks, index at which a line break is acceptable but
|
||||
// not required
|
||||
return GetReadOnlyRoots().soft_string_handle();
|
||||
}
|
||||
if ((rule_status >= UBRK_LINE_HARD &&
|
||||
rule_status < UBRK_LINE_HARD_LIMIT)) {
|
||||
// hard, or mandatory line breaks
|
||||
return GetReadOnlyRoots().hard_string_handle();
|
||||
}
|
||||
return GetReadOnlyRoots().undefined_value_handle();
|
||||
case JSSegmenter::Granularity::SENTENCE:
|
||||
if (rule_status >= UBRK_SENTENCE_TERM &&
|
||||
rule_status < UBRK_SENTENCE_TERM_LIMIT) {
|
||||
|
@ -77,7 +77,7 @@ class JSSegmentIterator : public JSObject {
|
||||
|
||||
// Bit positions in |flags|.
|
||||
#define FLAGS_BIT_FIELDS(V, _) \
|
||||
V(GranularityBits, JSSegmenter::Granularity, 3, _) \
|
||||
V(GranularityBits, JSSegmenter::Granularity, 2, _) \
|
||||
V(BreakTypeSetBits, bool, 1, _)
|
||||
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
|
||||
#undef FLAGS_BIT_FIELDS
|
||||
@ -85,7 +85,6 @@ class JSSegmentIterator : public JSObject {
|
||||
STATIC_ASSERT(JSSegmenter::Granularity::GRAPHEME <= GranularityBits::kMax);
|
||||
STATIC_ASSERT(JSSegmenter::Granularity::WORD <= GranularityBits::kMax);
|
||||
STATIC_ASSERT(JSSegmenter::Granularity::SENTENCE <= GranularityBits::kMax);
|
||||
STATIC_ASSERT(JSSegmenter::Granularity::LINE <= GranularityBits::kMax);
|
||||
|
||||
// [flags] Bit field containing various flags about the function.
|
||||
DECL_INT_ACCESSORS(flags)
|
||||
|
@ -26,17 +26,6 @@ ACCESSORS(JSSegmenter, icu_break_iterator, Managed<icu::BreakIterator>,
|
||||
kICUBreakIteratorOffset)
|
||||
SMI_ACCESSORS(JSSegmenter, flags, kFlagsOffset)
|
||||
|
||||
inline void JSSegmenter::set_line_break_style(LineBreakStyle line_break_style) {
|
||||
DCHECK_GT(LineBreakStyle::COUNT, line_break_style);
|
||||
int hints = flags();
|
||||
hints = LineBreakStyleBits::update(hints, line_break_style);
|
||||
set_flags(hints);
|
||||
}
|
||||
|
||||
inline JSSegmenter::LineBreakStyle JSSegmenter::line_break_style() const {
|
||||
return LineBreakStyleBits::decode(flags());
|
||||
}
|
||||
|
||||
inline void JSSegmenter::set_granularity(Granularity granularity) {
|
||||
DCHECK_GT(Granularity::COUNT, granularity);
|
||||
int hints = flags();
|
||||
|
@ -23,18 +23,10 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
JSSegmenter::LineBreakStyle JSSegmenter::GetLineBreakStyle(const char* str) {
|
||||
if (strcmp(str, "strict") == 0) return JSSegmenter::LineBreakStyle::STRICT;
|
||||
if (strcmp(str, "normal") == 0) return JSSegmenter::LineBreakStyle::NORMAL;
|
||||
if (strcmp(str, "loose") == 0) return JSSegmenter::LineBreakStyle::LOOSE;
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
JSSegmenter::Granularity JSSegmenter::GetGranularity(const char* str) {
|
||||
if (strcmp(str, "grapheme") == 0) return JSSegmenter::Granularity::GRAPHEME;
|
||||
if (strcmp(str, "word") == 0) return JSSegmenter::Granularity::WORD;
|
||||
if (strcmp(str, "sentence") == 0) return JSSegmenter::Granularity::SENTENCE;
|
||||
if (strcmp(str, "line") == 0) return JSSegmenter::Granularity::LINE;
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
@ -72,25 +64,11 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
|
||||
MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
|
||||
Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
|
||||
|
||||
// 8. Set opt.[[lb]] to lineBreakStyle.
|
||||
|
||||
// 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
|
||||
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
|
||||
Intl::ResolvedLocale r =
|
||||
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
|
||||
requested_locales, matcher, {"lb"});
|
||||
|
||||
// 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
|
||||
// "strict", "normal", "loose" », "normal").
|
||||
Maybe<LineBreakStyle> maybe_line_break_style =
|
||||
Intl::GetStringOption<LineBreakStyle>(
|
||||
isolate, options, "lineBreakStyle", "Intl.Segmenter",
|
||||
{"strict", "normal", "loose"},
|
||||
{LineBreakStyle::STRICT, LineBreakStyle::NORMAL,
|
||||
LineBreakStyle::LOOSE},
|
||||
LineBreakStyle::NORMAL);
|
||||
MAYBE_RETURN(maybe_line_break_style, MaybeHandle<JSSegmenter>());
|
||||
LineBreakStyle line_break_style_enum = maybe_line_break_style.FromJust();
|
||||
requested_locales, matcher, {});
|
||||
|
||||
// 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
|
||||
Handle<String> locale_str =
|
||||
@ -98,12 +76,11 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
|
||||
segmenter_holder->set_locale(*locale_str);
|
||||
|
||||
// 13. Let granularity be ? GetOption(options, "granularity", "string", «
|
||||
// "grapheme", "word", "sentence", "line" », "grapheme").
|
||||
// "grapheme", "word", "sentence" », "grapheme").
|
||||
Maybe<Granularity> maybe_granularity = Intl::GetStringOption<Granularity>(
|
||||
isolate, options, "granularity", "Intl.Segmenter",
|
||||
{"grapheme", "word", "sentence", "line"},
|
||||
{Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE,
|
||||
Granularity::LINE},
|
||||
{"grapheme", "word", "sentence"},
|
||||
{Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE},
|
||||
Granularity::GRAPHEME);
|
||||
MAYBE_RETURN(maybe_granularity, MaybeHandle<JSSegmenter>());
|
||||
Granularity granularity_enum = maybe_granularity.FromJust();
|
||||
@ -111,14 +88,6 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
|
||||
// 14. Set segmenter.[[SegmenterGranularity]] to granularity.
|
||||
segmenter_holder->set_granularity(granularity_enum);
|
||||
|
||||
// 15. If granularity is "line",
|
||||
if (granularity_enum == Granularity::LINE) {
|
||||
// a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
|
||||
segmenter_holder->set_line_break_style(line_break_style_enum);
|
||||
} else {
|
||||
segmenter_holder->set_line_break_style(LineBreakStyle::NOTSET);
|
||||
}
|
||||
|
||||
icu::Locale icu_locale = r.icu_locale;
|
||||
DCHECK(!icu_locale.isBogus());
|
||||
|
||||
@ -138,21 +107,6 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
|
||||
icu_break_iterator.reset(
|
||||
icu::BreakIterator::createSentenceInstance(icu_locale, status));
|
||||
break;
|
||||
case Granularity::LINE: {
|
||||
// 15. If granularity is "line",
|
||||
// a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
|
||||
const char* key = uloc_toLegacyKey("lb");
|
||||
CHECK_NOT_NULL(key);
|
||||
const char* value =
|
||||
uloc_toLegacyType(key, segmenter_holder->LineBreakStyleAsCString());
|
||||
CHECK_NOT_NULL(value);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
icu_locale.setKeywordValue(key, value, status);
|
||||
CHECK(U_SUCCESS(status));
|
||||
icu_break_iterator.reset(
|
||||
icu::BreakIterator::createLineInstance(icu_locale, status));
|
||||
break;
|
||||
}
|
||||
case Granularity::COUNT:
|
||||
UNREACHABLE();
|
||||
}
|
||||
@ -185,49 +139,16 @@ Handle<JSObject> JSSegmenter::ResolvedOptions(
|
||||
// Internal Slot Property
|
||||
// [[Locale]] "locale"
|
||||
// [[SegmenterGranularity]] "granularity"
|
||||
// [[SegmenterLineBreakStyle]] "lineBreakStyle"
|
||||
|
||||
Handle<String> locale(segmenter_holder->locale(), isolate);
|
||||
JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
|
||||
NONE);
|
||||
JSObject::AddProperty(isolate, result, factory->granularity_string(),
|
||||
segmenter_holder->GranularityAsString(), NONE);
|
||||
if (segmenter_holder->line_break_style() != LineBreakStyle::NOTSET) {
|
||||
JSObject::AddProperty(isolate, result, factory->lineBreakStyle_string(),
|
||||
segmenter_holder->LineBreakStyleAsString(), NONE);
|
||||
}
|
||||
// 5. Return options.
|
||||
return result;
|
||||
}
|
||||
|
||||
const char* JSSegmenter::LineBreakStyleAsCString() const {
|
||||
switch (line_break_style()) {
|
||||
case LineBreakStyle::STRICT:
|
||||
return "strict";
|
||||
case LineBreakStyle::NORMAL:
|
||||
return "normal";
|
||||
case LineBreakStyle::LOOSE:
|
||||
return "loose";
|
||||
case LineBreakStyle::COUNT:
|
||||
case LineBreakStyle::NOTSET:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
Handle<String> JSSegmenter::LineBreakStyleAsString() const {
|
||||
switch (line_break_style()) {
|
||||
case LineBreakStyle::STRICT:
|
||||
return GetReadOnlyRoots().strict_string_handle();
|
||||
case LineBreakStyle::NORMAL:
|
||||
return GetReadOnlyRoots().normal_string_handle();
|
||||
case LineBreakStyle::LOOSE:
|
||||
return GetReadOnlyRoots().loose_string_handle();
|
||||
case LineBreakStyle::COUNT:
|
||||
case LineBreakStyle::NOTSET:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
Handle<String> JSSegmenter::GranularityAsString() const {
|
||||
switch (granularity()) {
|
||||
case Granularity::GRAPHEME:
|
||||
@ -236,8 +157,6 @@ Handle<String> JSSegmenter::GranularityAsString() const {
|
||||
return GetReadOnlyRoots().word_string_handle();
|
||||
case Granularity::SENTENCE:
|
||||
return GetReadOnlyRoots().sentence_string_handle();
|
||||
case Granularity::LINE:
|
||||
return GetReadOnlyRoots().line_string_handle();
|
||||
case Granularity::COUNT:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -41,8 +41,6 @@ class JSSegmenter : public JSObject {
|
||||
|
||||
static std::set<std::string> GetAvailableLocales();
|
||||
|
||||
Handle<String> LineBreakStyleAsString() const;
|
||||
const char* LineBreakStyleAsCString() const;
|
||||
Handle<String> GranularityAsString() const;
|
||||
|
||||
DECL_CAST(JSSegmenter)
|
||||
@ -52,21 +50,6 @@ class JSSegmenter : public JSObject {
|
||||
|
||||
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
|
||||
|
||||
// LineBreakStyle: identifying the style used for line break.
|
||||
//
|
||||
// ecma402 #sec-segmenter-internal-slots
|
||||
|
||||
enum class LineBreakStyle {
|
||||
NOTSET, // While the granularity is not LINE
|
||||
STRICT, // CSS level 3 line-break=strict, e.g. treat CJ as NS
|
||||
NORMAL, // CSS level 3 line-break=normal, e.g. treat CJ as ID, break before
|
||||
// hyphens for ja,zh
|
||||
LOOSE, // CSS level 3 line-break=loose
|
||||
COUNT
|
||||
};
|
||||
inline void set_line_break_style(LineBreakStyle line_break_style);
|
||||
inline LineBreakStyle line_break_style() const;
|
||||
|
||||
// Granularity: identifying the segmenter used.
|
||||
//
|
||||
// ecma402 #sec-segmenter-internal-slots
|
||||
@ -74,27 +57,19 @@ class JSSegmenter : public JSObject {
|
||||
GRAPHEME, // for character-breaks
|
||||
WORD, // for word-breaks
|
||||
SENTENCE, // for sentence-breaks
|
||||
LINE, // for line-breaks
|
||||
COUNT
|
||||
};
|
||||
inline void set_granularity(Granularity granularity);
|
||||
inline Granularity granularity() const;
|
||||
|
||||
// Bit positions in |flags|.
|
||||
#define FLAGS_BIT_FIELDS(V, _) \
|
||||
V(LineBreakStyleBits, LineBreakStyle, 3, _) \
|
||||
V(GranularityBits, Granularity, 3, _)
|
||||
#define FLAGS_BIT_FIELDS(V, _) V(GranularityBits, Granularity, 2, _)
|
||||
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
|
||||
#undef FLAGS_BIT_FIELDS
|
||||
|
||||
STATIC_ASSERT(LineBreakStyle::NOTSET <= LineBreakStyleBits::kMax);
|
||||
STATIC_ASSERT(LineBreakStyle::STRICT <= LineBreakStyleBits::kMax);
|
||||
STATIC_ASSERT(LineBreakStyle::NORMAL <= LineBreakStyleBits::kMax);
|
||||
STATIC_ASSERT(LineBreakStyle::LOOSE <= LineBreakStyleBits::kMax);
|
||||
STATIC_ASSERT(Granularity::GRAPHEME <= GranularityBits::kMax);
|
||||
STATIC_ASSERT(Granularity::WORD <= GranularityBits::kMax);
|
||||
STATIC_ASSERT(Granularity::SENTENCE <= GranularityBits::kMax);
|
||||
STATIC_ASSERT(Granularity::LINE <= GranularityBits::kMax);
|
||||
|
||||
// [flags] Bit field containing various flags about the function.
|
||||
DECL_INT_ACCESSORS(flags)
|
||||
@ -115,7 +90,6 @@ class JSSegmenter : public JSObject {
|
||||
#undef JS_SEGMENTER_FIELDS
|
||||
|
||||
private:
|
||||
static LineBreakStyle GetLineBreakStyle(const char* str);
|
||||
static Granularity GetGranularity(const char* str);
|
||||
|
||||
OBJECT_CONSTRUCTORS(JSSegmenter, JSObject);
|
||||
|
@ -11,9 +11,6 @@ let invalid_lb = [
|
||||
"keepall",
|
||||
"none",
|
||||
"standard",
|
||||
];
|
||||
|
||||
let valid_lb= [
|
||||
"strict",
|
||||
"normal",
|
||||
"loose",
|
||||
@ -30,12 +27,3 @@ invalid_lb.forEach(function(lb) {
|
||||
assertEquals("en", df.resolvedOptions().locale);
|
||||
}
|
||||
);
|
||||
|
||||
valid_lb.forEach(function(lb) {
|
||||
locales.forEach(function(base) {
|
||||
let l = base + "-u-lb-" + lb;
|
||||
let df = new Intl.Segmenter([l + "-fo-obar"]);
|
||||
assertEquals(l, df.resolvedOptions().locale);
|
||||
});
|
||||
}
|
||||
);
|
||||
|
@ -13,11 +13,8 @@ new Intl.Segmenter(['en-US'], {
|
||||
get localeMatcher() {
|
||||
assertEquals(0, getCount++);
|
||||
},
|
||||
get lineBreakStyle() {
|
||||
get granularity() {
|
||||
assertEquals(1, getCount++);
|
||||
},
|
||||
get granularity() {
|
||||
assertEquals(2, getCount++);
|
||||
},
|
||||
});
|
||||
assertEquals(3, getCount);
|
||||
assertEquals(2, getCount);
|
||||
|
@ -42,7 +42,7 @@ assertDoesNotThrow(
|
||||
() => new Intl.Segmenter(["sr"], { granularity: "grapheme" })
|
||||
);
|
||||
|
||||
assertDoesNotThrow(() => new Intl.Segmenter(["sr"], { granularity: "line" }));
|
||||
assertThrows(() => new Intl.Segmenter(["sr"], { granularity: "line" }), RangeError);
|
||||
|
||||
assertThrows(
|
||||
() => new Intl.Segmenter(["sr"], { granularity: "standard" }),
|
||||
@ -61,105 +61,104 @@ assertDoesNotThrow(
|
||||
() => new Intl.Segmenter(["sr"], { lineBreakStyle: "loose" })
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() => new Intl.Segmenter(["sr"], { lineBreakStyle: "giant" })
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "sentence",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "sentence",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "sentence",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "word",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "word",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "word",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "grapheme",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "grapheme",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "grapheme",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
);
|
||||
|
||||
assertThrows(
|
||||
() => new Intl.Segmenter(["sr"], { lineBreakStyle: "giant" }),
|
||||
RangeError
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "sentence",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "sentence",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "sentence",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "word",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "word",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "word",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "grapheme",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "grapheme",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "grapheme",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "line",
|
||||
lineBreakStyle: "loose"
|
||||
})
|
||||
}), RangeError
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
assertThrows(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "line",
|
||||
lineBreakStyle: "normal"
|
||||
})
|
||||
}), RangeError
|
||||
);
|
||||
|
||||
assertDoesNotThrow(
|
||||
assertThrows(
|
||||
() =>
|
||||
new Intl.Segmenter(["sr"], {
|
||||
granularity: "line",
|
||||
lineBreakStyle: "strict"
|
||||
})
|
||||
}), RangeError
|
||||
);
|
||||
|
||||
// propagate exception from getter
|
||||
@ -172,14 +171,13 @@ assertThrows(
|
||||
}),
|
||||
TypeError
|
||||
);
|
||||
assertThrows(
|
||||
assertDoesNotThrow(
|
||||
() =>
|
||||
new Intl.Segmenter(undefined, {
|
||||
get lineBreakStyle() {
|
||||
throw new TypeError("");
|
||||
}
|
||||
}),
|
||||
TypeError
|
||||
})
|
||||
);
|
||||
assertThrows(
|
||||
() =>
|
||||
|
@ -1,299 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
let segmenter = new Intl.Segmenter([], { granularity: "line" });
|
||||
// The default lineBreakStyle is 'normal'
|
||||
assertEquals("normal", segmenter.resolvedOptions().lineBreakStyle);
|
||||
|
||||
segmenter = new Intl.Segmenter();
|
||||
assertEquals(undefined, segmenter.resolvedOptions().lineBreakStyle);
|
||||
|
||||
// The default granularity is 'grapheme'
|
||||
assertEquals("grapheme", segmenter.resolvedOptions().granularity);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], { lineBreakStyle: "strict" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], { lineBreakStyle: "strict" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], { lineBreakStyle: "normal" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], { lineBreakStyle: "normal" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], { lineBreakStyle: "loose" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], { lineBreakStyle: "loose" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"word",
|
||||
new Intl.Segmenter(["sr"], { granularity: "word" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], { granularity: "word" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], { granularity: "grapheme" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], { granularity: "grapheme" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"sentence",
|
||||
new Intl.Segmenter(["sr"], { granularity: "sentence" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], { granularity: "sentence" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"line",
|
||||
new Intl.Segmenter(["sr"], { granularity: "line" }).resolvedOptions()
|
||||
.granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"normal",
|
||||
new Intl.Segmenter(["sr"], { granularity: "line" }).resolvedOptions()
|
||||
.lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "grapheme"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "grapheme"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "grapheme"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "grapheme"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"grapheme",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "grapheme"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "grapheme"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"word",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "word"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "word"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"word",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "word"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "word"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"word",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "word"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "word"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"sentence",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "sentence"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "sentence"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"sentence",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "sentence"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
undefined,
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "sentence"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"sentence",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "sentence"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"normal",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"line",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"loose",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "loose",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"line",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"strict",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "strict",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"line",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().granularity
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
"normal",
|
||||
new Intl.Segmenter(["sr"], {
|
||||
lineBreakStyle: "normal",
|
||||
granularity: "line"
|
||||
}).resolvedOptions().lineBreakStyle
|
||||
);
|
||||
|
||||
assertEquals("ar", new Intl.Segmenter(["ar"]).resolvedOptions().locale);
|
||||
|
||||
assertEquals("ar", new Intl.Segmenter(["ar", "en"]).resolvedOptions().locale);
|
||||
|
||||
assertEquals("fr", new Intl.Segmenter(["fr", "en"]).resolvedOptions().locale);
|
||||
|
||||
assertEquals("ar", new Intl.Segmenter(["xyz", "ar"]).resolvedOptions().locale);
|
@ -5,7 +5,7 @@
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
for (const granularity of ["grapheme", "word", "sentence", "line"]) {
|
||||
for (const granularity of ["grapheme", "word", "sentence"]) {
|
||||
const segmenter = new Intl.Segmenter("en", { granularity });
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
|
@ -1,57 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
let breakCounts = {};
|
||||
for (const locale of ["en", "fr", "ja", "zh", "ko"]) {
|
||||
for (const lineBreakStyle of ["strict", "normal", "loose"]) {
|
||||
const seg = new Intl.Segmenter(
|
||||
[locale], {granularity: "line", lineBreakStyle: lineBreakStyle});
|
||||
let opportunity = 0;
|
||||
for (const text of [
|
||||
// We know the following data caused different line break results between
|
||||
// different modes.
|
||||
// https://www.w3.org/TR/css-text-3/#propdef-line-break
|
||||
// Japanese small kana or the Katakana-Hiragana prolonged sound mark
|
||||
"あぁーぃーあーいーぁーぃー",
|
||||
// hyphens:
|
||||
// ‐ U+2010, – U+2013, 〜 U+301C, ゠ U+30A0
|
||||
"ABC‐DEF–GHI〜JKL゠MNO",
|
||||
// iteration marks:
|
||||
// 々 U+3005, 〻 U+303B, ゝ U+309D, ゞ U+309E, ヽ U+30FD, ヾ U+30FE
|
||||
"あ々あ〻あゝあゞあヽあヾあ",
|
||||
// centered punctuation marks:
|
||||
// ・ U+30FB, : U+FF1A, ; U+FF1B, ・ U+FF65, ‼ U+203C
|
||||
"ABC・DEF:GHI;JKL・MNO‼PQR",
|
||||
// centered punctuation marks:
|
||||
// ⁇ U+2047, ⁈ U+2048, ⁉ U+2049, ! U+FF01, ? U+FF1F
|
||||
"ABC⁇DEF⁈GHI⁉JKL!MNO?PQR",
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
while (!iter.following()) {
|
||||
opportunity++;
|
||||
}
|
||||
}
|
||||
breakCounts[locale + "-" + lineBreakStyle] = opportunity;
|
||||
}
|
||||
}
|
||||
// In Japanese
|
||||
// Just test the break count in loose mode is greater than normal mode.
|
||||
assertTrue(breakCounts["ja-loose"] > breakCounts["ja-normal"]);
|
||||
// and test the break count in normal mode is greater than strict mode.
|
||||
assertTrue(breakCounts["ja-normal"] > breakCounts["ja-strict"]);
|
||||
// In Chinese
|
||||
// Just test the break count in loose mode is greater than normal mode.
|
||||
assertTrue(breakCounts["zh-loose"] > breakCounts["zh-normal"]);
|
||||
// and test the break count in normal mode is greater than strict mode.
|
||||
assertTrue(breakCounts["zh-normal"] > breakCounts["zh-strict"]);
|
||||
// In English, French and Korean
|
||||
assertTrue(breakCounts["en-loose"] >= breakCounts["en-normal"]);
|
||||
assertTrue(breakCounts["fr-loose"] >= breakCounts["fr-normal"]);
|
||||
assertTrue(breakCounts["ko-loose"] >= breakCounts["ko-normal"]);
|
||||
// and test the break count in normal mode is greater than strict mode.
|
||||
assertTrue(breakCounts["en-normal"] > breakCounts["en-strict"]);
|
||||
assertTrue(breakCounts["fr-normal"] > breakCounts["fr-strict"]);
|
||||
assertTrue(breakCounts["ko-normal"] > breakCounts["ko-strict"]);
|
@ -1,38 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "line"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.join(""));
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "line"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with result from the one that
|
||||
// was created in the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assertTrue(["soft", "hard"].includes(v.breakType), v.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assertFalse(iter.following());
|
||||
assertEquals(iter.breakType, v.breakType);
|
||||
assertEquals(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assertTrue(iter.following());
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "line"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType);
|
||||
assertEquals("string", typeof v.segment);
|
||||
assertTrue(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assertEquals("number", typeof v.index);
|
||||
assertTrue(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assertEquals(text, segments.join(''));
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "line"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev)
|
||||
assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assertTrue(["soft", "hard"].includes(iter.breakType), iter.breakType);
|
||||
assertTrue(iter.index >= 0);
|
||||
assertTrue(iter.index <= text.length);
|
||||
assertTrue(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assertEquals(text, segments.reverse().join(""));
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-intl-segmenter
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "line"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
assertEquals(undefined, iter.breakType);
|
||||
assertEquals(0, iter.index);
|
||||
}
|
@ -506,6 +506,24 @@
|
||||
'language/expressions/call/eval-spread-empty-leading': [FAIL],
|
||||
'language/expressions/call/eval-spread-empty-trailing': [FAIL],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=8717
|
||||
'intl402/Segmenter/constructor/constructor/options-granularity-valid': [FAIL],
|
||||
'intl402/Segmenter/constructor/constructor/options-lineBreakStyle-invalid': [FAIL],
|
||||
'intl402/Segmenter/constructor/constructor/options-lineBreakStyle-valid': [FAIL],
|
||||
'intl402/Segmenter/constructor/constructor/options-order': [FAIL],
|
||||
'intl402/Segmenter/constructor/constructor/options-throwing-getters': [FAIL],
|
||||
'intl402/Segmenter/constructor/constructor/options-toobject-prototype': [FAIL],
|
||||
'intl402/Segmenter/constructor/constructor/options-valid-combinations': [FAIL],
|
||||
'intl402/Segmenter/iterator/granularity': [FAIL],
|
||||
'intl402/Segmenter/prototype/resolvedOptions/order': [FAIL],
|
||||
'intl402/Segmenter/prototype/resolvedOptions/type-with-lbs': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-following': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-following-modes': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-iterable': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-next': [FAIL],
|
||||
'intl402/Segmenter/prototype/segment/segment-line-preceding': [FAIL],
|
||||
|
||||
# https://bugs.chromium.org/p/v8/issues/detail?id=7472
|
||||
'intl402/NumberFormat/currency-digits': [FAIL],
|
||||
|
||||
|
@ -307,44 +307,44 @@ KNOWN_MAPS = {
|
||||
("RO_SPACE", 0x027d1): (173, "Tuple2Map"),
|
||||
("RO_SPACE", 0x02871): (175, "ArrayBoilerplateDescriptionMap"),
|
||||
("RO_SPACE", 0x02bb1): (163, "InterceptorInfoMap"),
|
||||
("RO_SPACE", 0x050d1): (153, "AccessCheckInfoMap"),
|
||||
("RO_SPACE", 0x05121): (154, "AccessorInfoMap"),
|
||||
("RO_SPACE", 0x05171): (155, "AccessorPairMap"),
|
||||
("RO_SPACE", 0x051c1): (156, "AliasedArgumentsEntryMap"),
|
||||
("RO_SPACE", 0x05211): (157, "AllocationMementoMap"),
|
||||
("RO_SPACE", 0x05261): (158, "AsmWasmDataMap"),
|
||||
("RO_SPACE", 0x052b1): (159, "AsyncGeneratorRequestMap"),
|
||||
("RO_SPACE", 0x05301): (160, "DebugInfoMap"),
|
||||
("RO_SPACE", 0x05351): (161, "FunctionTemplateInfoMap"),
|
||||
("RO_SPACE", 0x053a1): (162, "FunctionTemplateRareDataMap"),
|
||||
("RO_SPACE", 0x053f1): (164, "InterpreterDataMap"),
|
||||
("RO_SPACE", 0x05441): (165, "ModuleInfoEntryMap"),
|
||||
("RO_SPACE", 0x05491): (166, "ModuleMap"),
|
||||
("RO_SPACE", 0x054e1): (167, "ObjectTemplateInfoMap"),
|
||||
("RO_SPACE", 0x05531): (168, "PromiseCapabilityMap"),
|
||||
("RO_SPACE", 0x05581): (169, "PromiseReactionMap"),
|
||||
("RO_SPACE", 0x055d1): (170, "PrototypeInfoMap"),
|
||||
("RO_SPACE", 0x05621): (171, "ScriptMap"),
|
||||
("RO_SPACE", 0x05671): (172, "StackFrameInfoMap"),
|
||||
("RO_SPACE", 0x056c1): (174, "Tuple3Map"),
|
||||
("RO_SPACE", 0x05711): (176, "WasmDebugInfoMap"),
|
||||
("RO_SPACE", 0x05761): (177, "WasmExceptionTagMap"),
|
||||
("RO_SPACE", 0x057b1): (178, "WasmExportedFunctionDataMap"),
|
||||
("RO_SPACE", 0x05801): (179, "CallableTaskMap"),
|
||||
("RO_SPACE", 0x05851): (180, "CallbackTaskMap"),
|
||||
("RO_SPACE", 0x058a1): (181, "PromiseFulfillReactionJobTaskMap"),
|
||||
("RO_SPACE", 0x058f1): (182, "PromiseRejectReactionJobTaskMap"),
|
||||
("RO_SPACE", 0x05941): (183, "PromiseResolveThenableJobTaskMap"),
|
||||
("RO_SPACE", 0x05991): (184, "WeakFactoryCleanupJobTaskMap"),
|
||||
("RO_SPACE", 0x059e1): (185, "AllocationSiteWithWeakNextMap"),
|
||||
("RO_SPACE", 0x05a31): (185, "AllocationSiteWithoutWeakNextMap"),
|
||||
("RO_SPACE", 0x05a81): (219, "LoadHandler1Map"),
|
||||
("RO_SPACE", 0x05ad1): (219, "LoadHandler2Map"),
|
||||
("RO_SPACE", 0x05b21): (219, "LoadHandler3Map"),
|
||||
("RO_SPACE", 0x05b71): (227, "StoreHandler0Map"),
|
||||
("RO_SPACE", 0x05bc1): (227, "StoreHandler1Map"),
|
||||
("RO_SPACE", 0x05c11): (227, "StoreHandler2Map"),
|
||||
("RO_SPACE", 0x05c61): (227, "StoreHandler3Map"),
|
||||
("RO_SPACE", 0x05081): (153, "AccessCheckInfoMap"),
|
||||
("RO_SPACE", 0x050d1): (154, "AccessorInfoMap"),
|
||||
("RO_SPACE", 0x05121): (155, "AccessorPairMap"),
|
||||
("RO_SPACE", 0x05171): (156, "AliasedArgumentsEntryMap"),
|
||||
("RO_SPACE", 0x051c1): (157, "AllocationMementoMap"),
|
||||
("RO_SPACE", 0x05211): (158, "AsmWasmDataMap"),
|
||||
("RO_SPACE", 0x05261): (159, "AsyncGeneratorRequestMap"),
|
||||
("RO_SPACE", 0x052b1): (160, "DebugInfoMap"),
|
||||
("RO_SPACE", 0x05301): (161, "FunctionTemplateInfoMap"),
|
||||
("RO_SPACE", 0x05351): (162, "FunctionTemplateRareDataMap"),
|
||||
("RO_SPACE", 0x053a1): (164, "InterpreterDataMap"),
|
||||
("RO_SPACE", 0x053f1): (165, "ModuleInfoEntryMap"),
|
||||
("RO_SPACE", 0x05441): (166, "ModuleMap"),
|
||||
("RO_SPACE", 0x05491): (167, "ObjectTemplateInfoMap"),
|
||||
("RO_SPACE", 0x054e1): (168, "PromiseCapabilityMap"),
|
||||
("RO_SPACE", 0x05531): (169, "PromiseReactionMap"),
|
||||
("RO_SPACE", 0x05581): (170, "PrototypeInfoMap"),
|
||||
("RO_SPACE", 0x055d1): (171, "ScriptMap"),
|
||||
("RO_SPACE", 0x05621): (172, "StackFrameInfoMap"),
|
||||
("RO_SPACE", 0x05671): (174, "Tuple3Map"),
|
||||
("RO_SPACE", 0x056c1): (176, "WasmDebugInfoMap"),
|
||||
("RO_SPACE", 0x05711): (177, "WasmExceptionTagMap"),
|
||||
("RO_SPACE", 0x05761): (178, "WasmExportedFunctionDataMap"),
|
||||
("RO_SPACE", 0x057b1): (179, "CallableTaskMap"),
|
||||
("RO_SPACE", 0x05801): (180, "CallbackTaskMap"),
|
||||
("RO_SPACE", 0x05851): (181, "PromiseFulfillReactionJobTaskMap"),
|
||||
("RO_SPACE", 0x058a1): (182, "PromiseRejectReactionJobTaskMap"),
|
||||
("RO_SPACE", 0x058f1): (183, "PromiseResolveThenableJobTaskMap"),
|
||||
("RO_SPACE", 0x05941): (184, "WeakFactoryCleanupJobTaskMap"),
|
||||
("RO_SPACE", 0x05991): (185, "AllocationSiteWithWeakNextMap"),
|
||||
("RO_SPACE", 0x059e1): (185, "AllocationSiteWithoutWeakNextMap"),
|
||||
("RO_SPACE", 0x05a31): (219, "LoadHandler1Map"),
|
||||
("RO_SPACE", 0x05a81): (219, "LoadHandler2Map"),
|
||||
("RO_SPACE", 0x05ad1): (219, "LoadHandler3Map"),
|
||||
("RO_SPACE", 0x05b21): (227, "StoreHandler0Map"),
|
||||
("RO_SPACE", 0x05b71): (227, "StoreHandler1Map"),
|
||||
("RO_SPACE", 0x05bc1): (227, "StoreHandler2Map"),
|
||||
("RO_SPACE", 0x05c11): (227, "StoreHandler3Map"),
|
||||
("MAP_SPACE", 0x00139): (1057, "ExternalMap"),
|
||||
("MAP_SPACE", 0x00189): (1073, "JSMessageObjectMap"),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user