v8/test/intl/segmenter/word-containing.js
Frank Tang 64da43ef36 Reland "[intl] Clean up intl_segmenter flag"
This is a reland of c9c3ec4c14


Original change's description:
> [intl] Clean up intl_segmenter flag
>
> Intl.Segmenter shipped in m87 and launched.
>
> Bug: v8:11225
> Change-Id: I4213e261e1aea717c1281f19785a8c29ff1bbd8b
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2570461
> Commit-Queue: Frank Tang <ftang@chromium.org>
> Reviewed-by: Shu-yu Guo <syg@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#71653}

Bug: v8:11225, v8:11240
Change-Id: Ibded9038671862d90206d328f8a12db51c40e63c
Cq-Include-Trybots: luci.v8.try:v8_linux64_gc_stress_custom_snapshot_dbg_ng,v8_linux_arm64_gc_stress_dbg_ng,v8_linux_gc_stress_dbg_ng,v8_mac64_gc_stress_dbg_ng
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2579043
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Shu-yu Guo <syg@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71691}
2020-12-10 10:25:48 +00:00

82 lines
2.7 KiB
JavaScript

// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Verifies that %SegmentsPrototype%.containing(), at word granularity, returns
// the enclosing segment for EVERY UTF-16 code unit index of the input,
// including indices that fall on combining marks, on the low half of a
// surrogate pair, and inside emoji modifier / ZWJ sequences.
const segmenter = new Intl.Segmenter(undefined, {granularity: 'word'});
const waving_hand_light_skin_tone = "\uD83D\uDC4B\uD83C\uDFFB";
const man_light_skin_tone_red_hair =
    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83E\uDDB0";

// UTF-16 code unit layout of `input`:
//    0-3   "ซิ่ง"                        Thai, with combining marks
//    4     " "
//    5-11  "Ame\u0301lie"               Latin, with a combining acute accent
//   12-13  "台北"                        Han
//   14-15  "\uD800\uDCB0"               surrogate pair (U+100B0)
//   16     " "
//   17-20  waving_hand_light_skin_tone  (U+1F44B U+1F3FB)
//   21-27  man_light_skin_tone_red_hair (U+1F468 U+1F3FB U+200D U+1F9B0)
const input = "ซิ่ง Ame\u0301lie台北\uD800\uDCB0 " + waving_hand_light_skin_tone +
    man_light_skin_tone_red_hair;
const segments = segmenter.segment(input);

// Test less than 0
assertEquals(undefined, segments.containing(-1));
// Test longer than input
assertEquals(undefined, segments.containing(input.length));
assertEquals(undefined, segments.containing(input.length + 1));

// Index of the first code unit of the segment currently being checked.
let pos = 0;

// Asserts that every code unit index in [pos, pos + segment.length) maps to
// the same segment data (starting at `pos`), then advances `pos` past it.
// Querying with the loop index, never a fixed one, is what makes the checks
// cover the interior code units of each segment.
function checkSegmentAtPos(segment, isWordLike) {
  const expected = {segment, index: pos, input, isWordLike};
  for (let i = pos; i < pos + segment.length; i++) {
    assertEquals(expected, segments.containing(i));
  }
  pos += segment.length;
}

// Test modifier in Thai
checkSegmentAtPos("ซิ่ง", true);
// Test SPACE
checkSegmentAtPos(" ", false);
// Test Latin with modifier
checkSegmentAtPos("Ame\u0301lie", true);
// Test Han
checkSegmentAtPos("台北", true);
// Test Surrogate pair (both the high and the low surrogate index)
checkSegmentAtPos("\uD800\uDCB0", true);
// Test SPACE
checkSegmentAtPos(" ", false);
// Test Emoji modifier: U+1F44B U+1F3FB
checkSegmentAtPos(waving_hand_light_skin_tone, false);
// Test Emoji modifiers sequence: U+1F468 U+1F3FB U+200D U+1F9B0
checkSegmentAtPos(man_light_skin_tone_red_hair, false);

// The segments checked above must account for the whole input.
assertEquals(input.length, pos);