ICU-13637 Break Iterator Rule Updates for Indic Grapheme Clusters.

This commit is contained in:
Andy Heninger 2019-05-30 16:41:44 -07:00
parent c43455749b
commit fa240d49cc
14 changed files with 1236 additions and 93 deletions

View File

@ -22,6 +22,7 @@ def generate(config, glob, common_vars):
exit(1)
requests += generate_cnvalias(config, glob, common_vars)
requests += generate_ulayout(config, glob, common_vars)
requests += generate_confusables(config, glob, common_vars)
requests += generate_conversion_mappings(config, glob, common_vars)
requests += generate_brkitr_brk(config, glob, common_vars)
@ -31,7 +32,6 @@ def generate(config, glob, common_vars):
requests += generate_coll_ucadata(config, glob, common_vars)
requests += generate_full_unicore_data(config, glob, common_vars)
requests += generate_unames(config, glob, common_vars)
requests += generate_ulayout(config, glob, common_vars)
requests += generate_misc(config, glob, common_vars)
requests += generate_curr_supplemental(config, glob, common_vars)
requests += generate_translit(config, glob, common_vars)
@ -189,7 +189,7 @@ def generate_brkitr_brk(config, glob, common_vars):
RepeatedExecutionRequest(
name = "brkitr_brk",
category = "brkitr_rules",
dep_targets = [DepTarget("cnvalias")],
dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
input_files = input_files,
output_files = output_files,
tool = IcuTool("genbrk"),

View File

@ -25,6 +25,13 @@ $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
#
# From cldr/common/properties/segments/
# and issue CLDR-10994
#
$Virama = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Virama}];
$LinkingConsonant = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Consonant}];
$ExtCccZwj = [[\p{gcb=Extend}-\p{ccc=0}] \p{gcb=ZWJ}];
# Korean Syllable Definitions
#
$L = [\p{Grapheme_Cluster_Break = L}];
@ -57,6 +64,9 @@ $L ($L | $V | $LV | $LVT);
# GB 9b
$Prepend [^$Control $CR $LF];
# GB 9.3, from CLDR-10994
$LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* $LinkingConsonant;
# GB 11 Do not break within emoji modifier sequences or emoji zwj sequences.
$Extended_Pict $Extend* $ZWJ $Extended_Pict;

View File

@ -135,13 +135,13 @@ CharClass *BreakRules::addCharClass(const UnicodeString &name, const UnicodeStri
printf("epandedDef: %s\n", CStr(expandedDef)());
}
UnicodeSet *s = new UnicodeSet(expandedDef, USET_IGNORE_SPACE, NULL, status);
LocalPointer<UnicodeSet> s(new UnicodeSet(expandedDef, USET_IGNORE_SPACE, NULL, status), status);
if (U_FAILURE(status)) {
IntlTest::gTest->errln("%s:%d: error %s creating UnicodeSet %s", __FILE__, __LINE__,
u_errorName(status), CStr(name)());
return NULL;
IntlTest::gTest->errln("%s:%d: error %s creating UnicodeSet %s\n Expanded set definition: %s",
__FILE__, __LINE__, u_errorName(status), CStr(name)(), CStr(expandedDef)());
return nullptr;
}
CharClass *cclass = new CharClass(name, definition, expandedDef, s);
CharClass *cclass = new CharClass(name, definition, expandedDef, s.orphan());
CharClass *previousClass = static_cast<CharClass *>(uhash_put(fCharClasses.getAlias(),
new UnicodeString(name), // Key, owned by hash table.
cclass, // Value, owned by hash table.

View File

@ -1611,6 +1611,9 @@ private:
UnicodeSet *fLVTSet;
UnicodeSet *fHangulSet;
UnicodeSet *fExtendedPictSet;
UnicodeSet *fViramaSet;
UnicodeSet *fLinkingConsonantSet;
UnicodeSet *fExtCccZwjSet;
UnicodeSet *fAnySet;
const UnicodeString *fText;
@ -1643,6 +1646,11 @@ RBBICharMonkey::RBBICharMonkey() {
fHangulSet->addAll(*fLVTSet);
fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status);
fViramaSet = new UnicodeSet(u"[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
"\\p{Indic_Syllabic_Category=Virama}]", status);
fLinkingConsonantSet = new UnicodeSet(u"[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
"\\p{Indic_Syllabic_Category=Consonant}]", status);
fExtCccZwjSet = new UnicodeSet(u"[[\\p{gcb=Extend}-\\p{ccc=0}] \\p{gcb=ZWJ}]", status);
fAnySet = new UnicodeSet(0, 0x10ffff);
fSets = new UVector(status);
@ -1658,6 +1666,9 @@ RBBICharMonkey::RBBICharMonkey() {
fSets->addElement(fAnySet, status);
fSets->addElement(fZWJSet, status);
fSets->addElement(fExtendedPictSet, status);
fSets->addElement(fViramaSet, status);
fSets->addElement(fLinkingConsonantSet, status);
fSets->addElement(fExtCccZwjSet, status);
if (U_FAILURE(status)) {
deferredStatus = status;
}
@ -1777,6 +1788,22 @@ int32_t RBBICharMonkey::next(int32_t prevPos) {
continue;
}
// Rule (GB9.3) LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
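// Note: Viramas are also members of the ExtCccZwj class, so the backward scan over
//       ExtCccZwj characters below also passes over any Virama; sawVirama records
//       whether at least one Virama was seen during that scan.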
if (fLinkingConsonantSet->contains(c2)) {
int pi = p1;
bool sawVirama = false;
while (pi > 0 && fExtCccZwjSet->contains(fText->char32At(pi))) {
if (fViramaSet->contains(fText->char32At(pi))) {
sawVirama = true;
}
pi = fText->moveIndex32(pi, -1);
}
if (sawVirama && fLinkingConsonantSet->contains(fText->char32At(pi))) {
continue;
}
}
// Rule (GB11) Extended_Pictographic Extend * ZWJ x Extended_Pictographic
if (fExtendedPictSet->contains(cBase) && fZWJSet->contains(c1) && fExtendedPictSet->contains(c2)) {
continue;
@ -1827,7 +1854,9 @@ RBBICharMonkey::~RBBICharMonkey() {
delete fAnySet;
delete fZWJSet;
delete fExtendedPictSet;
}
delete fViramaSet;
delete fLinkingConsonantSet;
delete fExtCccZwjSet;}
//------------------------------------------------------------------------------------------
//

View File

@ -1,5 +1,5 @@
# GraphemeBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# GraphemeBreakTest-12.0.0.txt
# Date: 2019-02-21, 07:57:26 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -48,10 +48,14 @@
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 ÷ 0915 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0020 × 0308 ÷ 0915 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 094D ÷ # ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 094D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -82,10 +86,14 @@
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@ -116,10 +124,14 @@
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@ -150,10 +162,14 @@
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0915 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 094D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 094D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@ -184,10 +200,14 @@
÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 034F × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 094D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -218,10 +238,14 @@
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -252,10 +276,14 @@
÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 0915 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0600 × 0308 ÷ 0915 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 094D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 094D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
@ -286,10 +314,14 @@
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0903 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -320,10 +352,14 @@
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1100 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -354,10 +390,14 @@
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1160 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -388,10 +428,14 @@
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -422,10 +466,14 @@
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ AC00 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -456,14 +504,56 @@
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ AC01 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0915 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0915 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0915 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0915 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0915 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0915 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0915 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0915 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0915 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0915 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0915 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0915 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0915 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0915 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0915 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0915 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0915 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0915 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0915 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0915 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0915 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0915 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0915 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0915 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0915 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0915 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0915 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0915 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0915 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0915 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0915 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0915 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0915 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0915 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0915 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0915 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0915 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@ -490,10 +580,14 @@
÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A ÷ 0915 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 231A × 0308 ÷ 0915 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 094D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 094D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -524,14 +618,56 @@
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0300 × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 094D ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 094D × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 094D ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 094D × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 094D ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 094D × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 094D ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 094D × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 094D × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 094D × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 094D ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 094D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 094D ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 094D × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 094D × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 094D × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 094D ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 094D × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 094D ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 094D × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 094D ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 094D × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 094D ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 094D × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 094D ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 094D × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 094D ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 094D × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 094D ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 094D × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 094D × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 094D × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 094D × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 094D × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 094D × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 094D × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 094D ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 094D × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@ -558,10 +694,14 @@
÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 200D × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -592,10 +732,14 @@
÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0378 × 0308 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 094D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 094D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@ -616,6 +760,15 @@
÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0915 ÷ 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 093C × 200D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 093C × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 094D × 0924 × 094D × 092F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER YA (LinkingConsonant) ÷ [0.3]
÷ 0915 × 094D ÷ 0061 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3]
÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
@ -625,6 +778,6 @@
÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
#
# Lines: 602
# Lines: 755
#
# EOF

View File

@ -37,6 +37,13 @@ LVT = [\p{Grapheme_Cluster_Break = LVT}];
Extended_Pict = [:ExtPict:];
# Indic Sequences
Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);
@ -46,6 +53,7 @@ GB7: (LV | V) (V | T);
GB8: (LVT | T) T;
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
GB9: . (Extend | ZWJ);
GB9a: . SpacingMark;

View File

@ -1,76 +0,0 @@
file: testdata/break_rules/readme.txt
Copyright (C) 2016 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html#License
Copyright (c) 2015-2016, International Business Machines Corporation and others. All Rights Reserved.
This directory contains the break iterator reference rule files used by intltest rbbi/RBBIMonkeyTest/testMonkey.
The rules in this directory track the boundary rules from Unicode UAX 14 and 29. They are interpreted
to provide an expected set of boundary positions to compare with the results from ICU break iteration.
ICU4J also includes copies of the test reference rules, located in the directory
main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/
The copies should be kept synchronized; there should be no differences.
Each set of reference break rules lives in a separate file.
The list of rule files to run by default is hard coded into the test code, in rbbimonkeytest.cpp.
Each test file includes
- The type of ICU break iterator to create (word, line, sentence, etc.)
- The locale to use
- Character Class definitions
- Rule definitions
To Do
- Extend the syntax to support rule tailoring.
Character Class Definition:
name = set_regular_expression;
Rule Definition:
rule_regular_expression;
name:
[A-Za-z_][A-Za-z0-9_]*
set_regular_expression:
The intersection of an ICU regular expression [set] expression and a UnicodeSet pattern.
(They are mostly the same)
May include previously defined set names, which are logically expanded in-place.
rule_regular_expression:
An ICU Regular Expression.
May include set names, which are logically expanded in-place.
May include a '÷', which defines a boundary position.
Application of the rules:
Matching begins at the start of text, or after a previously identified boundary.
The pseudo-code below finds the next boundary.
    while position < end of text
        for each rule
            if the text at position matches this rule
                if the rule has a '÷'
                    Boundary is found.
                    return the position of the '÷' within the match.
                else
                    position = last character of the rule match.
                    break from the inner rule loop, continue the outer loop.
This differs from the Unicode UAX algorithm in that each position in the text is
not tested separately. Instead, when a rule match is found, rule application restarts with the last
character of the preceding rule match. ICU's break rules also operate this way.
Expressing rules this way simplifies UAX rules that have leading or trailing context; it
is no longer necessary to write expressions that match the context starting from
any position within it.
This rule form differs from ICU rules in that the rules are applied sequentially, as they
are with the Unicode UAX rules. With the main ICU break rules, all are applied in parallel.
Word Dictionaries
The monkey test does not test dictionary based breaking. The set named 'dictionary' is special,
as it is in the main ICU rules. For the monkey test, no characters from the dictionary set are
included in the randomly-generated test data.

View File

@ -164,6 +164,498 @@
#
#<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
#
# ICU-13637 and CLDR-10994 - Indic Grapheme Cluster Boundary changes to support aksaras
# New rule: LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
# Sample Chars: LinkingConsonant: \u0915
# Virama: \u094d [also Extend]
# ExtCccZwj: \u0308
# Extend but not ExtCccZwj: \u093A
<char>
<data>•\u0915\u094d\u0915•</data>
<data>•\u0915\u0308\u0308\u094d\u0308\u0308\u0915•</data>
<data>•\u0915\u0308\u0308\u094d\u0308\u0308•\u0041•</data>
<data>•\u0915\u0308\u0308\u094d\u093A\u093A•\u0915•</data>
#
# From cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Bengali.txt
#
# ব্যক্তিত্বের ;
<data>•ব্য•ক্তি•ত্বে•র•</data>
# আত্মবিশ্বাস ;
<data>•আ•ত্ম•বি•শ্বা•স•</data>
# ব্যাক্টেরিয়া ;
<data>•ব্যা•ক্টে•রি•য়া•</data>
# সমস্যার ;
<data>•স•ম•স্যা•র•</data>
# মিশ্রণ ;
<data>•মি•শ্র•ণ•</data>
# দুর্গন্ধ ;
<data>•দু•র্গ•ন্ধ•</data>
# পরীক্ষার ;
<data>•প•রী•ক্ষা•র•</data>
# কোলেস্টেরল ;
<data>•কো•লে•স্টে•র•ল•</data>
# ব্যায়ামকে ;
<data>•ব্যা•য়া•ম•কে•</data>
# সপ্তাহে ;
<data>•স•প্তা•হে•</data>
# পরীক্ষার ;
<data>•প•রী•ক্ষা•র•</data>
# চর্বিজাতীয় ;
<data>•চ•র্বি•জা•তী•য়•</data>
# নিয়ণ্ত্রণ ;
<data>•নি•য়•ণ্ত্র•ণ•</data>
# অবশ্যই ;
<data>•অ•ব•শ্য•ই•</data>
# নয়াদিল্লির ;
<data>•ন•য়া•দি•ল্লি•র•</data>
# সমীক্ষাটা ;
<data>•স•মী•ক্ষা•টা•</data>
# #ভূমিকম্পের ;
# <data>•ভূ•মি•ক•ম্পের•</data> # line 17 in TestSegmenter-Bengali.txt
# কেন্দ্রীয় ;
<data>•কে•ন্দ্রী•য়•</data>
# উস্কানিই ;
<data>•উ•স্কা•নি•ই•</data>
# সমীক্ষকরা ;
<data>•স•মী•ক্ষ•ক•রা•</data>
# মুহূর্তে ;
<data>•মু•হূ•র্তে•</data>
# সম্পর্কে ;
<data>•স•ম্প•র্কে•</data>
# পৌষসংক্রান্তির ;
<data>•পৌ•ষ•সং•ক্রা•ন্তি•র•</data>
# মুখ্যমন্ত্রী ;
<data>•মু•খ্য•ম•ন্ত্রী•</data>
#
# from cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Devanagari.txt
#
# संदिग्ध ;
<data>•सं•दि•ग्ध•</data>
# सुरक्षा ;
<data>•सु•र•क्षा•</data>
# टक्कर ;
<data>•ट•क्क•र•</data>
# सत्र ;
<data>•स•त्र•</data>
# दक्षिण ;
<data>•द•क्षि•ण•</data>
# मिश्रणाने ;
<data>•मि•श्र•णा•ने•</data>
# दुर्घटनाग्रस्त ;
<data>•दु•र्घ•ट•ना•ग्र•स्त•</data>
# मुहूर्त ;
<data>•मु•हू•र्त•</data>
# शर्करायुक्त ;
<data>•श•र्क•रा•यु•क्त•</data>
# अंतरराष्ट्रीय ;
<data>•अं•त•र•रा•ष्ट्री•य•</data>
# राष्ट्रपति ;
<data>•रा•ष्ट्र•प•ति•</data>
# फ्रांस ;
<data>•फ्रां•स•</data>
# ट्रैक्टर ;
<data>•ट्रै•क्ट•र•</data>
# सिट्रोनेलाचे ;
<data>•सि•ट्रो•ने•ला•चे•</data>
# टुक्रालाई ;
<data>•टु•क्रा•ला•ई•</data>
# इकट्ठा ;
<data>•इ•क•ट्ठा•</data>
# शास्त्र ;
<data>•शा•स्त्र•</data>
# स्त्री ;
<data>•स्त्री•</data>
# लक्ष्य ;
<data>•ल•क्ष्य•</data>
# तीक्ष्ण ;
<data>•ती•क्ष्ण•</data>
# Words ;
<data>•W•o•r•d•s•</data>
# त्रिवेदी ;
<data>•त्रि•वे•दी•</data>
# कृत्रिम ;
<data>•कृ•त्रि•म•</data>
# मात्रामा ;
<data>•मा•त्रा•मा•</data>
# सिद्धार्थनगर ;
<data>•सि•द्धा•र्थ•न•ग•र•</data>
# श्रद्धालुओं ;
<data>•श्र•द्धा•लु•ओं•</data>
# वृद्धिसँग ;
<data>•वृ•द्धि•सँ•ग•</data>
# अंतःज्ञानी ;
<data>•अं•तः•ज्ञा•नी••</data>
# गन्नदी॑धिम ;
<data>•ग•न्न•दी॑•धि•म•</data>
# प्प्रप॑द्ये॒ ;
<data>•प्प्र•प॑•द्ये॒•</data>
# मनस्तापः ;
<data>•म•न•स्ता•पः•</data>
# हविष्करोमि ;
<data>•ह•वि•ष्क•रो•मि•</data>
# अहर्पतिः ;
<data>•अ•ह•र्प•तिः•</data>
# गच्छति ;
<data>•ग•च्छ•ति•</data>
# अयम् ;
<data>•अ•य•म्•</data>
# शिवश्चोदति ;
<data>•शि•व•श्चो•द•ति•</data>
# मनष्टालयति ;
<data>•म•न•ष्टा•ल•य•ति•</data>
# अश्वष्ठक्कस्य ;
<data>•अ•श्व•ष्ठ•क्क•स्य•</data>
# दुष्पुत्रः ;
<data>•दु•ष्पु•त्रः•</data>
# द्विःपक्वम् ;
<data>•द्विः•प•क्व•म्•</data>
# द्विष्कामः ;
<data>•द्वि•ष्का•मः•</data>
# भर्तुर्भोगः ;
<data>•भ•र्तु•र्भो•गः•</data>
# शॆत्युल ;
<data>•शॆ•त्यु•ल••</data>
# महारॆन्य ;
<data>•म•हा•रॆ•न्य•</data>
# सॆक्युल ;
<data>•सॆ•क्यु•ल•</data>
# ल्यॊदुर ;
<data>•ल्यॊ•दु•र•</data>
# फयॊक ;
<data>•फ•यॊ•क•</data>
# मॊहन्युव ;
<data>•मॊ•ह•न्यु•व•</data>
# अन्यर ;
<data>•अ•न्य•र•</data>
# ख्वजि ;
<data>•ख्व•जि•</data>
# खॅरिन्य ;
<data>•खॅ•रि•न्य•</data>
# उच्छ्वास ;
<data>•उ•च्छ्वा•स•</data>
# व्यक्तिमत्व ;
<data>•व्य•क्ति•म•त्व•</data>
# दातांच्यामध्ये ;
<data>•दा•तां•च्या•म•ध्ये•</data>
# दुर्गंधी ;
<data>•दु•र्गं•धी•</data>
# दुर्गंधीपासूनसुद्धा ;
<data>•दु•र्गं•धी•पा•सू•न•सु•द्धा•</data>
# नित्यकर्मामध्ये ;
<data>•नि•त्य•क•र्मा•म•ध्ये•</data>
# आजारांपासूनसुद्धा ;
<data>•आ•जा•रां•पा•सू•न•सु•द्धा•</data>
# भाज्यांमध्ये ;
<data>•भा•ज्यां•म•ध्ये•</data>
# उच्छ्वासाच्या ;
<data>•उ•च्छ्वा•सा•च्या•</data>
# सुकिल्लीं ;
<data>•सु•कि•ल्लीं•</data>
# स्लिपां ;
<data>•स्लि•पां•</data>
# मिसळिल्ल्यान ;
<data>•मि•स•ळि•ल्ल्या•न•</data>
# रोंप्यांची ;
<data>•रों•प्यां•ची•</data>
# वर्सांतल्यान ;
<data>•व•र्सां•त•ल्या•न•</data>
# रोंप्याच्या ;
<data>•रों•प्या•च्या•</data>
# नाशिल्ल्यान ;
<data>•ना•शि•ल्ल्या•न•</data>
# जिल्ल्याच्या ;
<data>•जि•ल्ल्या•च्या•</data>
# कुरुक्षेत्रांतल्या ;
<data>•कु•रु•क्षे•त्रां•त•ल्या•</data>
# भाज्ज्यांची ;
<data>•भा•ज्ज्यां•ची•</data>
# सिट्रोनेलाका ;
<data>•सि•ट्रो•ने•ला•का•</data>
# गरिनुपर्छ ;
<data>•ग•रि•नु•प•र्छ•</data>
# सामान्यतः ;
<data>•सा•मा•न्य•तः•</data>
# वृद्धिसँग ;
<data>•वृ•द्धि•सँ•ग•</data>
# रिपोर्टनि ;
<data>•रि•पो•र्ट•नि•</data>
# टोस्टर्ज़ ;
<data>•टो•स्ट•र्ज़•</data>
# वक्तव्य ;
<data>•व•क्त•व्य•</data>
# प्रक्रिया ;
<data>•प्र•क्रि•या•</data>
# निर्दिष्ट ;
<data>•नि•र्दि•ष्ट•</data>
# अस्वीकृत ;
<data>•अ•स्वी•कृ•त•</data>
# प्रयोक्ता ;
<data>•प्र•यो•क्ता•</data>
# प्रकार्यक ;
<data>•प्र•का•र्य•क•</data>
# ट्रेक्टरु ;
<data>•ट्रे•क्ट•रु•</data>
# स्थानधारक ;
<data>•स्था•न•धा•र•क•</data>
# प्रकार्यक ;
<data>•प्र•का•र्य•क•</data>
# अनुच्छेदसँ ;
<data>•अ•नु•च्छे•द•सँ•</data>
# गर्मीपदु ;
<data>•ग•र्मी•प•दु•</data>
# शास्त्रु ;
<data>•शा•स्त्रु•</data>
# इन्द्री ;
<data>•इ•न्द्री•</data>
# श्रधालू ;
<data>•श्र•धा•लू•</data>
# आस्तिकु ;
<data>•आ•स्ति•कु•</data>
# सकार्थो ;
<data>•स•का•र्थो•</data>
# सन्ॿंधु ;
<data>•स•न्ॿं•धु•</data>
# मनुक्खो ;
<data>•म•नु•क्खो•</data>
# हानिफ्राय ;
<data>•हा•नि•फ्रा•य•</data>
# दैथाइहरग्रा ;
<data>•दै•था•इ•ह•र•ग्रा•</data>
# बोसोरब्रै ;
<data>•बो•सो•र•ब्रै•</data>
# रांखान्थियारि ;
<data>•रां•खा•न्थि•या•रि•</data>
# खान्थियाव ;
<data>•खा•न्थि•या•व•</data>
# स्लिप्स ;
<data>•स्लि•प्स•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Gujarati.txt
#
# અંગ્રેજી ;
<data>•અં•ગ્રે•જી•</data>
# શબ્દકોશ ;
<data>•શ•બ્દ•કો•શ•</data>
# બાપ્તિસ્મા ;
<data>•બા•પ્તિ•સ્મા•</data>
# મિસ્ત્રી ;
<data>•મિ•સ્ત્રી•</data>
# સિક્કા ;
<data>•સિ•ક્કા•</data>
#6 ; એકત્રીસમું ; એ÷કત્રી÷સ÷મું
# સ્વસ્થાને ;
<data>•સ્વ•સ્થા•ને•</data>
# પ્રશ્નાર્થ ;
<data>•પ્ર•શ્ના•ર્થ•</data>
# વર્તમાનકૃદંત ;
<data>•વ•ર્ત•મા•ન•કૃ•દં•ત•</data>
# વાક્યની ;
<data>•વા•ક્ય•ની•</data>
# સાર્વજનિક ;
<data>•સા•ર્વ•જ•નિ•ક•</data>
# સમાપ્તિ ;
<data>•સ•મા•પ્તિ•</data>
# પધાર્યા ;
<data>•પ•ધા•ર્યા•</data>
# વ્યક્તિત્વને ;
<data>•વ્ય•ક્તિ•ત્વ•ને•</data>
# આત્મવિશ્વાસ ;
<data>•આ•ત્મ•વિ•શ્વા•સ•</data>
# વ્યાયામથી ;
<data>•વ્યા•યા•મ•થી•</data>
# યુક્ત ;
<data>•યુ•ક્ત•</data>
# #18 ; પુરુષોત્તમ ; (note: the test data below omits the vowel sign U+0AC1 on the first letter)
<data>•પ•રુ•ષો•ત્ત•મ•</data>
# કેન્દ્રીય ;
<data>•કે•ન્દ્રી•ય•</data>
# ક્ષત્રિય ;
<data>•ક્ષ•ત્રિ•ય•</data>
# ફોર્મ્યુલા ;
<data>•ફો•ર્મ્યુ•લા•</data>
# કેન્દ્રમાં ;
<data>•કે•ન્દ્ર•માં•</data>
# સ્પ્રિંગ ;
<data>•સ્પ્રિં•ગ•</data>
# પ્રારંભ ;
<data>•પ્રા•રં•ભ•</data>
# વિદ્યાર્થીઓ ;
<data>•વિ•દ્યા•ર્થી•ઓ•</data>
# સ્વાર્થની ;
<data>•સ્વા•ર્થ•ની•</data>
# લોન્ગયરબ્યેન ;
<data>•લો•ન્ગ•ય•ર•બ્યે•ન•</data>
# સ્થાનિક ;
<data>•સ્થા•નિ•ક•</data>
# બિલ્ડિંગની ;
<data>•બિ•લ્ડિં•ગ•ની•</data>
# ઉત્પાદક ;
<data>•ઉ•ત્પા•દ•ક•</data>
# ઝૂકાવ્યું ;
<data>•ઝૂ•કા•વ્યું•</data>
# પ્રપૌત્ર ;
<data>•પ્ર•પૌ•ત્ર•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Malayalam.txt
#
# സ്ഥാനമൊഴിയുക ;
<data>•സ്ഥാ•ന•മൊ•ഴി•യു•ക•</data>
# വ്യക്തി ;
<data>•വ്യ•ക്തി•</data>
# കൗമാരക്കാരി ;
<data>•കൗ•മാ•ര•ക്കാ•രി•</data>
# കല്യാണം ;
<data>•ക•ല്യാ•ണം•</data>
# റദ്ദാക്കിയ ;
<data>•റ•ദ്ദാ•ക്കി•യ•</data>
# വ്യാപിക്കുക ;
<data>•വ്യാ•പി•ക്കു•ക•</data>
# സ്തുതി ;
<data>•സ്തു•തി•</data>
# ഭക്ഷ്യസുരക്ഷ ;
<data>•ഭ•ക്ഷ്യ•സു•ര•ക്ഷ•</data>
# പൂഴ്ത്തിവെക്കുക ;
<data>•പൂ•ഴ്ത്തി•വെ•ക്കു•ക•</data>
# നിശ്ചയിച്ച ;
<data>•നി•ശ്ച•യി•ച്ച•</data>
# പ്രശ്നം ;
<data>•പ്ര•ശ്നം•</data>
# സംസ്ഥാനം ;
<data>•സം•സ്ഥാ•നം•</data>
# പ്രോത്സാഹം ;
<data>•പ്രോ•ത്സാ•ഹം•</data>
# ഉദ്യോഗസ്ഥ ;
<data>•ഉ•ദ്യോ•ഗ•സ്ഥ•</data>
# സ്ഥാപനം ;
<data>•സ്ഥാ•പ•നം•</data>
# അത്ഭുതം ;
<data>•അ•ത്ഭു•തം•</data>
# പ്രഖ്യാപനം ;
<data>•പ്ര•ഖ്യാ•പ•നം•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Odia.txt
#
# ଅସ୍ବରାନ୍ତ ;
<data>•ଅ•ସ୍ବ•ରା•ନ୍ତ•</data>
# ଅକର୍ମଣ୍ୟତା ;
<data>•ଅ•କ•ର୍ମ•ଣ୍ୟ•ତା•</data>
# ଅକର୍ମା ;
<data>•ଅ•କ•ର୍ମା•</data>
# ଆକର୍ଣ୍ଣ ;
<data>•ଆ•କ•ର୍ଣ୍ଣ•</data>
# ଆକସ୍ମିକୀ ;
<data>•ଆ•କ•ସ୍ମି•କୀ•</data>
# ଇଞ୍ଚମନୌତୀ ;
<data>•ଇ•ଞ୍ଚ•ମ•ନୌ•ତୀ•</data>
# ଅଗତ୍ୟା ;
<data>•ଅ•ଗ•ତ୍ୟା•</data>
# ଇନ୍ଦ୍ରଧ୍ବଜ ;
<data>•ଇ•ନ୍ଦ୍ର•ଧ୍ବ•ଜ•</data>
# ଊରୁତ୍ରାଣ ;
<data>•ଊ•ରୁ•ତ୍ରା•ଣ•</data>
# ଐଶ୍ବର୍ଯ୍ୟ ;
<data>•ଐ•ଶ୍ବ•ର୍ଯ୍ୟ•</data>
# ଅଗତ୍ୟା ;
<data>•ଅ•ଗ•ତ୍ୟା•</data>
# ଔପନ୍ୟାସିକ ;
<data>•ଔ•ପ•ନ୍ୟା•ସି•କ•</data>
# ଔଷ୍ଠ୍ୟ ;
<data>•ଔ•ଷ୍ଠ୍ୟ•</data>
# ଯଜୁର୍ବେଦୀ ;
<data>•ଯ•ଜୁ•ର୍ବେ•ଦୀ•</data>
# ପକ୍ଷ୍ମ ;
<data>•ପ•କ୍ଷ୍ମ•</data>
# ପଞ୍ଚଭୌତିକ ;
<data>•ପ•ଞ୍ଚ•ଭୌ•ତି•କ•</data>
# ତନ୍ତ୍ରିକାତନ୍ତ୍ର ;
<data>•ତ•ନ୍ତ୍ରି•କା•ତ•ନ୍ତ୍ର•</data>
# ସ୍ନାୟୁତନ୍ତ୍ର ;
<data>•ସ୍ନା•ୟୁ•ତ•ନ୍ତ୍ର•</data>
# ତପ୍ତକୁଣ୍ଡ ;
<data>•ତ•ପ୍ତ•କୁ•ଣ୍ଡ•</data>
# ଚଣ୍ଡୋଦରୀ ;
<data>•ଚ•ଣ୍ଡୋ•ଦ•ରୀ•</data>
# ଝଙ୍କାର ;
<data>•ଝ•ଙ୍କା•ର•</data>
# କଙ୍କପୃଷ୍ଠୀ ;
<data>•କ•ଙ୍କ•ପୃ•ଷ୍ଠୀ•</data>
# ଖଣ୍ଡନୀୟ ;
<data>•ଖ•ଣ୍ଡ•ନୀ•ୟ•</data>
# ଖମ୍ଭାବତୀ ;
<data>•ଖ•ମ୍ଭା•ବ•ତୀ•</data>
# ଘାଣେନ୍ଦ୍ରିୟ ;
<data>•ଘା•ଣେ•ନ୍ଦ୍ରି•ୟ•</data>
# ଘୁଞ୍ଚାଇବା ;
<data>•ଘୁ•ଞ୍ଚା•ଇ•ବା•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Telugu.txt
#
# అదృశ్యం ;
<data>•అ•దృ•శ్యం•</data>
# ఉద్యోగాలు ;
<data>•ఉ•ద్యో•గా•లు•</data>
# ఉన్నాయన్న ;
<data>•ఉ•న్నా•య•న్న•</data>
# కార్యదర్శి ;
<data>•కా•ర్య•ద•ర్శి•</data>
# సామర్థ్యం ;
<data>•సా•మ•ర్థ్యం•</data>
# అభిప్రాయం ;
<data>•అ•భి•ప్రా•యం•</data>
# రాష్ట్రస్థాయి ;
<data>•రా•ష్ట్ర•స్థా•యి•</data>
# నిర్లక్ష్యం ;
<data>•ని•ర్ల•క్ష్యం•</data>
# వ్యాజ్యాలన్నీ ;
<data>•వ్యా•జ్యా•ల•న్నీ•</data>
# న్యాయవ్యవస్థ ;
<data>•న్యా•య•వ్య•వ•స్థ•</data>
# వ్యాఖ్యలు ;
<data>•వ్యా•ఖ్య•లు•</data>
# నేతృత్వం ;
<data>•నే•తృ•త్వం•</data>
# ఉద్రిక్తత ;
<data>•ఉ•ద్రి•క్త•త•</data>
# వ్యాఖ్యలు ;
<data>•వ్యా•ఖ్య•లు•</data>
# అత్యున్నత ;
<data>•అ•త్యు•న్న•త•</data>
# మనస్పర్ధలు ;
<data>•మ•న•స్ప•ర్ధ•లు•</data>
# కార్యక్రమం ;
<data>•కా•ర్య•క్ర•మం•</data>
# గుప్పిస్తున్నారు ;
<data>•గు•ప్పి•స్తు•న్నా•రు•</data>
# నటిస్తున్నారు ;
<data>•న•టి•స్తు•న్నా•రు•</data>
# ద్వితీయార్ధం ;
<data>•ద్వి•తీ•యా•ర్ధం•</data>
# జీర్ణవ్యవస్థ ;
<data>•జీ•ర్ణ•వ్య•వ•స్థ•</data>
# ఉత్సాహం ;
<data>•ఉ•త్సా•హం•</data>
# హృద్రోగాలు ;
<data>•హృ•ద్రో•గా•లు•</data>
# పాల్గొనాల్సింది ;
<data>•పా•ల్గొ•నా•ల్సిం•ది•</data>
# మార్గదర్శకాలు ;
<data>•మా•ర్గ•ద•ర్శ•కా•లు•</data>
########################################################################################
#

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:daf3da9b26c03ccb859820cecb59e45827db628cc63730995287f0eecb648b1c
size 12842333
oid sha256:5c8773434e9708bca02ad11319c35e01f29f62748851a38ae89de1334c279cca
size 12842785

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e20cb098cebe9ec0c5ee25c3b28d1918fd4147525e8f893a9dcaa763b962409b
size 94073
oid sha256:744f6e6d4a252e51b13ac3c0b9a580e21ba469cd2959b2561e3022636ecf126c
size 94060

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a09da92d612c34c7f468f073b356c7323e14f0fb53b0eb34483beed0a296ac4
size 723338
oid sha256:bcb07f3738f1e8c216ac47e9b091a0946e107e3f74251e6be3a21adba8dd71c4
size 723370

View File

@ -93,6 +93,9 @@ public class RBBITestMonkey extends TestFmwk {
UnicodeSet fHangulSet;
UnicodeSet fZWJSet;
UnicodeSet fExtendedPictSet;
UnicodeSet fViramaSet;
UnicodeSet fLinkingConsonantSet;
UnicodeSet fExtCccZwjSet;
UnicodeSet fAnySet;
@ -122,6 +125,11 @@ public class RBBITestMonkey extends TestFmwk {
fHangulSet.addAll(fLVTSet);
fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]");
fViramaSet = new UnicodeSet("[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ "\\p{Indic_Syllabic_Category=Virama}]");
fLinkingConsonantSet = new UnicodeSet("[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ "\\p{Indic_Syllabic_Category=Consonant}]");
fExtCccZwjSet = new UnicodeSet("[[\\p{gcb=Extend}-\\p{ccc=0}] \\p{gcb=ZWJ}]");
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]");
@ -138,6 +146,9 @@ public class RBBITestMonkey extends TestFmwk {
fSets.add(fAnySet);
fSets.add(fZWJSet);
fSets.add(fExtendedPictSet);
fSets.add(fViramaSet);
fSets.add(fLinkingConsonantSet);
fSets.add(fExtCccZwjSet);
}
@ -253,6 +264,22 @@ public class RBBITestMonkey extends TestFmwk {
continue;
}
// Rule (GB9.3) LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
// Note: Viramas are also included in the ExtCccZwj class.
// The same sequence appears as rule GB9c in the monkey-test grapheme rule file.
// The rule can only apply when the character after the candidate boundary (c2)
// is a LinkingConsonant, so everything else is skipped cheaply.
if (fLinkingConsonantSet.contains(c2)) {
// Look backwards from p1 across the run of ExtCccZwj code points,
// remembering whether at least one of them is a Virama.
int pi = p1;
boolean sawVirama = false;
while (pi > 0 && fExtCccZwjSet.contains(fText.codePointAt(pi))) {
if (fViramaSet.contains(fText.codePointAt(pi))) {
sawVirama = true;
}
// Step back by one whole code point (not one UTF-16 code unit).
pi = fText.offsetByCodePoints(pi, -1);
}
// pi is now at index 0 or at the first code point (scanning backwards) that is
// not in ExtCccZwj. The boundary is suppressed only if a Virama was seen in the
// run and that code point is itself a LinkingConsonant.
if (sawVirama && fLinkingConsonantSet.contains(fText.codePointAt(pi))) {
continue;
}
}
// Rule (GB11) Extended_Pictographic ZWJ x Extended_Pictographic
if (fExtendedPictSet.contains(cBase) && fZWJSet.contains(c1) && fExtendedPictSet.contains(c2) ) {
continue;

View File

@ -37,6 +37,13 @@ LVT = [\p{Grapheme_Cluster_Break = LVT}];
Extended_Pict = [:ExtPict:];
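# Indic sequences: character classes used by rule GB9c below
# (LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant).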
Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);
@ -46,6 +53,7 @@ GB7: (LV | V) (V | T);
GB8: (LVT | T) T;
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
GB9: . (Extend | ZWJ);
GB9a: . SpacingMark;

View File

@ -164,6 +164,498 @@
#
#<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
#
# ICU-13637 and CLDR-10994 - Indic Grapheme Cluster Boundary changes to support aksaras
# New rule: LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
# Sample Chars: LinkingConsonant: \u0915
# Virama: \u094d [also Extend]
# ExtCccZwj: \u0308
# Extend but not ExtCccZwj: \u093A
<char>
<data>•\u0915\u094d\u0915•</data>
<data>•\u0915\u0308\u0308\u094d\u0308\u0308\u0915•</data>
<data>•\u0915\u0308\u0308\u094d\u0308\u0308•\u0041•</data>
<data>•\u0915\u0308\u0308\u094d\u093A\u093A•\u0915•</data>
#
# From cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Bengali.txt
#
# ব্যক্তিত্বের ;
<data>•ব্য•ক্তি•ত্বে•র•</data>
# আত্মবিশ্বাস ;
<data>•আ•ত্ম•বি•শ্বা•স•</data>
# ব্যাক্টেরিয়া ;
<data>•ব্যা•ক্টে•রি•য়া•</data>
# সমস্যার ;
<data>•স•ম•স্যা•র•</data>
# মিশ্রণ ;
<data>•মি•শ্র•ণ•</data>
# দুর্গন্ধ ;
<data>•দু•র্গ•ন্ধ•</data>
# পরীক্ষার ;
<data>•প•রী•ক্ষা•র•</data>
# কোলেস্টেরল ;
<data>•কো•লে•স্টে•র•ল•</data>
# ব্যায়ামকে ;
<data>•ব্যা•য়া•ম•কে•</data>
# সপ্তাহে ;
<data>•স•প্তা•হে•</data>
# পরীক্ষার ;
<data>•প•রী•ক্ষা•র•</data>
# চর্বিজাতীয় ;
<data>•চ•র্বি•জা•তী•য়•</data>
# নিয়ণ্ত্রণ ;
<data>•নি•য়•ণ্ত্র•ণ•</data>
# অবশ্যই ;
<data>•অ•ব•শ্য•ই•</data>
# নয়াদিল্লির ;
<data>•ন•য়া•দি•ল্লি•র•</data>
# সমীক্ষাটা ;
<data>•স•মী•ক্ষা•টা•</data>
# #ভূমিকম্পের ;
# <data>•ভূ•মি•ক•ম্পের•</data> # line 17 in TestSegmenter-Bengali.txt
# কেন্দ্রীয় ;
<data>•কে•ন্দ্রী•য়•</data>
# উস্কানিই ;
<data>•উ•স্কা•নি•ই•</data>
# সমীক্ষকরা ;
<data>•স•মী•ক্ষ•ক•রা•</data>
# মুহূর্তে ;
<data>•মু•হূ•র্তে•</data>
# সম্পর্কে ;
<data>•স•ম্প•র্কে•</data>
# পৌষসংক্রান্তির ;
<data>•পৌ•ষ•সং•ক্রা•ন্তি•র•</data>
# মুখ্যমন্ত্রী ;
<data>•মু•খ্য•ম•ন্ত্রী•</data>
#
# From cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Devanagari.txt
#
# संदिग्ध ;
<data>•सं•दि•ग्ध•</data>
# सुरक्षा ;
<data>•सु•र•क्षा•</data>
# टक्कर ;
<data>•ट•क्क•र•</data>
# सत्र ;
<data>•स•त्र•</data>
# दक्षिण ;
<data>•द•क्षि•ण•</data>
# मिश्रणाने ;
<data>•मि•श्र•णा•ने•</data>
# दुर्घटनाग्रस्त ;
<data>•दु•र्घ•ट•ना•ग्र•स्त•</data>
# मुहूर्त ;
<data>•मु•हू•र्त•</data>
# शर्करायुक्त ;
<data>•श•र्क•रा•यु•क्त•</data>
# अंतरराष्ट्रीय ;
<data>•अं•त•र•रा•ष्ट्री•य•</data>
# राष्ट्रपति ;
<data>•रा•ष्ट्र•प•ति•</data>
# फ्रांस ;
<data>•फ्रां•स•</data>
# ट्रैक्टर ;
<data>•ट्रै•क्ट•र•</data>
# सिट्रोनेलाचे ;
<data>•सि•ट्रो•ने•ला•चे•</data>
# टुक्रालाई ;
<data>•टु•क्रा•ला•ई•</data>
# इकट्ठा ;
<data>•इ•क•ट्ठा•</data>
# शास्त्र ;
<data>•शा•स्त्र•</data>
# स्त्री ;
<data>•स्त्री•</data>
# लक्ष्य ;
<data>•ल•क्ष्य•</data>
# तीक्ष्ण ;
<data>•ती•क्ष्ण•</data>
# Words ;
<data>•W•o•r•d•s•</data>
# त्रिवेदी ;
<data>•त्रि•वे•दी•</data>
# कृत्रिम ;
<data>•कृ•त्रि•म•</data>
# मात्रामा ;
<data>•मा•त्रा•मा•</data>
# सिद्धार्थनगर ;
<data>•सि•द्धा•र्थ•न•ग•र•</data>
# श्रद्धालुओं ;
<data>•श्र•द्धा•लु•ओं•</data>
# वृद्धिसँग ;
<data>•वृ•द्धि•सँ•ग•</data>
# अंतःज्ञानी ;
<data>•अं•तः•ज्ञा•नी•</data>
# गन्नदी॑धिम ;
<data>•ग•न्न•दी॑•धि•म•</data>
# प्प्रप॑द्ये॒ ;
<data>•प्प्र•प॑•द्ये॒•</data>
# मनस्तापः ;
<data>•म•न•स्ता•पः•</data>
# हविष्करोमि ;
<data>•ह•वि•ष्क•रो•मि•</data>
# अहर्पतिः ;
<data>•अ•ह•र्प•तिः•</data>
# गच्छति ;
<data>•ग•च्छ•ति•</data>
# अयम् ;
<data>•अ•य•म्•</data>
# शिवश्चोदति ;
<data>•शि•व•श्चो•द•ति•</data>
# मनष्टालयति ;
<data>•म•न•ष्टा•ल•य•ति•</data>
# अश्वष्ठक्कस्य ;
<data>•अ•श्व•ष्ठ•क्क•स्य•</data>
# दुष्पुत्रः ;
<data>•दु•ष्पु•त्रः•</data>
# द्विःपक्वम् ;
<data>•द्विः•प•क्व•म्•</data>
# द्विष्कामः ;
<data>•द्वि•ष्का•मः•</data>
# भर्तुर्भोगः ;
<data>•भ•र्तु•र्भो•गः•</data>
# शॆत्युल ;
<data>•शॆ•त्यु•ल•</data>
# महारॆन्य ;
<data>•म•हा•रॆ•न्य•</data>
# सॆक्युल ;
<data>•सॆ•क्यु•ल•</data>
# ल्यॊदुर ;
<data>•ल्यॊ•दु•र•</data>
# फयॊक ;
<data>•फ•यॊ•क•</data>
# मॊहन्युव ;
<data>•मॊ•ह•न्यु•व•</data>
# अन्यर ;
<data>•अ•न्य•र•</data>
# ख्वजि ;
<data>•ख्व•जि•</data>
# खॅरिन्य ;
<data>•खॅ•रि•न्य•</data>
# उच्छ्वास ;
<data>•उ•च्छ्वा•स•</data>
# व्यक्तिमत्व ;
<data>•व्य•क्ति•म•त्व•</data>
# दातांच्यामध्ये ;
<data>•दा•तां•च्या•म•ध्ये•</data>
# दुर्गंधी ;
<data>•दु•र्गं•धी•</data>
# दुर्गंधीपासूनसुद्धा ;
<data>•दु•र्गं•धी•पा•सू•न•सु•द्धा•</data>
# नित्यकर्मामध्ये ;
<data>•नि•त्य•क•र्मा•म•ध्ये•</data>
# आजारांपासूनसुद्धा ;
<data>•आ•जा•रां•पा•सू•न•सु•द्धा•</data>
# भाज्यांमध्ये ;
<data>•भा•ज्यां•म•ध्ये•</data>
# उच्छ्वासाच्या ;
<data>•उ•च्छ्वा•सा•च्या•</data>
# सुकिल्लीं ;
<data>•सु•कि•ल्लीं•</data>
# स्लिपां ;
<data>•स्लि•पां•</data>
# मिसळिल्ल्यान ;
<data>•मि•स•ळि•ल्ल्या•न•</data>
# रोंप्यांची ;
<data>•रों•प्यां•ची•</data>
# वर्सांतल्यान ;
<data>•व•र्सां•त•ल्या•न•</data>
# रोंप्याच्या ;
<data>•रों•प्या•च्या•</data>
# नाशिल्ल्यान ;
<data>•ना•शि•ल्ल्या•न•</data>
# जिल्ल्याच्या ;
<data>•जि•ल्ल्या•च्या•</data>
# कुरुक्षेत्रांतल्या ;
<data>•कु•रु•क्षे•त्रां•त•ल्या•</data>
# भाज्ज्यांची ;
<data>•भा•ज्ज्यां•ची•</data>
# सिट्रोनेलाका ;
<data>•सि•ट्रो•ने•ला•का•</data>
# गरिनुपर्छ ;
<data>•ग•रि•नु•प•र्छ•</data>
# सामान्यतः ;
<data>•सा•मा•न्य•तः•</data>
# वृद्धिसँग ;
<data>•वृ•द्धि•सँ•ग•</data>
# रिपोर्टनि ;
<data>•रि•पो•र्ट•नि•</data>
# टोस्टर्ज़ ;
<data>•टो•स्ट•र्ज़•</data>
# वक्तव्य ;
<data>•व•क्त•व्य•</data>
# प्रक्रिया ;
<data>•प्र•क्रि•या•</data>
# निर्दिष्ट ;
<data>•नि•र्दि•ष्ट•</data>
# अस्वीकृत ;
<data>•अ•स्वी•कृ•त•</data>
# प्रयोक्ता ;
<data>•प्र•यो•क्ता•</data>
# प्रकार्यक ;
<data>•प्र•का•र्य•क•</data>
# ट्रेक्टरु ;
<data>•ट्रे•क्ट•रु•</data>
# स्थानधारक ;
<data>•स्था•न•धा•र•क•</data>
# प्रकार्यक ;
<data>•प्र•का•र्य•क•</data>
# अनुच्छेदसँ ;
<data>•अ•नु•च्छे•द•सँ•</data>
# गर्मीपदु ;
<data>•ग•र्मी•प•दु•</data>
# शास्त्रु ;
<data>•शा•स्त्रु•</data>
# इन्द्री ;
<data>•इ•न्द्री•</data>
# श्रधालू ;
<data>•श्र•धा•लू•</data>
# आस्तिकु ;
<data>•आ•स्ति•कु•</data>
# सकार्थो ;
<data>•स•का•र्थो•</data>
# सन्ॿंधु ;
<data>•स•न्ॿं•धु•</data>
# मनुक्खो ;
<data>•म•नु•क्खो•</data>
# हानिफ्राय ;
<data>•हा•नि•फ्रा•य•</data>
# दैथाइहरग्रा ;
<data>•दै•था•इ•ह•र•ग्रा•</data>
# बोसोरब्रै ;
<data>•बो•सो•र•ब्रै•</data>
# रांखान्थियारि ;
<data>•रां•खा•न्थि•या•रि•</data>
# खान्थियाव ;
<data>•खा•न्थि•या•व•</data>
# स्लिप्स ;
<data>•स्लि•प्स•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Gujarati.txt
#
# અંગ્રેજી ;
<data>•અં•ગ્રે•જી•</data>
# શબ્દકોશ ;
<data>•શ•બ્દ•કો•શ•</data>
# બાપ્તિસ્મા ;
<data>•બા•પ્તિ•સ્મા•</data>
# મિસ્ત્રી ;
<data>•મિ•સ્ત્રી•</data>
# સિક્કા ;
<data>•સિ•ક્કા•</data>
#6 ; એકત્રીસમું ; એ÷કત્રી÷સ÷મું
# સ્વસ્થાને ;
<data>•સ્વ•સ્થા•ને•</data>
# પ્રશ્નાર્થ ;
<data>•પ્ર•શ્ના•ર્થ•</data>
# વર્તમાનકૃદંત ;
<data>•વ•ર્ત•મા•ન•કૃ•દં•ત•</data>
# વાક્યની ;
<data>•વા•ક્ય•ની•</data>
# સાર્વજનિક ;
<data>•સા•ર્વ•જ•નિ•ક•</data>
# સમાપ્તિ ;
<data>•સ•મા•પ્તિ•</data>
# પધાર્યા ;
<data>•પ•ધા•ર્યા•</data>
# વ્યક્તિત્વને ;
<data>•વ્ય•ક્તિ•ત્વ•ને•</data>
# આત્મવિશ્વાસ ;
<data>•આ•ત્મ•વિ•શ્વા•સ•</data>
# વ્યાયામથી ;
<data>•વ્યા•યા•મ•થી•</data>
# યુક્ત ;
<data>•યુ•ક્ત•</data>
# #18 ; પુરુષોત્તમ ;
<data>•પ•રુ•ષો•ત્ત•મ•</data>
# કેન્દ્રીય ;
<data>•કે•ન્દ્રી•ય•</data>
# ક્ષત્રિય ;
<data>•ક્ષ•ત્રિ•ય•</data>
# ફોર્મ્યુલા ;
<data>•ફો•ર્મ્યુ•લા•</data>
# કેન્દ્રમાં ;
<data>•કે•ન્દ્ર•માં•</data>
# સ્પ્રિંગ ;
<data>•સ્પ્રિં•ગ•</data>
# પ્રારંભ ;
<data>•પ્રા•રં•ભ•</data>
# વિદ્યાર્થીઓ ;
<data>•વિ•દ્યા•ર્થી•ઓ•</data>
# સ્વાર્થની ;
<data>•સ્વા•ર્થ•ની•</data>
# લોન્ગયરબ્યેન ;
<data>•લો•ન્ગ•ય•ર•બ્યે•ન•</data>
# સ્થાનિક ;
<data>•સ્થા•નિ•ક•</data>
# બિલ્ડિંગની ;
<data>•બિ•લ્ડિં•ગ•ની•</data>
# ઉત્પાદક ;
<data>•ઉ•ત્પા•દ•ક•</data>
# ઝૂકાવ્યું ;
<data>•ઝૂ•કા•વ્યું•</data>
# પ્રપૌત્ર ;
<data>•પ્ર•પૌ•ત્ર•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Malayalam.txt
#
# സ്ഥാനമൊഴിയുക ;
<data>•സ്ഥാ•ന•മൊ•ഴി•യു•ക•</data>
# വ്യക്തി ;
<data>•വ്യ•ക്തി•</data>
# കൗമാരക്കാരി ;
<data>•കൗ•മാ•ര•ക്കാ•രി•</data>
# കല്യാണം ;
<data>•ക•ല്യാ•ണം•</data>
# റദ്ദാക്കിയ ;
<data>•റ•ദ്ദാ•ക്കി•യ•</data>
# വ്യാപിക്കുക ;
<data>•വ്യാ•പി•ക്കു•ക•</data>
# സ്തുതി ;
<data>•സ്തു•തി•</data>
# ഭക്ഷ്യസുരക്ഷ ;
<data>•ഭ•ക്ഷ്യ•സു•ര•ക്ഷ•</data>
# പൂഴ്ത്തിവെക്കുക ;
<data>•പൂ•ഴ്ത്തി•വെ•ക്കു•ക•</data>
# നിശ്ചയിച്ച ;
<data>•നി•ശ്ച•യി•ച്ച•</data>
# പ്രശ്നം ;
<data>•പ്ര•ശ്നം•</data>
# സംസ്ഥാനം ;
<data>•സം•സ്ഥാ•നം•</data>
# പ്രോത്സാഹം ;
<data>•പ്രോ•ത്സാ•ഹം•</data>
# ഉദ്യോഗസ്ഥ ;
<data>•ഉ•ദ്യോ•ഗ•സ്ഥ•</data>
# സ്ഥാപനം ;
<data>•സ്ഥാ•പ•നം•</data>
# അത്ഭുതം ;
<data>•അ•ത്ഭു•തം•</data>
# പ്രഖ്യാപനം ;
<data>•പ്ര•ഖ്യാ•പ•നം•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Odia.txt
#
# ଅସ୍ବରାନ୍ତ ;
<data>•ଅ•ସ୍ବ•ରା•ନ୍ତ•</data>
# ଅକର୍ମଣ୍ୟତା ;
<data>•ଅ•କ•ର୍ମ•ଣ୍ୟ•ତା•</data>
# ଅକର୍ମା ;
<data>•ଅ•କ•ର୍ମା•</data>
# ଆକର୍ଣ୍ଣ ;
<data>•ଆ•କ•ର୍ଣ୍ଣ•</data>
# ଆକସ୍ମିକୀ ;
<data>•ଆ•କ•ସ୍ମି•କୀ•</data>
# ଇଞ୍ଚମନୌତୀ ;
<data>•ଇ•ଞ୍ଚ•ମ•ନୌ•ତୀ•</data>
# ଅଗତ୍ୟା ;
<data>•ଅ•ଗ•ତ୍ୟା•</data>
# ଇନ୍ଦ୍ରଧ୍ବଜ ;
<data>•ଇ•ନ୍ଦ୍ର•ଧ୍ବ•ଜ•</data>
# ଊରୁତ୍ରାଣ ;
<data>•ଊ•ରୁ•ତ୍ରା•ଣ•</data>
# ଐଶ୍ବର୍ଯ୍ୟ ;
<data>•ଐ•ଶ୍ବ•ର୍ଯ୍ୟ•</data>
# ଅଗତ୍ୟା ;
<data>•ଅ•ଗ•ତ୍ୟା•</data>
# ଔପନ୍ୟାସିକ ;
<data>•ଔ•ପ•ନ୍ୟା•ସି•କ•</data>
# ଔଷ୍ଠ୍ୟ ;
<data>•ଔ•ଷ୍ଠ୍ୟ•</data>
# ଯଜୁର୍ବେଦୀ ;
<data>•ଯ•ଜୁ•ର୍ବେ•ଦୀ•</data>
# ପକ୍ଷ୍ମ ;
<data>•ପ•କ୍ଷ୍ମ•</data>
# ପଞ୍ଚଭୌତିକ ;
<data>•ପ•ଞ୍ଚ•ଭୌ•ତି•କ•</data>
# ତନ୍ତ୍ରିକାତନ୍ତ୍ର ;
<data>•ତ•ନ୍ତ୍ରି•କା•ତ•ନ୍ତ୍ର•</data>
# ସ୍ନାୟୁତନ୍ତ୍ର ;
<data>•ସ୍ନା•ୟୁ•ତ•ନ୍ତ୍ର•</data>
# ତପ୍ତକୁଣ୍ଡ ;
<data>•ତ•ପ୍ତ•କୁ•ଣ୍ଡ•</data>
# ଚଣ୍ଡୋଦରୀ ;
<data>•ଚ•ଣ୍ଡୋ•ଦ•ରୀ•</data>
# ଝଙ୍କାର ;
<data>•ଝ•ଙ୍କା•ର•</data>
# କଙ୍କପୃଷ୍ଠୀ ;
<data>•କ•ଙ୍କ•ପୃ•ଷ୍ଠୀ•</data>
# ଖଣ୍ଡନୀୟ ;
<data>•ଖ•ଣ୍ଡ•ନୀ•ୟ•</data>
# ଖମ୍ଭାବତୀ ;
<data>•ଖ•ମ୍ଭା•ବ•ତୀ•</data>
# ଘାଣେନ୍ଦ୍ରିୟ ;
<data>•ଘା•ଣେ•ନ୍ଦ୍ରି•ୟ•</data>
# ଘୁଞ୍ଚାଇବା ;
<data>•ଘୁ•ଞ୍ଚା•ଇ•ବା•</data>
#
# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Telugu.txt
#
# అదృశ్యం ;
<data>•అ•దృ•శ్యం•</data>
# ఉద్యోగాలు ;
<data>•ఉ•ద్యో•గా•లు•</data>
# ఉన్నాయన్న ;
<data>•ఉ•న్నా•య•న్న•</data>
# కార్యదర్శి ;
<data>•కా•ర్య•ద•ర్శి•</data>
# సామర్థ్యం ;
<data>•సా•మ•ర్థ్యం•</data>
# అభిప్రాయం ;
<data>•అ•భి•ప్రా•యం•</data>
# రాష్ట్రస్థాయి ;
<data>•రా•ష్ట్ర•స్థా•యి•</data>
# నిర్లక్ష్యం ;
<data>•ని•ర్ల•క్ష్యం•</data>
# వ్యాజ్యాలన్నీ ;
<data>•వ్యా•జ్యా•ల•న్నీ•</data>
# న్యాయవ్యవస్థ ;
<data>•న్యా•య•వ్య•వ•స్థ•</data>
# వ్యాఖ్యలు ;
<data>•వ్యా•ఖ్య•లు•</data>
# నేతృత్వం ;
<data>•నే•తృ•త్వం•</data>
# ఉద్రిక్తత ;
<data>•ఉ•ద్రి•క్త•త•</data>
# వ్యాఖ్యలు ;
<data>•వ్యా•ఖ్య•లు•</data>
# అత్యున్నత ;
<data>•అ•త్యు•న్న•త•</data>
# మనస్పర్ధలు ;
<data>•మ•న•స్ప•ర్ధ•లు•</data>
# కార్యక్రమం ;
<data>•కా•ర్య•క్ర•మం•</data>
# గుప్పిస్తున్నారు ;
<data>•గు•ప్పి•స్తు•న్నా•రు•</data>
# నటిస్తున్నారు ;
<data>•న•టి•స్తు•న్నా•రు•</data>
# ద్వితీయార్ధం ;
<data>•ద్వి•తీ•యా•ర్ధం•</data>
# జీర్ణవ్యవస్థ ;
<data>•జీ•ర్ణ•వ్య•వ•స్థ•</data>
# ఉత్సాహం ;
<data>•ఉ•త్సా•హం•</data>
# హృద్రోగాలు ;
<data>•హృ•ద్రో•గా•లు•</data>
# పాల్గొనాల్సింది ;
<data>•పా•ల్గొ•నా•ల్సిం•ది•</data>
# మార్గదర్శకాలు ;
<data>•మా•ర్గ•ద•ర్శ•కా•లు•</data>
########################################################################################
#