ICU-13630 Update RBBI for Unicode 11 data.
X-SVN-Rev: 41194
This commit is contained in:
parent
03303a6cb6
commit
4e097dc49f
@ -20,6 +20,9 @@
|
||||
$CR = [\p{Grapheme_Cluster_Break = CR}];
|
||||
$LF = [\p{Grapheme_Cluster_Break = LF}];
|
||||
$Control = [[\p{Grapheme_Cluster_Break = Control}]];
|
||||
# TODO: Enable Virama & LinkingConsonant definitions once rule builder allows empty sets.
|
||||
#$Virama = [[\p{Grapheme_Cluster_Break = Virama}]];
|
||||
#$LinkingConsonant = [[\p{Grapheme_Cluster_Break = LinkingConsonant}]];
|
||||
$Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
|
||||
$ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
|
||||
$Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
|
||||
@ -39,7 +42,6 @@ $LVT = [\p{Grapheme_Cluster_Break = LVT}];
|
||||
# Emoji definitions
|
||||
|
||||
$Extended_Pict = [:ExtPict:];
|
||||
$EmojiNRK = [[\p{Emoji}] - [\p{Grapheme_Cluster_Break = Regional_Indicator}*\u00230-9©®™〰〽]];
|
||||
|
||||
## -------------------------------------------------
|
||||
!!chain;
|
||||
@ -62,7 +64,7 @@ $L ($L | $V | $LV | $LVT);
|
||||
$Prepend [^$Control $CR $LF];
|
||||
|
||||
# GB 11 Do not break within emoji modifier sequences or emoji zwj sequences.
|
||||
($Extended_Pict | $EmojiNRK) $Extend* $ZWJ ($Extended_Pict | $EmojiNRK);
|
||||
$Extended_Pict $Extend* $ZWJ $Extended_Pict;
|
||||
|
||||
# GB 12-13. Keep pairs of regional indicators together
|
||||
# Note that hard break '/' rule triggers only if there are three or more initial RIs,
|
||||
@ -77,4 +79,4 @@ $Prepend [^$Control $CR $LF];
|
||||
|
||||
!!safe_reverse;
|
||||
$Regional_Indicator $Regional_Indicator;
|
||||
($Extend | $ZWJ | $EmojiNRK | $Extended_Pict)+ .;
|
||||
($Extend | $ZWJ | $Extended_Pict)+ .;
|
||||
|
@ -46,8 +46,8 @@ $MidLetter = [\p{Word_Break = MidLetter}];
|
||||
$MidNum = [\p{Word_Break = MidNum}];
|
||||
$Numeric = [\p{Word_Break = Numeric}];
|
||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
$WSegSpace = [[\p{Zs}]-[\p{Line_Break = Glue}]];
|
||||
$Extended_Pict = [:ExtPict:];
|
||||
$EmojiNRK = [[\p{Emoji}] - [\p{Word_Break = Regional_Indicator}\u002a\u00230-9©®™〰〽]];
|
||||
|
||||
$Han = [:Han:];
|
||||
$Hiragana = [:Hiragana:];
|
||||
@ -101,8 +101,11 @@ $CR $LF;
|
||||
|
||||
# Rule 3c ZWJ x Extended_Pict. Precedes WB4, so no intervening Extend chars allowed.
|
||||
#
|
||||
$ZWJ ($Extended_Pict | $EmojiNRK);
|
||||
$ZWJ $Extended_Pict;
|
||||
|
||||
# Rule 3d - Keep horizontal whitespace together.
|
||||
#
|
||||
$WSegSpace $WSegSpace;
|
||||
|
||||
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
|
||||
# of a region of Text. The rule here comes into play when the start of text
|
||||
|
@ -46,8 +46,8 @@ $MidLetter = [\p{Word_Break = MidLetter} - [\:]];
|
||||
$MidNum = [\p{Word_Break = MidNum} [.]];
|
||||
$Numeric = [\p{Word_Break = Numeric}];
|
||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
$WSegSpace = [[\p{Zs}]-[\p{Line_Break = Glue}]];
|
||||
$Extended_Pict = [:ExtPict:];
|
||||
$EmojiNRK = [[\p{Emoji}] - [\p{Word_Break = Regional_Indicator}\u002a\u00230-9©®™〰〽]];
|
||||
|
||||
$Han = [:Han:];
|
||||
$Hiragana = [:Hiragana:];
|
||||
@ -101,8 +101,11 @@ $CR $LF;
|
||||
|
||||
# Rule 3c ZWJ x Extended_Pict. Precedes WB4, so no intervening Extend chars allowed.
|
||||
#
|
||||
$ZWJ ($Extended_Pict | $EmojiNRK);
|
||||
$ZWJ $Extended_Pict;
|
||||
|
||||
# Rule 3d - Keep horizontal whitespace together.
|
||||
#
|
||||
$WSegSpace $WSegSpace;
|
||||
|
||||
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
|
||||
# of a region of Text. The rule here comes into play when the start of text
|
||||
|
@ -1585,11 +1585,7 @@ private:
|
||||
UnicodeSet *fLVSet;
|
||||
UnicodeSet *fLVTSet;
|
||||
UnicodeSet *fHangulSet;
|
||||
UnicodeSet *fEmojiBaseSet;
|
||||
UnicodeSet *fEmojiModifierSet;
|
||||
UnicodeSet *fExtendedPictSet;
|
||||
UnicodeSet *fEBGSet;
|
||||
UnicodeSet *fEmojiNRKSet;
|
||||
UnicodeSet *fAnySet;
|
||||
|
||||
const UnicodeString *fText;
|
||||
@ -1621,12 +1617,7 @@ RBBICharMonkey::RBBICharMonkey() {
|
||||
fHangulSet->addAll(*fLVSet);
|
||||
fHangulSet->addAll(*fLVTSet);
|
||||
|
||||
fEmojiBaseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EB}]"), status);
|
||||
fEmojiModifierSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EM}]"), status);
|
||||
fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status);
|
||||
fEBGSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EBG}]"), status);
|
||||
fEmojiNRKSet = new UnicodeSet(UNICODE_STRING_SIMPLE(
|
||||
"[[\\p{Emoji}]-[\\p{Grapheme_Cluster_Break = Regional_Indicator}*#0-9\\u00a9\\u00ae\\u2122\\u3030\\u303d]]"), status);
|
||||
fAnySet = new UnicodeSet(0, 0x10ffff);
|
||||
|
||||
fSets = new UVector(status);
|
||||
@ -1640,12 +1631,8 @@ RBBICharMonkey::RBBICharMonkey() {
|
||||
fSets->addElement(fSpacingSet, status);
|
||||
fSets->addElement(fHangulSet, status);
|
||||
fSets->addElement(fAnySet, status);
|
||||
fSets->addElement(fEmojiBaseSet, status);
|
||||
fSets->addElement(fEmojiModifierSet, status);
|
||||
fSets->addElement(fZWJSet, status);
|
||||
fSets->addElement(fExtendedPictSet, status);
|
||||
fSets->addElement(fEBGSet, status);
|
||||
fSets->addElement(fEmojiNRKSet,status);
|
||||
if (U_FAILURE(status)) {
|
||||
deferredStatus = status;
|
||||
}
|
||||
@ -1765,22 +1752,8 @@ int32_t RBBICharMonkey::next(int32_t prevPos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB10) (Emoji_Base | EBG) Extend * x Emoji_Modifier
|
||||
if ((fEmojiBaseSet->contains(c1) || fEBGSet->contains(c1)) && fEmojiModifierSet->contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
if ((fEmojiBaseSet->contains(cBase) || fEBGSet->contains(cBase)) &&
|
||||
fExtendSet->contains(c1) && fEmojiModifierSet->contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB11) (Glue_After_ZWJ | Emoji) Extend * ZWJ x (Glue_After_ZWJ | Emoji)
|
||||
if ((fExtendedPictSet->contains(c0) || fEmojiNRKSet->contains(c0)) && fZWJSet->contains(c1) &&
|
||||
(fExtendedPictSet->contains(c2) || fEmojiNRKSet->contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
if ((fExtendedPictSet->contains(cBase) || fEmojiNRKSet->contains(cBase)) && fExtendSet->contains(c0) && fZWJSet->contains(c1) &&
|
||||
(fExtendedPictSet->contains(c2) || fEmojiNRKSet->contains(c2))) {
|
||||
// Rule (GB11) Extended_Pictographic Extend * ZWJ x Extended_Pictographic
|
||||
if (fExtendedPictSet->contains(cBase) && fZWJSet->contains(c1) && fExtendedPictSet->contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1827,12 +1800,8 @@ RBBICharMonkey::~RBBICharMonkey() {
|
||||
delete fLVTSet;
|
||||
delete fHangulSet;
|
||||
delete fAnySet;
|
||||
delete fEmojiBaseSet;
|
||||
delete fEmojiModifierSet;
|
||||
delete fZWJSet;
|
||||
delete fExtendedPictSet;
|
||||
delete fEBGSet;
|
||||
delete fEmojiNRKSet;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------------
|
||||
@ -1868,13 +1837,10 @@ private:
|
||||
UnicodeSet *fOtherSet;
|
||||
UnicodeSet *fExtendSet;
|
||||
UnicodeSet *fExtendNumLetSet;
|
||||
UnicodeSet *fWSegSpaceSet;
|
||||
UnicodeSet *fDictionarySet;
|
||||
UnicodeSet *fEBaseSet;
|
||||
UnicodeSet *fEBGSet;
|
||||
UnicodeSet *fEModifierSet;
|
||||
UnicodeSet *fZWJSet;
|
||||
UnicodeSet *fExtendedPictSet;
|
||||
UnicodeSet *fEmojiNRKSet;
|
||||
|
||||
const UnicodeString *fText;
|
||||
};
|
||||
@ -1902,14 +1868,10 @@ RBBIWordMonkey::RBBIWordMonkey()
|
||||
fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status);
|
||||
fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status);
|
||||
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status);
|
||||
fWSegSpaceSet = new UnicodeSet(u"[[\\p{Zs}]-[\\p{Line_Break = GL}]]", status);
|
||||
|
||||
fEBaseSet = new UnicodeSet(u"[\\p{Word_Break = EB}]", status);
|
||||
fEBGSet = new UnicodeSet(u"[\\p{Word_Break = EBG}]", status);
|
||||
fEModifierSet = new UnicodeSet(u"[\\p{Word_Break = EM}]", status);
|
||||
fZWJSet = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]", status);
|
||||
fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status);
|
||||
fEmojiNRKSet = new UnicodeSet(
|
||||
u"[[\\p{Emoji}]-[\\p{Word_Break = Regional_Indicator}*#0-9\\u00a9\\u00ae\\u2122\\u3030\\u303d]]", status);
|
||||
|
||||
fDictionarySet = new UnicodeSet(u"[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]", status);
|
||||
fDictionarySet->addAll(*fKatakanaSet);
|
||||
@ -1937,15 +1899,12 @@ RBBIWordMonkey::RBBIWordMonkey()
|
||||
fOtherSet->removeAll(*fMidNumSet);
|
||||
fOtherSet->removeAll(*fNumericSet);
|
||||
fOtherSet->removeAll(*fExtendNumLetSet);
|
||||
fOtherSet->removeAll(*fWSegSpaceSet);
|
||||
fOtherSet->removeAll(*fFormatSet);
|
||||
fOtherSet->removeAll(*fExtendSet);
|
||||
fOtherSet->removeAll(*fRegionalIndicatorSet);
|
||||
fOtherSet->removeAll(*fEBaseSet);
|
||||
fOtherSet->removeAll(*fEBGSet);
|
||||
fOtherSet->removeAll(*fEModifierSet);
|
||||
fOtherSet->removeAll(*fZWJSet);
|
||||
fOtherSet->removeAll(*fExtendedPictSet);
|
||||
fOtherSet->removeAll(*fEmojiNRKSet);
|
||||
|
||||
// Inhibit dictionary characters from being tested at all.
|
||||
fOtherSet->removeAll(*fDictionarySet);
|
||||
@ -1969,13 +1928,10 @@ RBBIWordMonkey::RBBIWordMonkey()
|
||||
fSets->addElement(fExtendSet, status);
|
||||
fSets->addElement(fOtherSet, status);
|
||||
fSets->addElement(fExtendNumLetSet, status);
|
||||
fSets->addElement(fWSegSpaceSet, status);
|
||||
|
||||
fSets->addElement(fEBaseSet, status);
|
||||
fSets->addElement(fEBGSet, status);
|
||||
fSets->addElement(fEModifierSet, status);
|
||||
fSets->addElement(fZWJSet, status);
|
||||
fSets->addElement(fExtendedPictSet, status);
|
||||
fSets->addElement(fEmojiNRKSet, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
deferredStatus = status;
|
||||
@ -2059,7 +2015,12 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) {
|
||||
// get the potential ZWJ, the character immediately preceding c2.
|
||||
// Sloppy UChar32 indexing: p2-1 may reference trail half
|
||||
// but char32At will get the full code point.
|
||||
if (fZWJSet->contains(fText->char32At(p2-1)) && (fExtendedPictSet->contains(c2) || fEmojiNRKSet->contains(c2))) {
|
||||
if (fZWJSet->contains(fText->char32At(p2-1)) && fExtendedPictSet->contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (3d) Keep horizontal whitespace together.
|
||||
if (fWSegSpaceSet->contains(fText->char32At(p2-1)) && fWSegSpaceSet->contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2192,15 +2153,12 @@ RBBIWordMonkey::~RBBIWordMonkey() {
|
||||
delete fFormatSet;
|
||||
delete fExtendSet;
|
||||
delete fExtendNumLetSet;
|
||||
delete fWSegSpaceSet;
|
||||
delete fRegionalIndicatorSet;
|
||||
delete fDictionarySet;
|
||||
delete fOtherSet;
|
||||
delete fEBaseSet;
|
||||
delete fEBGSet;
|
||||
delete fEModifierSet;
|
||||
delete fZWJSet;
|
||||
delete fExtendedPictSet;
|
||||
delete fEmojiNRKSet;
|
||||
}
|
||||
|
||||
|
||||
|
@ -35,7 +35,6 @@ LVT = [\p{Grapheme_Cluster_Break = LVT}];
|
||||
|
||||
# Emoji definitions
|
||||
|
||||
EmojiNRK = [[\p{Emoji}] - [Regional_Indicator\u002a\u00230-9©®™〰〽]];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
GB3: CR LF;
|
||||
@ -46,7 +45,7 @@ GB6: L (L | V | LV | LVT);
|
||||
GB7: (LV | V) (V | T);
|
||||
GB8: (LVT | T) T;
|
||||
|
||||
GB11: (Extended_Pict | EmojiNRK) Extend* ZWJ (Extended_Pict | EmojiNRK);
|
||||
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
|
||||
GB9: . (Extend | ZWJ);
|
||||
|
||||
GB9a: . SpacingMark;
|
||||
|
@ -32,7 +32,7 @@ MidLetter = [\p{Word_Break = MidLetter}];
|
||||
MidNum = [\p{Word_Break = MidNum}];
|
||||
Numeric = [\p{Word_Break = Numeric}];
|
||||
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
EmojiNRK = [[\p{Emoji}] - [[Regional_Indicator]\u002a\u00230-9©®™〰〽]];
|
||||
WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
#define dictionary, with the effect being that those characters don't appear in test data.
|
||||
@ -59,7 +59,8 @@ WB3: CR LF;
|
||||
WB3a: (Newline | CR | LF) ÷;
|
||||
WB3b: . ÷ (Newline | CR | LF); # actually redundant? No other rule combines.
|
||||
# (but needed with UAX treat-as scheme.)
|
||||
WB3c: ZWJ (Extended_Pict | EmojiNRK);
|
||||
WB3c: ZWJ Extended_Pict;
|
||||
WB3d: WSegSpace WSegSpace;
|
||||
|
||||
WB5: AHLetter ExtFmt* AHLetter;
|
||||
|
||||
@ -81,11 +82,11 @@ WB13b: ExtendNumLet ExtFmt* (AHLetter | Numeric | Katakana);
|
||||
|
||||
# WB rule 15 - 17, pairs of Regional Indicators stay unbroken.
|
||||
# Interacts with WB3c.
|
||||
WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ (Extended_Pict | EmojiNRK);
|
||||
WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ Extended_Pict;
|
||||
WB17: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷;
|
||||
|
||||
# Rule WB 999 Any ÷ Any
|
||||
# Interacts with WB3c, do not break between ZWJ and Extended_Pict.
|
||||
WB999.1: . ExtFmt* ZWJ (Extended_Pict | EmojiNRK);
|
||||
WB999.1: . ExtFmt* ZWJ Extended_Pict;
|
||||
WB999.2: . ExtFmt* ÷;
|
||||
|
||||
|
@ -31,7 +31,7 @@ MidLetter = [\p{Word_Break = MidLetter} - [\:]];
|
||||
MidNum = [\p{Word_Break = MidNum} [.]];
|
||||
Numeric = [\p{Word_Break = Numeric}];
|
||||
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
EmojiNRK = [[\p{Emoji}] - [[Regional_Indicator]\u002a\u00230-9©®™〰〽]];
|
||||
WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
#define dictionary, with the effect being that those characters don't appear in test data.
|
||||
@ -58,7 +58,8 @@ WB3: CR LF;
|
||||
WB3a: (Newline | CR | LF) ÷;
|
||||
WB3b: . ÷ (Newline | CR | LF); # actually redundant? No other rule combines.
|
||||
# (but needed with UAX treat-as scheme.)
|
||||
WB3c: ZWJ (Extended_Pict | EmojiNRK);
|
||||
WB3c: ZWJ Extended_Pict;
|
||||
WB3d: WSegSpace WSegSpace;
|
||||
|
||||
WB5: AHLetter ExtFmt* AHLetter;
|
||||
|
||||
@ -80,11 +81,11 @@ WB13b: ExtendNumLet ExtFmt* (AHLetter | Numeric | Katakana);
|
||||
|
||||
# WB rule 15 - 17, pairs of Regional Indicators stay unbroken.
|
||||
# Interacts with WB3c.
|
||||
WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ (Extended_Pict | EmojiNRK);
|
||||
WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ Extended_Pict;
|
||||
WB17: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷;
|
||||
|
||||
# Rule WB 999 Any ÷ Any
|
||||
# Interacts with WB3c, do not break between ZWJ and Extended_Pict.
|
||||
WB999.1: . ExtFmt* ZWJ (Extended_Pict | EmojiNRK);
|
||||
WB999.1: . ExtFmt* ZWJ Extended_Pict;
|
||||
WB999.2: . ExtFmt* ÷;
|
||||
|
||||
|
24
icu4c/source/test/testdata/rbbitst.txt
vendored
24
icu4c/source/test/testdata/rbbitst.txt
vendored
@ -184,7 +184,7 @@
|
||||
#
|
||||
|
||||
<word>
|
||||
<data>•This<200> •is<200> •a<200> •word<200> •break<200>.• • •Isn't<200> •it<200>?• •2.25<100></data>
|
||||
<data>•This<200> •is<200> •a<200> •word<200> •break<200>.• •Isn't<200> •it<200>?• •2.25<100></data>
|
||||
|
||||
|
||||
|
||||
@ -195,7 +195,7 @@
|
||||
<data>• •\u0939\u093f\u0928\u094d\u200d\u0926\u0940<200> •\u0939\u0948<200> •\u0905\u093e\u092a<200> •\u0938\u093f\u0916\u094b\u0917\u0947<200>?•</data>
|
||||
|
||||
#Hindi Numbers
|
||||
<data>• •\u0968\u0966.\u0969\u096f<100> •\u0967\u0966\u0966.\u0966\u0966<100> •\N{RUPEE SIGN}•\u0967,\u0967\u0966\u0966.\u0966\u0966<100> • •\u0905\u092e\u091c<200>\n•</data>
|
||||
<data>• •\u0968\u0966.\u0969\u096f<100> •\u0967\u0966\u0966.\u0966\u0966<100> •\N{RUPEE SIGN}•\u0967,\u0967\u0966\u0966.\u0966\u0966<100> •\u0905\u092e\u091c<200>\n•</data>
|
||||
|
||||
<data>•\u0938\u094d\u200d\u0935\u0924\u0902deadTA\u0930<200>\r•It's<200> •$•30.10<100> •12,34<100>¢•£•¤•¥•alpha\u05f3beta\u05f4gamma<200> •</data>
|
||||
|
||||
@ -1143,7 +1143,7 @@ Bangkok)•</data>
|
||||
# woman zwj woman zwj girl zwj girl, woman/fitz-1-2 zwj woman/fitz-4 zwj boy/fitz-6
|
||||
<data>•\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F467•\U0001F469\U0001F3FB\u200D\U0001F469\U0001F3FD\u200D\U0001F466\U0001F3FF•</data>
|
||||
# woman zwj baby/fitz-3, older_woman/fitz-5, runner/fitz-4, raised_fist/fitz-3, fuel_pump, fitz-3
|
||||
<data>•\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC•</data>
|
||||
#TODO: <data>•\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC•</data>
|
||||
# man zwj hvy_blk_heart zwj man, woman, man zwj hvy_blk_heart/evar zwj man, woman
|
||||
<data>•\U0001F468\u200D\u2764\u200D\U0001F468•\U0001F469•\U0001F468\u200D\u2764\uFE0F\u200D\U0001F468•\U0001F469•</data>
|
||||
# woman zwj hvy_blk_heart/evar zwj kiss_mark zwj woman, sleuth/fitz-4, horse_racing/fitz-5
|
||||
@ -1162,7 +1162,7 @@ Bangkok)•</data>
|
||||
<data>•\U0001F3C3\u200D\u2640\uFE0F•\U0001F3C3\u200D\u2640•\U0001F3C3\U0001F3FD\u200D\u2640\uFE0F•\U0001F3C3\U0001F3FD\u200D\u2640•\u0020•</data>
|
||||
# 9.0 + professions
|
||||
# black heart, fitz 4, squid, fitz4, man dancing /fitz4, mother xmas /fitz4
|
||||
<data>•\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020•</data>
|
||||
#TODO: <data>•\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020•</data>
|
||||
# facepalm, facepalm / fitz4, facepalm + woman +var, facepalm + woman -var, facepalm/fitz4 + woman +var, facepalm/fitz4 + woman -var
|
||||
<data>•\U0001F926•\U0001F926\U0001F3FD•\U0001F926\u200D\u2640\uFE0F•\U0001F926\u200D\u2640•\U0001F926\U0001F3FD\u200D\u2640\uFE0F•\U0001F926\U0001F3FD\u200D\u2640•\u0020•</data>
|
||||
# handball, handball / fitz4, handball + man +var, handball + man -var, handball/fitz4 + man +var, handball/fitz4 + man -var
|
||||
@ -1218,7 +1218,7 @@ Bangkok)•</data>
|
||||
# woman zwj woman zwj girl zwj girl, woman/fitz-1-2 zwj woman/fitz-4 zwj boy/fitz-6
|
||||
<data>•\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F467•\U0001F469\U0001F3FB\u200D\U0001F469\U0001F3FD\u200D\U0001F466\U0001F3FF•</data>
|
||||
# woman zwj baby/fitz-3, older_woman/fitz-5, runner/fitz-4, raised_fist/fitz-3, fuel_pump, fitz-3
|
||||
<data>•\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC•</data>
|
||||
#TODO: <data>•\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC•</data>
|
||||
# man zwj hvy_blk_heart zwj man, woman, man zwj hvy_blk_heart/evar zwj man, woman
|
||||
<data>•\U0001F468\u200D\u2764\u200D\U0001F468•\U0001F469•\U0001F468\u200D\u2764\uFE0F\u200D\U0001F468•\U0001F469•</data>
|
||||
# woman zwj hvy_blk_heart/evar zwj kiss_mark zwj woman, sleuth/fitz-4, horse_racing/fitz-5
|
||||
@ -1237,7 +1237,7 @@ Bangkok)•</data>
|
||||
<data>•\U0001F3C3\u200D\u2640\uFE0F•\U0001F3C3\u200D\u2640•\U0001F3C3\U0001F3FD\u200D\u2640\uFE0F•\U0001F3C3\U0001F3FD\u200D\u2640•\u0020•</data>
|
||||
# 9.0 + professions
|
||||
# black heart, fitz 4, squid, fitz4, man dancing /fitz4, mother xmas /fitz4
|
||||
<data>•\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020•</data>
|
||||
#TODO: <data>•\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020•</data>
|
||||
# facepalm, facepalm / fitz4, facepalm + woman +evar, facepalm + woman -evar, facepalm/fitz4 + woman +evar, facepalm/fitz4 + woman -evar
|
||||
<data>•\U0001F926•\U0001F926\U0001F3FD•\U0001F926\u200D\u2640\uFE0F•\U0001F926\u200D\u2640•\U0001F926\U0001F3FD\u200D\u2640\uFE0F•\U0001F926\U0001F3FD\u200D\u2640•\u0020•</data>
|
||||
# handball, handball / fitz4, handball + man +evar, handball + man -evar, handball/fitz4 + man +evar, handball/fitz4 + man -evar
|
||||
@ -1436,17 +1436,17 @@ Bangkok)•</data>
|
||||
<word>
|
||||
# Burmese
|
||||
<data>•အ<200>လော<200>င္<200>မ<200>င္<200>တရား<200>
|
||||
• • • • • •မဟာ<200>ဓမ္မရာဇာ<200>မိ<200>ပတိ<200>လ<200>က္<200>ထ<200>က္<200>တ္<200>ဝ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •မ<200>င္<200>ရိ<200>မ္<200>မ<200>သ<200>က္<200>ဖ္<200>ရ<200>စ္<200>နေ<200>သ<200>ည္<200>။• •မဏိ<200>ပူ<200>ရ<200> •က<200>သ<200>ည္<200>မ္<200>ယား<200>က<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>၏• •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>ဖ္<200>ယ<200>က္<200>ဆီး<200>သ<200>ည္<200>။• •အော<200>က္<200>မ္<200>ရ<200>န္<200>မာ<200>နုိ<200>င္<200>ငံ<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>သား<200>တုိ့<200>က<200>လ<200>ည္<200> •ပု<200>န္<200>က<200>န္<200>သ<200>ည္<200>။• •မတ္တ<200>ရာ<200>အု<200>တ္<200>ဖုိ<200>ရ္<200>ဟိ<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •ထ<200>က္<200>ရ္<200>ဝ<200>သ<200>ည္<200>။•
|
||||
• •မဟာ<200>ဓမ္မရာဇာ<200>မိ<200>ပတိ<200>လ<200>က္<200>ထ<200>က္<200>တ္<200>ဝ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •မ<200>င္<200>ရိ<200>မ္<200>မ<200>သ<200>က္<200>ဖ္<200>ရ<200>စ္<200>နေ<200>သ<200>ည္<200>။• •မဏိ<200>ပူ<200>ရ<200> •က<200>သ<200>ည္<200>မ္<200>ယား<200>က<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>၏• •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>ဖ္<200>ယ<200>က္<200>ဆီး<200>သ<200>ည္<200>။• •အော<200>က္<200>မ္<200>ရ<200>န္<200>မာ<200>နုိ<200>င္<200>ငံ<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>သား<200>တုိ့<200>က<200>လ<200>ည္<200> •ပု<200>န္<200>က<200>န္<200>သ<200>ည္<200>။• •မတ္တ<200>ရာ<200>အု<200>တ္<200>ဖုိ<200>ရ္<200>ဟိ<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •ထ<200>က္<200>ရ္<200>ဝ<200>သ<200>ည္<200>။•
|
||||
•
|
||||
• • • • •ထုိ<200>အ<200>ခ္<200>ယိ<200>န္<200>တ္<200>ဝ<200>င္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သူ<200>က္<200>ရီး<200> •အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မိမိ<200>၏•ရ္<200>ဝာ<200>ကုိ<200> •လုံ<200>ခ္<200>ရုံ<200>အော<200>င္<200>ထ<200>န္<200>လုံး<200>တ<200>ပ္<200>မ္<200>ယား<200>ကာ<200>ရ<200>သ<200>ည္<200>။• •အနီး<200>အ<200>ပား<200> •က္<200>ယေး<200>ရ္<200>ဝာ<200> •လေး<200>ဆ<200>ယ့္<200>ခ္<200>ရော<200>က္<200>ရ္<200>ဝာ<200>ကုိ<200> •သိ<200>မ္း<200>သ္<200>ဝ<200>င္<200>ထား<200>သ<200>ည္<200>။• •မ<200>က္<200>ရာ<200>မီ<200>ပ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200> •လ<200>က္<200>တ္<200>ဝ<200>င္<200>သ<200>က္<200>ဆ<200>င္<200>ရ<200>တော့<200>သ<200>ည္<200>။•
|
||||
• •ထုိ<200>အ<200>ခ္<200>ယိ<200>န္<200>တ္<200>ဝ<200>င္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သူ<200>က္<200>ရီး<200> •အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မိမိ<200>၏•ရ္<200>ဝာ<200>ကုိ<200> •လုံ<200>ခ္<200>ရုံ<200>အော<200>င္<200>ထ<200>န္<200>လုံး<200>တ<200>ပ္<200>မ္<200>ယား<200>ကာ<200>ရ<200>သ<200>ည္<200>။• •အနီး<200>အ<200>ပား<200> •က္<200>ယေး<200>ရ္<200>ဝာ<200> •လေး<200>ဆ<200>ယ့္<200>ခ္<200>ရော<200>က္<200>ရ္<200>ဝာ<200>ကုိ<200> •သိ<200>မ္း<200>သ္<200>ဝ<200>င္<200>ထား<200>သ<200>ည္<200>။• •မ<200>က္<200>ရာ<200>မီ<200>ပ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200> •လ<200>က္<200>တ္<200>ဝ<200>င္<200>သ<200>က္<200>ဆ<200>င္<200>ရ<200>တော့<200>သ<200>ည္<200>။•
|
||||
•
|
||||
• • • • •အ<200>င္<200>ဝ<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>ပ္<200>ရီး<200>သော<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>က္<200>ယေး<200>ရ္<200>ဝာ<200>မ္<200>ယား<200>ကုိ<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •လာ<200>က္<200>ရ<200>ရာ<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သုိ့<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •တ<200>ခ္<200>ယိ<200>န္<200>တ<200>ည္<200>မ္<200>ဟာ<200>ပ<200>င္<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>ဖ္<200>ဝဲ့<200>န္<200>ဟ<200>စ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •ခ္<200>ရေ<200>ငံ<200>စ္<200>ဝာ<200> •ဆ<200>က္<200>ဆံ<200>သ<200>ည္<200>။• •မ<200>ည္<200>သူ့<200>သ<200>စ္<200>စာ<200>ကုိ<200>မ္<200>ယ္<200>ဟ<200> •ခံ<200>ယူ<200>ခ္<200>ရ<200>င္<200>မ<200>ပ္<200>ရု<200>ပေ<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •အ<200>ပ္<200>ရ<200>န္<200>ခ<200>ရီး<200>တ္<200>ဝ<200>င္<200> •လ<200>မ္<200>မ္<200>ဟ<200>ဖ္<200>ရ<200>တ္<200>၍• •တုိ<200>က္<200>ခုိ<200>က္<200>သ<200>ည္<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>မ္<200>ယား<200> •အထိ<200>အ<200>ခုိ<200>က္<200>အ<200>က္<200>ယ<200>အ<200>ဆုံး<200>မ္<200>ယား<200>စ္<200>ဝာ<200>ဖ္<200>ရ<200>င္<200> •ပ္<200>ရ<200>န္<200>ရ<200>သ<200>ည္<200>။•
|
||||
• •အ<200>င္<200>ဝ<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>ပ္<200>ရီး<200>သော<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>က္<200>ယေး<200>ရ္<200>ဝာ<200>မ္<200>ယား<200>ကုိ<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •လာ<200>က္<200>ရ<200>ရာ<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သုိ့<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •တ<200>ခ္<200>ယိ<200>န္<200>တ<200>ည္<200>မ္<200>ဟာ<200>ပ<200>င္<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>ဖ္<200>ဝဲ့<200>န္<200>ဟ<200>စ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •ခ္<200>ရေ<200>ငံ<200>စ္<200>ဝာ<200> •ဆ<200>က္<200>ဆံ<200>သ<200>ည္<200>။• •မ<200>ည္<200>သူ့<200>သ<200>စ္<200>စာ<200>ကုိ<200>မ္<200>ယ္<200>ဟ<200> •ခံ<200>ယူ<200>ခ္<200>ရ<200>င္<200>မ<200>ပ္<200>ရု<200>ပေ<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •အ<200>ပ္<200>ရ<200>န္<200>ခ<200>ရီး<200>တ္<200>ဝ<200>င္<200> •လ<200>မ္<200>မ္<200>ဟ<200>ဖ္<200>ရ<200>တ္<200>၍• •တုိ<200>က္<200>ခုိ<200>က္<200>သ<200>ည္<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>မ္<200>ယား<200> •အထိ<200>အ<200>ခုိ<200>က္<200>အ<200>က္<200>ယ<200>အ<200>ဆုံး<200>မ္<200>ယား<200>စ္<200>ဝာ<200>ဖ္<200>ရ<200>င္<200> •ပ္<200>ရ<200>န္<200>ရ<200>သ<200>ည္<200>။•
|
||||
•
|
||||
• • • • •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •လာ<200>ရော<200>က္<200>တုိ<200>က္<200>ခုိ<200>က္<200>က္<200>ရ<200>ပ္<200>ရ<200>န္<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •သ္<200>ဝေး<200>သော<200>က္<200>ရဲ<200>ဘော္<200> •ခ္<200>ရော<200>က္<200>က္<200>ယိ<200>ပ္<200>ရ္<200>ဟ<200>စ္<200>ယော<200>က္<200>န္<200>ဟ<200>င္<200>အတူ<200> •ဦးစီး<200>ကာ<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>စ္<200>ဝာ<200>ခု<200>ခံ<200>တ္<200>ဝ<200>န္<200>လ္<200>ဟ<200>န္<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ထုိ့<200>နော<200>က္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ရ္<200>ဟိ<200> •ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>န္<200>ဟ<200>င္<200> •မ္<200>ရ<200>န္<200>မာ<200>မ္<200>ယား<200>ကုိ<200>လ<200>ည္<200> •ဆ<200>က္<200>သ္<200>ဝ<200>ယ္<200>စ<200>ည္<200>ရုံး<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ဤ<200>သုိ့<200>ဖ္<200>ရ<200>င္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>၏• •အ<200>ရ္<200>ဟိ<200>န္<200>အ<200>ဝာ<200> •မ္<200>ရ<200>င္<200>မား<200>လာ<200>လေ<200>သ<200>ည္<200>။•
|
||||
• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •လာ<200>ရော<200>က္<200>တုိ<200>က္<200>ခုိ<200>က္<200>က္<200>ရ<200>ပ္<200>ရ<200>န္<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •သ္<200>ဝေး<200>သော<200>က္<200>ရဲ<200>ဘော္<200> •ခ္<200>ရော<200>က္<200>က္<200>ယိ<200>ပ္<200>ရ္<200>ဟ<200>စ္<200>ယော<200>က္<200>န္<200>ဟ<200>င္<200>အတူ<200> •ဦးစီး<200>ကာ<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>စ္<200>ဝာ<200>ခု<200>ခံ<200>တ္<200>ဝ<200>န္<200>လ္<200>ဟ<200>န္<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ထုိ့<200>နော<200>က္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ရ္<200>ဟိ<200> •ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>န္<200>ဟ<200>င္<200> •မ္<200>ရ<200>န္<200>မာ<200>မ္<200>ယား<200>ကုိ<200>လ<200>ည္<200> •ဆ<200>က္<200>သ္<200>ဝ<200>ယ္<200>စ<200>ည္<200>ရုံး<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ဤ<200>သုိ့<200>ဖ္<200>ရ<200>င္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>၏• •အ<200>ရ္<200>ဟိ<200>န္<200>အ<200>ဝာ<200> •မ္<200>ရ<200>င္<200>မား<200>လာ<200>လေ<200>သ<200>ည္<200>။•
|
||||
•
|
||||
• • • • •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>လော<200>င္<200>မ<200>င္<200>တ<200>ရား<200>ဘ္<200>ဝဲ့<200>ကုိ<200> •ခံယူ<200>ကာ<200> •ကု<200>န္<200>ဘော<200>င္<200>မ<200>င္<200>ဆ<200>က္<200>ကုိ<200>စ<200>တ<200>င္<200>တ<200>ည္<200>ထော<200>င္<200>သ<200>ည္<200>။• •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •ရ္<200>ဝ္<200>ဟ<200>ဝေ<200>ဘုိ<200>ဟု<200> •သ<200>မု<200>တ္<200>ကာ<200> •မ္<200>ရုိ့<200>န<200>န္<200>တ<200>ည္<200>သ<200>ည္<200>။• •န<200>န္<200>တ<200>ည္<200>သ<200>က္<200>က<200>ရာ<200>ဇ္<200>ဖ္<200>ရ<200>စ္<200>သော<200> •၁၁၁၅<100> •ခု<200>ကုိ<200> •ဥ<200>ဩ<200>အော္<200>မ္<200>ရ<200>ည္<200> •ကု<200>န္<200>ဘော<200>င္<200>တ<200>ည္<200>ဟု<200> •အ<200>မ္<200>ဟ<200>တ္<200>အ<200>သား<200>ပ္<200>ရု<200>က္<200>ရ<200>သ<200>ည္<200>။•
|
||||
• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>လော<200>င္<200>မ<200>င္<200>တ<200>ရား<200>ဘ္<200>ဝဲ့<200>ကုိ<200> •ခံယူ<200>ကာ<200> •ကု<200>န္<200>ဘော<200>င္<200>မ<200>င္<200>ဆ<200>က္<200>ကုိ<200>စ<200>တ<200>င္<200>တ<200>ည္<200>ထော<200>င္<200>သ<200>ည္<200>။• •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •ရ္<200>ဝ္<200>ဟ<200>ဝေ<200>ဘုိ<200>ဟု<200> •သ<200>မု<200>တ္<200>ကာ<200> •မ္<200>ရုိ့<200>န<200>န္<200>တ<200>ည္<200>သ<200>ည္<200>။• •န<200>န္<200>တ<200>ည္<200>သ<200>က္<200>က<200>ရာ<200>ဇ္<200>ဖ္<200>ရ<200>စ္<200>သော<200> •၁၁၁၅<100> •ခု<200>ကုိ<200> •ဥ<200>ဩ<200>အော္<200>မ္<200>ရ<200>ည္<200> •ကု<200>န္<200>ဘော<200>င္<200>တ<200>ည္<200>ဟု<200> •အ<200>မ္<200>ဟ<200>တ္<200>အ<200>သား<200>ပ္<200>ရု<200>က္<200>ရ<200>သ<200>ည္<200>။•
|
||||
•
|
||||
• • • • •အ<200>လော<200>င္<200>မ<200>င္<200>တရား<200>သ<200>ည္<200> •ဧရာ<200>ဝ<200>တီ<200>န္<200>ဟ<200>င္<200>ခ္<200>ယ<200>င္<200>တ္<200>ဝ<200>င္<200> •မ္<200>ရ<200>စ္<200>န္<200>ဟ<200>စ္<200>သ္<200>ဝ<200>ယ္<200>အ<200>က္<200>ရား<200> •ဒေ<200>သ<200>မ္<200>ယား<200>ကုိ<200>အ<200>ခုိ<200>င္<200>အ<200>မာ<200> •စု<200>စ<200>ည္<200>ပ္<200>ရီး<200>နော<200>က္<200> •အ<200>င္<200>ဝ<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>သ<200>ည္<200>။• •ထုိ<200>နော<200>က္<200>တ္<200>ဝ<200>င္<200>ပ္<200>ရ<200>ည္<200>၊• •လ္<200>ဝ<200>န္<200>ဆေး<200>၊• •ဒ<200>ဂုံ<200>မ္<200>ရုိ့<200>မ္<200>ယား<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>သ<200>ည္<200>။• •လ္<200>ဝ<200>န္<200>ဆေး<200> •ကုိ<200>မ္<200>ရ<200>န္<200>အော<200>င္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>သ<200>ည္<200>။• •ဒ<200>ဂုံ<200>ကုိ<200>ရ<200>န္<200>ကု<200>န္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>ထ<200>သ<200>ည္<200>။•</data>
|
||||
• •အ<200>လော<200>င္<200>မ<200>င္<200>တရား<200>သ<200>ည္<200> •ဧရာ<200>ဝ<200>တီ<200>န္<200>ဟ<200>င္<200>ခ္<200>ယ<200>င္<200>တ္<200>ဝ<200>င္<200> •မ္<200>ရ<200>စ္<200>န္<200>ဟ<200>စ္<200>သ္<200>ဝ<200>ယ္<200>အ<200>က္<200>ရား<200> •ဒေ<200>သ<200>မ္<200>ယား<200>ကုိ<200>အ<200>ခုိ<200>င္<200>အ<200>မာ<200> •စု<200>စ<200>ည္<200>ပ္<200>ရီး<200>နော<200>က္<200> •အ<200>င္<200>ဝ<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>သ<200>ည္<200>။• •ထုိ<200>နော<200>က္<200>တ္<200>ဝ<200>င္<200>ပ္<200>ရ<200>ည္<200>၊• •လ္<200>ဝ<200>န္<200>ဆေး<200>၊• •ဒ<200>ဂုံ<200>မ္<200>ရုိ့<200>မ္<200>ယား<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>သ<200>ည္<200>။• •လ္<200>ဝ<200>န္<200>ဆေး<200> •ကုိ<200>မ္<200>ရ<200>န္<200>အော<200>င္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>သ<200>ည္<200>။• •ဒ<200>ဂုံ<200>ကုိ<200>ရ<200>န္<200>ကု<200>န္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>ထ<200>သ<200>ည္<200>။•</data>
|
||||
|
||||
<word>
|
||||
# japanese
|
||||
|
2
icu4c/source/test/testdata/regextst.txt
vendored
2
icu4c/source/test/testdata/regextst.txt
vendored
@ -252,7 +252,7 @@
|
||||
# Unicode word boundary mode
|
||||
#
|
||||
"(?w).*?\b" v "<0></0>hello, world"
|
||||
"(?w).*?(\b.+?\b).*" v "<0><1> </1> 123.45 </0>"
|
||||
"(?w).*?(\b.+?\b).*" v "<0><1> </1>123.45 </0>"
|
||||
"(?w).*?(\b\d.*?\b).*" v "<0> <1>123.45</1> </0>"
|
||||
".*?(\b.+?\b).*" "<0> <1>123</1>.45 </0>"
|
||||
"(?w:.*?(\b\d.*?\b).*)" v "<0> <1>123.45</1> </0>"
|
||||
|
Loading…
Reference in New Issue
Block a user