ICU-5696 Unicode 5.1 Update
X-SVN-Rev: 23763
This commit is contained in:
parent
71bf003171
commit
39ff2eff25
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:af6554a0d9bdc0c4f73faaff17ba6e1becdba35b86f1e6ac5efa8415ab562d69
|
||||
size 758184
|
||||
oid sha256:354535a77f8a69d732d81bfa18a5b1d8ac3e034cf51289ec97b934c054404404
|
||||
size 757711
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2006, International Business Machines Corporation and *
|
||||
* Copyright (C) 2002-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -97,14 +97,24 @@ public class CollationAPITest extends TestFmwk {
|
||||
col.setStrength(Collator.IDENTICAL);
|
||||
|
||||
byte key2compat[] = {
|
||||
// 3.9 key, UCA 5.1
|
||||
(byte) 0x2c, (byte) 0x2e, (byte) 0x30,
|
||||
(byte) 0x32, (byte) 0x2c, (byte) 0x01,
|
||||
(byte) 0x09, (byte) 0x01, (byte) 0x09,
|
||||
(byte) 0x01, (byte) 0x2b, (byte) 0x01,
|
||||
(byte) 0x92, (byte) 0x93, (byte) 0x94,
|
||||
(byte) 0x95, (byte) 0x92, (byte) 0x00
|
||||
|
||||
// 3.6 key, UCA 5.0
|
||||
/*
|
||||
(byte) 0x29, (byte) 0x2b, (byte) 0x2d,
|
||||
(byte) 0x2f, (byte) 0x29, (byte) 0x01,
|
||||
(byte) 0x09, (byte) 0x01, (byte) 0x09,
|
||||
(byte) 0x01, (byte) 0x28, (byte) 0x01,
|
||||
(byte) 0x92, (byte) 0x93, (byte) 0x94,
|
||||
(byte) 0x95, (byte) 0x92, (byte) 0x00
|
||||
|
||||
*/
|
||||
|
||||
// 3.4 key UCA 4.1
|
||||
/*
|
||||
(byte) 0x28, (byte) 0x2a, (byte) 0x2c,
|
||||
@ -501,7 +511,7 @@ public class CollationAPITest extends TestFmwk {
|
||||
doAssert(col.getVersion().equals(expectedVersion), "Expected version "+expectedVersion.toString()+" got "+col.getVersion().toString());
|
||||
|
||||
logln("Test getUCAVersion");
|
||||
VersionInfo expectedUCAVersion = VersionInfo.getInstance(0x05, 0, 0, 0);
|
||||
VersionInfo expectedUCAVersion = VersionInfo.getInstance(0x05, 1, 0, 0);
|
||||
doAssert(col.getUCAVersion().equals(expectedUCAVersion), "Expected UCA version "+expectedUCAVersion.toString()+" got "+col.getUCAVersion().toString());
|
||||
|
||||
doAssert((col.compare("ab", "abc") < 0), "ab < abc comparison failed");
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 2002-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -285,7 +285,13 @@ public class CollationMiscTest extends TestFmwk {
|
||||
String target = "[";
|
||||
|
||||
for (i = 0; i < bytes.length; i++) {
|
||||
target += Integer.toHexString(bytes[i]);
|
||||
String numStr = Integer.toHexString(bytes[i]);
|
||||
if (numStr.length()>2) {
|
||||
target += numStr.substring(numStr.length()-2);
|
||||
}
|
||||
else {
|
||||
target += numStr;
|
||||
}
|
||||
target += " ";
|
||||
}
|
||||
target += "]";
|
||||
@ -2281,4 +2287,204 @@ public class CollationMiscTest extends TestFmwk {
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void Test6179()
|
||||
{
|
||||
String rules[] = {
|
||||
"&[last primary ignorable]<< a &[first primary ignorable]<<b ",
|
||||
"&[last secondary ignorable]<<< a &[first secondary ignorable]<<<b",
|
||||
};
|
||||
// defined in UCA5.1
|
||||
String firstPrimIgn = "\u0332";
|
||||
String lastPrimIgn = "\uD800\uDDFD";
|
||||
String firstVariable = "\u0009";
|
||||
byte[] secIgnKey = {1,1,4,0};
|
||||
|
||||
int i=0;
|
||||
{
|
||||
|
||||
RuleBasedCollator coll = null;
|
||||
try {
|
||||
coll = new RuleBasedCollator(rules[i]);
|
||||
} catch (Exception e) {
|
||||
warnln("Unable to open collator with rules " + rules[i]);
|
||||
}
|
||||
|
||||
logln("Test rule["+i+"]"+rules[i]);
|
||||
|
||||
CollationKey keyA = coll.getCollationKey("a");
|
||||
logln("Key for \"a\":"+ prettify(keyA));
|
||||
if (keyA.compareTo(coll.getCollationKey(lastPrimIgn))<=0) {
|
||||
CollationKey key = coll.getCollationKey(lastPrimIgn);
|
||||
logln("Collation key for 0xD800 0xDDFD: "+prettify(key));
|
||||
errln("Error! String \"a\" must be greater than \uD800\uDDFD -"+
|
||||
"[Last Primary Ignorable]");
|
||||
}
|
||||
if (keyA.compareTo(coll.getCollationKey(firstVariable))>=0) {
|
||||
CollationKey key = coll.getCollationKey(firstVariable);
|
||||
logln("Collation key for 0x0009: "+prettify(key));
|
||||
errln("Error! String \"a\" must be less than 0x0009 - [First Variable]");
|
||||
}
|
||||
CollationKey keyB = coll.getCollationKey("b");
|
||||
logln("Key for \"b\":"+ prettify(keyB));
|
||||
if (keyB.compareTo(coll.getCollationKey(firstPrimIgn))<=0) {
|
||||
CollationKey key = coll.getCollationKey(firstPrimIgn);
|
||||
logln("Collation key for 0x0332: "+prettify(key));
|
||||
errln("Error! String \"b\" must be greater than 0x0332 -"+
|
||||
"[First Primary Ignorable]");
|
||||
}
|
||||
if (keyB.compareTo(coll.getCollationKey(firstVariable))>=0) {
|
||||
CollationKey key = coll.getCollationKey(firstVariable);
|
||||
logln("Collation key for 0x0009: "+prettify(key));
|
||||
errln("Error! String \"b\" must be less than 0x0009 - [First Variable]");
|
||||
}
|
||||
}
|
||||
{
|
||||
i=1;
|
||||
RuleBasedCollator coll = null;
|
||||
try {
|
||||
coll = new RuleBasedCollator(rules[i]);
|
||||
} catch (Exception e) {
|
||||
warnln("Unable to open collator with rules " + rules[i]);
|
||||
}
|
||||
|
||||
logln("Test rule["+i+"]"+rules[i]);
|
||||
|
||||
CollationKey keyA = coll.getCollationKey("a");
|
||||
logln("Key for \"a\":"+ prettify(keyA));
|
||||
byte[] keyAInBytes = keyA.toByteArray();
|
||||
for (int j=0; j<keyAInBytes.length && j<secIgnKey.length; j++) {
|
||||
if (keyAInBytes[j]!=secIgnKey[j]) {
|
||||
if ((char)keyAInBytes[j]<=(char)secIgnKey[j]) {
|
||||
logln("Error! String \"a\" must be greater than [Last Secondary Ignorable]");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (keyA.compareTo(coll.getCollationKey(firstVariable))>=0) {
|
||||
errln("Error! String \"a\" must be less than 0x0009 - [First Variable]");
|
||||
CollationKey key = coll.getCollationKey(firstVariable);
|
||||
logln("Collation key for 0x0009: "+prettify(key));
|
||||
}
|
||||
CollationKey keyB = coll.getCollationKey("b");
|
||||
logln("Key for \"b\":"+ prettify(keyB));
|
||||
byte[] keyBInBytes = keyB.toByteArray();
|
||||
for (int j=0; j<keyBInBytes.length && j<secIgnKey.length; j++) {
|
||||
if (keyBInBytes[j]!=secIgnKey[j]) {
|
||||
if ((char)keyBInBytes[j]<=(char)secIgnKey[j]) {
|
||||
errln("Error! String \"b\" must be greater than [Last Secondary Ignorable]");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (keyB.compareTo(coll.getCollationKey(firstVariable))>=0) {
|
||||
CollationKey key = coll.getCollationKey(firstVariable);
|
||||
logln("Collation key for 0x0009: "+prettify(key));
|
||||
errln("Error! String \"b\" must be less than 0x0009 - [First Variable]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void TestUCAPrecontext()
|
||||
{
|
||||
String rules[] = {
|
||||
"& \u00B7<a ",
|
||||
"& L\u00B7 << a", // 'a' is an expansion.
|
||||
};
|
||||
String cases[] = {
|
||||
"\u00B7",
|
||||
"\u0387",
|
||||
"a",
|
||||
"l",
|
||||
"L\u0332",
|
||||
"l\u00B7",
|
||||
"l\u0387",
|
||||
"L\u0387",
|
||||
"la\u0387",
|
||||
"La\u00b7",
|
||||
};
|
||||
|
||||
// Test en sort
|
||||
RuleBasedCollator en = null;
|
||||
|
||||
logln("EN sort:");
|
||||
try {
|
||||
en = (RuleBasedCollator)Collator.getInstance(
|
||||
new Locale("en", ""));
|
||||
for (int j=0; j<cases.length; j++) {
|
||||
CollationKey key = en.getCollationKey(cases[j]);
|
||||
if (j>0) {
|
||||
CollationKey prevKey = en.getCollationKey(cases[j-1]);
|
||||
if (key.compareTo(prevKey)<0) {
|
||||
errln("Error! EN test["+j+"]:"+"source:" + cases[j]+
|
||||
"is not greater than previous test.");
|
||||
}
|
||||
}
|
||||
/*
|
||||
if ( key.compareTo(expectingKey)!=0) {
|
||||
errln("Error! Test case["+i+"]:"+"source:" + key.getSourceString());
|
||||
errln("expecting:"+prettify(expectingKey)+ "got:"+ prettify(key));
|
||||
}
|
||||
*/
|
||||
logln("String:"+cases[j]+" Key:"+ prettify(key));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
warnln("Error creating Vietnese collator");
|
||||
return;
|
||||
}
|
||||
|
||||
// Test ja sort
|
||||
RuleBasedCollator ja = null;
|
||||
logln("JA sort:");
|
||||
try {
|
||||
ja = (RuleBasedCollator)Collator.getInstance(
|
||||
new Locale("ja", ""));
|
||||
for (int j=0; j<cases.length; j++) {
|
||||
CollationKey key = ja.getCollationKey(cases[j]);
|
||||
if (j>0) {
|
||||
CollationKey prevKey = ja.getCollationKey(cases[j-1]);
|
||||
if (key.compareTo(prevKey)<0) {
|
||||
errln("Error! JA test["+j+"]:"+"source:" + cases[j]+
|
||||
"is not greater than previous test.");
|
||||
}
|
||||
}
|
||||
logln("String:"+cases[j]+" Key:"+ prettify(key));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
warnln("Error creating Vietnese collator");
|
||||
return;
|
||||
}
|
||||
for(int i = 0; i < rules.length; i++) {
|
||||
|
||||
RuleBasedCollator coll = null;
|
||||
logln("Tailoring rule:"+rules[i]);
|
||||
try {
|
||||
coll = new RuleBasedCollator(rules[i]);
|
||||
} catch (Exception e) {
|
||||
warnln("Unable to open collator with rules " + rules[i]);
|
||||
}
|
||||
|
||||
for (int j=0; j<cases.length; j++) {
|
||||
CollationKey key = coll.getCollationKey(cases[j]);
|
||||
if (j>0) {
|
||||
CollationKey prevKey = coll.getCollationKey(cases[j-1]);
|
||||
if (i==1 && j==3) {
|
||||
if (key.compareTo(prevKey)>0) {
|
||||
errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
|
||||
cases[j]+"is not greater than previous test.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (key.compareTo(prevKey)<0) {
|
||||
errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
|
||||
cases[j]+"is not greater than previous test.");
|
||||
}
|
||||
}
|
||||
}
|
||||
logln("String:"+cases[j]+" Key:"+ prettify(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -361,12 +361,16 @@ public class TestUScript extends TestFmwk {
|
||||
*/
|
||||
String[] expectedLong = new String[]{
|
||||
"Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
|
||||
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
|
||||
"Lepc", "Lina", "Mand", "Maya", "Mero", "Nko", "Orkh", "Perm", "Phags_Pa", "Phoenician",
|
||||
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Cuneiform",
|
||||
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kayah_Li", "Latf", "Latg",
|
||||
"Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Orkh", "Perm", "Phags_Pa", "Phoenician",
|
||||
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
|
||||
"Zxxx", "Unknown",
|
||||
"Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
|
||||
"Carian", "Jpan", "Lana", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
|
||||
"Moon", "Mtei",
|
||||
|
||||
// ICU 4.0
|
||||
"Armi", "Avst", "Cakm", "Kore", "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti",
|
||||
"Samr", "Tavt", "Zmth", "Zsym",
|
||||
};
|
||||
String[] expectedShort = new String[]{
|
||||
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
|
||||
@ -375,7 +379,11 @@ public class TestUScript extends TestFmwk {
|
||||
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
|
||||
"Zxxx", "Zzzz",
|
||||
"Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
|
||||
"Moon", "Mtei",
|
||||
"Moon", "Mtei",
|
||||
|
||||
// ICU 4.0
|
||||
"Armi", "Avst", "Cakm", "Kore", "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti",
|
||||
"Samr", "Tavt", "Zmth", "Zsym",
|
||||
};
|
||||
int j = 0;
|
||||
int i = 0;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -15,7 +15,9 @@ import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import java.util.Locale;
|
||||
import java.io.BufferedReader;
|
||||
@ -289,6 +291,22 @@ public final class UCharacterCaseTest extends TestFmwk
|
||||
}
|
||||
}
|
||||
|
||||
public void TestTitleRegression() throws java.io.IOException {
|
||||
UCaseProps props = new UCaseProps();
|
||||
int type = props.getTypeOrIgnorable('\'');
|
||||
assertEquals("Case Ignorable check", -1, type); // should be case-ignorable (-1)
|
||||
UnicodeSet allCaseIgnorables = new UnicodeSet();
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
if (props.getTypeOrIgnorable(cp) < 0) {
|
||||
allCaseIgnorables.add(cp);
|
||||
}
|
||||
}
|
||||
logln(allCaseIgnorables.toString());
|
||||
assertEquals("Titlecase check",
|
||||
"The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
|
||||
UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
|
||||
}
|
||||
|
||||
public void TestTitle()
|
||||
{
|
||||
try{
|
||||
@ -912,5 +930,3 @@ public final class UCharacterCaseTest extends TestFmwk
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2005, International Business Machines Corporation and *
|
||||
* Copyright (C) 2004-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -22,11 +22,15 @@ public final class UCharacterSurrogateTest extends TestFmwk {
|
||||
}
|
||||
|
||||
public void TestUnicodeBlockForName() {
|
||||
String[] names = {"Optical Character Recognition",
|
||||
"CJK Unified Ideographs Extension A", "Supplemental Arrows-B",
|
||||
"Supplementary Private Use Area-B",
|
||||
"supplementary_Private_Use_Area-b",
|
||||
"supplementary_PRIVATE_Use_Area_b"};
|
||||
String[] names = {"Latin-1 Supplement",
|
||||
"Optical Character Recognition",
|
||||
"CJK Unified Ideographs Extension A",
|
||||
"Supplemental Arrows-B",
|
||||
"Supplemental arrows b",
|
||||
"supp-lement-al arrowsb",
|
||||
"Supplementary Private Use Area-B",
|
||||
"supplementary_Private_Use_Area-b",
|
||||
"supplementary_PRIVATE_Use_Area_b"};
|
||||
for (int i = 0; i < names.length; ++i) {
|
||||
try {
|
||||
UCharacter.UnicodeBlock b = UCharacter.UnicodeBlock
|
||||
@ -416,4 +420,3 @@ public final class UCharacterSurrogateTest extends TestFmwk {
|
||||
test.test(s, 2, 1, 2, 1, 3, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -44,7 +44,7 @@ public final class UCharacterTest extends TestFmwk
|
||||
/**
|
||||
* ICU4J data version number
|
||||
*/
|
||||
private final VersionInfo VERSION_ = VersionInfo.getInstance("5.0.0.0");
|
||||
private final VersionInfo VERSION_ = VersionInfo.getInstance("5.1.0.0");
|
||||
|
||||
// constructor ===================================================
|
||||
|
||||
@ -1616,14 +1616,16 @@ public final class UCharacterTest extends TestFmwk
|
||||
{ 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 },
|
||||
|
||||
{ 0x0341, UProperty.DEPRECATED, 1 },
|
||||
{ 0xe0041, UProperty.DEPRECATED, 0 },
|
||||
{ 0xe0041, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */
|
||||
|
||||
{ 0x00a0, UProperty.GRAPHEME_BASE, 1 },
|
||||
{ 0x0a4d, UProperty.GRAPHEME_BASE, 0 },
|
||||
{ 0xff9f, UProperty.GRAPHEME_BASE, 1 }, /* changed from Unicode 3.2 to 4 */
|
||||
{ 0xff9d, UProperty.GRAPHEME_BASE, 1 },
|
||||
{ 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
|
||||
|
||||
{ 0x0300, UProperty.GRAPHEME_EXTEND, 1 },
|
||||
{ 0xff9f, UProperty.GRAPHEME_EXTEND, 0 }, /* changed from Unicode 3.2 to 4 */
|
||||
{ 0xff9d, UProperty.GRAPHEME_EXTEND, 0 },
|
||||
{ 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
|
||||
{ 0x0603, UProperty.GRAPHEME_EXTEND, 0 },
|
||||
|
||||
{ 0x0a4d, UProperty.GRAPHEME_LINK, 1 },
|
||||
@ -1671,7 +1673,7 @@ public final class UCharacterTest extends TestFmwk
|
||||
{ 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
|
||||
{ 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
|
||||
|
||||
{ 0x0606, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x0605, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x061c, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -2292,63 +2292,64 @@ public class BasicTest extends TestFmwk {
|
||||
}
|
||||
skipSets[D].applyPattern(
|
||||
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
|
||||
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
|
||||
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
|
||||
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
|
||||
+ "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
|
||||
+ "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
|
||||
+ "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
|
||||
+ "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
|
||||
+ "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
|
||||
+ "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
|
||||
+ "-\\u045E\\u0476\\u0477\\u0483-\\u0486\\u04C1\\u04C2\\u04D0-"
|
||||
+ "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
|
||||
+ "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
|
||||
+ "\\u05C5\\u05C7\\u0610-\\u0615\\u0622-\\u0626\\u064B-\\u065E"
|
||||
+ "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
|
||||
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
+ "\\u07F3\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
+ "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
|
||||
+ "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
|
||||
+ "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
|
||||
+ "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
|
||||
+ "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
|
||||
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1026\\u1037\\u1039\\u135F\\u1714\\u1734\\u17D2\\u17DD"
|
||||
+ "\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08\\u1B0A\\u1B0C"
|
||||
+ "\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44"
|
||||
+ "\\u1B6B-\\u1B73\\u1DC0-\\u1DCA\\u1DFE-\\u1E99\\u1E9B\\u1EA0-"
|
||||
+ "\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-"
|
||||
+ "\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D"
|
||||
+ "\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-"
|
||||
+ "\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-"
|
||||
+ "\\u1FFD\\u2000\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF"
|
||||
+ "\\u2126\\u212A\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204"
|
||||
+ "\\u2209\\u220C\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260"
|
||||
+ "\\u2262\\u226D-\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281"
|
||||
+ "\\u2284\\u2285\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3"
|
||||
+ "\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC\\u302A-\\u302F\\u304C"
|
||||
+ "\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E"
|
||||
+ "\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074"
|
||||
+ "\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094\\u3099\\u309A"
|
||||
+ "\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA"
|
||||
+ "\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1"
|
||||
+ "\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4"
|
||||
+ "\\u30F7-\\u30FA\\u30FE\\uA806\\uAC00-\\uD7A3\\uF900-\\uFA0D"
|
||||
+ "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
|
||||
+ "-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
|
||||
+ "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
|
||||
+ "-\\uFB4E\\uFE20-\\uFE23\\U00010A0D\\U00010A0F\\U00010A38-\\U0001"
|
||||
+ "0A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
|
||||
+ "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
|
||||
+ "F800-\\U0002FA1D]", false);
|
||||
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
|
||||
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
|
||||
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
|
||||
+ "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
|
||||
+ "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
|
||||
+ "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
|
||||
+ "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
|
||||
+ "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
|
||||
+ "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
|
||||
+ "-\\u045E\\u0476\\u0477\\u0483-\\u0487\\u04C1\\u04C2\\u04D0-"
|
||||
+ "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
|
||||
+ "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
|
||||
+ "\\u05C5\\u05C7\\u0610-\\u061A\\u0622-\\u0626\\u064B-\\u065E"
|
||||
+ "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
|
||||
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
+ "\\u07F3\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
+ "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
|
||||
+ "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
|
||||
+ "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
|
||||
+ "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
|
||||
+ "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
|
||||
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u135F\\u1714\\u1734"
|
||||
+ "\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08"
|
||||
+ "\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41"
|
||||
+ "\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1DC0-\\u1DE6"
|
||||
+ "\\u1DFE-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-"
|
||||
+ "\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59"
|
||||
+ "\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC"
|
||||
+ "\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-"
|
||||
+ "\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000\\u2001\\u20D0-"
|
||||
+ "\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A\\u212B\\u219A\\u219B"
|
||||
+ "\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u2241"
|
||||
+ "\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275"
|
||||
+ "\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289\\u22AC-"
|
||||
+ "\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC"
|
||||
+ "\\u2DE0-\\u2DFF\\u302A-\\u302F\\u304C\\u304E\\u3050\\u3052"
|
||||
+ "\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065"
|
||||
+ "\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079"
|
||||
+ "\\u307A\\u307C\\u307D\\u3094\\u3099\\u309A\\u309E\\u30AC\\u30AE"
|
||||
+ "\\u30B0\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0"
|
||||
+ "\\u30C2\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6"
|
||||
+ "\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE"
|
||||
+ "\\uA66F\\uA67C\\uA67D\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953\\uAC00"
|
||||
+ "-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
|
||||
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
|
||||
+ "\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E"
|
||||
+ "\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\uFE20-\\uFE26"
|
||||
+ "\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010"
|
||||
+ "A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
|
||||
+ "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001"
|
||||
+ "D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002F800-\\U0002FA1D]", false);
|
||||
|
||||
skipSets[C].applyPattern(
|
||||
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
|
||||
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
|
||||
+ "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
|
||||
+ "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124"
|
||||
+ "\\u0125\\u0128-\\u012D\\u0130\\u0139\\u013A\\u013D\\u013E\\u0143"
|
||||
@ -2362,9 +2363,9 @@ public class BasicTest extends TestFmwk {
|
||||
+ "\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-\\u03CB\\u03CE\\u03D2\\u0406"
|
||||
+ "\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B"
|
||||
+ "\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447"
|
||||
+ "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486\\u04D8\\u04D9"
|
||||
+ "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487\\u04D8\\u04D9"
|
||||
+ "\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5"
|
||||
+ "\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627\\u0648\\u064A-"
|
||||
+ "\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627\\u0648\\u064A-"
|
||||
+ "\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4"
|
||||
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
+ "\\u07F3\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
@ -2378,303 +2379,309 @@ public class BasicTest extends TestFmwk {
|
||||
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u1100-\\u1112\\u1161-"
|
||||
+ "\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2\\u17DD\\u18A9"
|
||||
+ "\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D"
|
||||
+ "\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44"
|
||||
+ "\\u1B6B-\\u1B73\\u1DC0-\\u1DCA\\u1DFE-\\u1E03\\u1E0A-\\u1E0F"
|
||||
+ "\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53"
|
||||
+ "\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99"
|
||||
+ "\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19"
|
||||
+ "\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50"
|
||||
+ "\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79"
|
||||
+ "\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98"
|
||||
+ "\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC"
|
||||
+ "\\u1FBE\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB"
|
||||
+ "\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE"
|
||||
+ "\\u2000\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF\\u2126"
|
||||
+ "\\u212A\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203"
|
||||
+ "\\u2208\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D"
|
||||
+ "\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D"
|
||||
+ "\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9"
|
||||
+ "\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u302A-\\u302F"
|
||||
+ "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
|
||||
+ "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
|
||||
+ "\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D\\u30A6\\u30AB\\u30AD"
|
||||
+ "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
|
||||
+ "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
|
||||
+ "\\u30EF-\\u30F2\\u30FD\\uA806\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70"
|
||||
+ "\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C"
|
||||
+ "\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68"
|
||||
+ "\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64"
|
||||
+ "\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060"
|
||||
+ "\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C"
|
||||
+ "\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258"
|
||||
+ "\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354"
|
||||
+ "\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450"
|
||||
+ "\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C"
|
||||
+ "\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648"
|
||||
+ "\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744"
|
||||
+ "\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840"
|
||||
+ "\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C"
|
||||
+ "\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38"
|
||||
+ "\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34"
|
||||
+ "\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30"
|
||||
+ "\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C"
|
||||
+ "\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28"
|
||||
+ "\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24"
|
||||
+ "\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020"
|
||||
+ "\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C"
|
||||
+ "\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218"
|
||||
+ "\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314"
|
||||
+ "\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410"
|
||||
+ "\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C"
|
||||
+ "\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608"
|
||||
+ "\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704"
|
||||
+ "\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800"
|
||||
+ "\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC"
|
||||
+ "\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8"
|
||||
+ "\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4"
|
||||
+ "\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0"
|
||||
+ "\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC"
|
||||
+ "\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8"
|
||||
+ "\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4"
|
||||
+ "\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0"
|
||||
+ "\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC"
|
||||
+ "\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8"
|
||||
+ "\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4"
|
||||
+ "\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0"
|
||||
+ "\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC"
|
||||
+ "\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8"
|
||||
+ "\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4"
|
||||
+ "\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D"
|
||||
+ "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
|
||||
+ "-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
|
||||
+ "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
|
||||
+ "-\\uFB4E\\uFE20-\\uFE23\\U00010A0D\\U00010A0F\\U00010A38-\\U0001"
|
||||
+ "0A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
|
||||
+ "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
|
||||
+ "F800-\\U0002FA1D]", false);
|
||||
+ "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u1100-\\u1112"
|
||||
+ "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
|
||||
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09"
|
||||
+ "\\u1B0B\\u1B0D\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F"
|
||||
+ "\\u1B42\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1DC0-\\u1DE6"
|
||||
+ "\\u1DFE-\\u1E03\\u1E0A-\\u1E0F\\u1E12-\\u1E1B\\u1E20-\\u1E27"
|
||||
+ "\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-\\u1E7D\\u1E80-\\u1E87"
|
||||
+ "\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9"
|
||||
+ "\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-\\u1F31\\u1F38\\u1F39"
|
||||
+ "\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51\\u1F59\\u1F60-\\u1F71"
|
||||
+ "\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-\\u1F7D\\u1F80\\u1F81"
|
||||
+ "\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99\\u1FA0\\u1FA1\\u1FA8"
|
||||
+ "\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE\\u1FBF\\u1FC3\\u1FC6"
|
||||
+ "\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3\\u1FEB\\u1FEE\\u1FEF"
|
||||
+ "\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000\\u2001\\u20D0-"
|
||||
+ "\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A\\u212B\\u2190\\u2192"
|
||||
+ "\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225"
|
||||
+ "\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272"
|
||||
+ "\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287"
|
||||
+ "\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329"
|
||||
+ "\\u232A\\u2ADC\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3046\\u304B"
|
||||
+ "\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D"
|
||||
+ "\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078"
|
||||
+ "\\u307B\\u3099\\u309A\\u309D\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1"
|
||||
+ "\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4"
|
||||
+ "\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2"
|
||||
+ "\\u30FD\\uA66F\\uA67C\\uA67D\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953"
|
||||
+ "\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0"
|
||||
+ "\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC"
|
||||
+ "\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8"
|
||||
+ "\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4"
|
||||
+ "\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0"
|
||||
+ "\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC"
|
||||
+ "\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8"
|
||||
+ "\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4"
|
||||
+ "\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0"
|
||||
+ "\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC"
|
||||
+ "\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8"
|
||||
+ "\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4"
|
||||
+ "\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0"
|
||||
+ "\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC"
|
||||
+ "\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8"
|
||||
+ "\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4"
|
||||
+ "\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0"
|
||||
+ "\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C"
|
||||
+ "\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98"
|
||||
+ "\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94"
|
||||
+ "\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090"
|
||||
+ "\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C"
|
||||
+ "\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288"
|
||||
+ "\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384"
|
||||
+ "\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480"
|
||||
+ "\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C"
|
||||
+ "\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678"
|
||||
+ "\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774"
|
||||
+ "\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870"
|
||||
+ "\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C"
|
||||
+ "\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68"
|
||||
+ "\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64"
|
||||
+ "\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60"
|
||||
+ "\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C"
|
||||
+ "\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58"
|
||||
+ "\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54"
|
||||
+ "\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050"
|
||||
+ "\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C"
|
||||
+ "\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248"
|
||||
+ "\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344"
|
||||
+ "\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440"
|
||||
+ "\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C"
|
||||
+ "\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638"
|
||||
+ "\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734"
|
||||
+ "\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-"
|
||||
+ "\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-"
|
||||
+ "\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36\\uFB38-"
|
||||
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\uFE20"
|
||||
+ "-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-\\U00010A3A"
|
||||
+ "\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001"
|
||||
+ "D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD"
|
||||
+ "\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002F800-"
|
||||
+ "\\U0002FA1D]", false);
|
||||
|
||||
skipSets[KD].applyPattern(
|
||||
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
|
||||
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
|
||||
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
|
||||
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
|
||||
+ "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
|
||||
+ "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
|
||||
+ "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
|
||||
+ "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
|
||||
+ "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
|
||||
+ "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
|
||||
+ "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
+ "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
|
||||
+ "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
|
||||
+ "-\\u0486\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
|
||||
+ "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
|
||||
+ "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
|
||||
+ "-\\u0615\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
|
||||
+ "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
|
||||
+ "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
|
||||
+ "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
|
||||
+ "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
|
||||
+ "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
|
||||
+ "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
|
||||
+ "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
|
||||
+ "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
|
||||
+ "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
|
||||
+ "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
|
||||
+ "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1026\\u1037\\u1039\\u10FC\\u135F\\u1714\\u1734\\u17D2"
|
||||
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08\\u1B0A"
|
||||
+ "\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43"
|
||||
+ "\\u1B44\\u1B6B-\\u1B73\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-"
|
||||
+ "\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DCA\\u1DFE-\\u1E9B"
|
||||
+ "\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45"
|
||||
+ "\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-"
|
||||
+ "\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-"
|
||||
+ "\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-"
|
||||
+ "\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036"
|
||||
+ "\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071"
|
||||
+ "\\u2074-\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1"
|
||||
+ "\\u20E5-\\u20EF\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113"
|
||||
+ "\\u2115\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126"
|
||||
+ "\\u2128\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-"
|
||||
+ "\\u2140\\u2145-\\u2149\\u2153-\\u217F\\u219A\\u219B\\u21AE"
|
||||
+ "\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D"
|
||||
+ "\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
|
||||
+ "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
|
||||
+ "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
|
||||
+ "\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC"
|
||||
+ "\\u2D6F\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F"
|
||||
+ "\\u3036\\u3038-\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056"
|
||||
+ "\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069"
|
||||
+ "\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C"
|
||||
+ "\\u307D\\u3094\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0"
|
||||
+ "\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2"
|
||||
+ "\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7"
|
||||
+ "\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF"
|
||||
+ "\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3243"
|
||||
+ "\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA806\\uAC00-"
|
||||
+ "\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
|
||||
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
|
||||
+ "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
|
||||
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
|
||||
+ "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
|
||||
+ "\\uFE19\\uFE20-\\uFE23\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
|
||||
+ "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
|
||||
+ "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
|
||||
+ "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U00010A0D\\U00010"
|
||||
+ "A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U0001D15E-\\U0001D169"
|
||||
+ "\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
|
||||
+ "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001"
|
||||
+ "D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C"
|
||||
+ "\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4"
|
||||
+ "A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
|
||||
+ "\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001"
|
||||
+ "D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
|
||||
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
|
||||
+ "D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB"
|
||||
+ "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
|
||||
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
|
||||
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
|
||||
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
|
||||
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
|
||||
+ "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
|
||||
+ "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
|
||||
+ "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
|
||||
+ "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
|
||||
+ "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
|
||||
+ "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
|
||||
+ "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
+ "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
|
||||
+ "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
|
||||
+ "-\\u0487\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
|
||||
+ "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
|
||||
+ "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
|
||||
+ "-\\u061A\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
|
||||
+ "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
|
||||
+ "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
|
||||
+ "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
|
||||
+ "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
|
||||
+ "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
|
||||
+ "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
|
||||
+ "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
|
||||
+ "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
|
||||
+ "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
|
||||
+ "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
|
||||
+ "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u10FC\\u135F\\u1714"
|
||||
+ "\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06"
|
||||
+ "\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40"
|
||||
+ "\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1D2C-"
|
||||
+ "\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78"
|
||||
+ "\\u1D9B-\\u1DE6\\u1DFE-\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15"
|
||||
+ "\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57"
|
||||
+ "\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-"
|
||||
+ "\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-"
|
||||
+ "\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-"
|
||||
+ "\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-"
|
||||
+ "\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-"
|
||||
+ "\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-"
|
||||
+ "\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-"
|
||||
+ "\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D"
|
||||
+ "\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149"
|
||||
+ "\\u2153-\\u217F\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204"
|
||||
+ "\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D\\u222F\\u2230\\u2241"
|
||||
+ "\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275"
|
||||
+ "\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289\\u22AC-"
|
||||
+ "\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2460-"
|
||||
+ "\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2D6F\\u2DE0"
|
||||
+ "-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F"
|
||||
+ "\\u3036\\u3038-\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056"
|
||||
+ "\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069"
|
||||
+ "\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C"
|
||||
+ "\\u307D\\u3094\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0"
|
||||
+ "\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2"
|
||||
+ "\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7"
|
||||
+ "\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF"
|
||||
+ "\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3243"
|
||||
+ "\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C"
|
||||
+ "\\uA67D\\uA770\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953\\uAC00-"
|
||||
+ "\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
|
||||
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
|
||||
+ "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
|
||||
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
|
||||
+ "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
|
||||
+ "\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
|
||||
+ "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
|
||||
+ "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
|
||||
+ "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010"
|
||||
+ "A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U0001D15E-"
|
||||
+ "\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001"
|
||||
+ "D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0"
|
||||
+ "\\U0001D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-"
|
||||
+ "\\U0001D49C\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4"
|
||||
+ "A6\\U0001D4A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB"
|
||||
+ "\\U0001D4BD-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-"
|
||||
+ "\\U0001D50A\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001"
|
||||
+ "D51E-\\U0001D539\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544"
|
||||
+ "\\U0001D546\\U0001D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001"
|
||||
+ "D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
|
||||
|
||||
skipSets[KC].applyPattern(
|
||||
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
|
||||
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
|
||||
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
|
||||
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
|
||||
+ "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
|
||||
+ "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
|
||||
+ "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
|
||||
+ "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
|
||||
+ "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
|
||||
+ "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
|
||||
+ "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
|
||||
+ "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
|
||||
+ "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
|
||||
+ "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
|
||||
+ "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
+ "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
|
||||
+ "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
|
||||
+ "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486"
|
||||
+ "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
|
||||
+ "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627"
|
||||
+ "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
|
||||
+ "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
|
||||
+ "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0928\\u0930\\u0933"
|
||||
+ "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
|
||||
+ "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
|
||||
+ "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
|
||||
+ "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
|
||||
+ "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
|
||||
+ "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
|
||||
+ "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
|
||||
+ "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
|
||||
+ "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
|
||||
+ "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
|
||||
+ "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
|
||||
+ "\\u1025\\u102E\\u1037\\u1039\\u10FC\\u1100-\\u1112\\u1161-"
|
||||
+ "\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2\\u17DD\\u18A9"
|
||||
+ "\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D"
|
||||
+ "\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44"
|
||||
+ "\\u1B6B-\\u1B73\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D"
|
||||
+ "\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DCA\\u1DFE-\\u1E03\\u1E0A-"
|
||||
+ "\\u1E0F\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-"
|
||||
+ "\\u1E53\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-"
|
||||
+ "\\u1E9B\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18"
|
||||
+ "\\u1F19\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49"
|
||||
+ "\\u1F50\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77"
|
||||
+ "\\u1F79\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91"
|
||||
+ "\\u1F98\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-"
|
||||
+ "\\u1FC1\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD"
|
||||
+ "-\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9"
|
||||
+ "\\u1FFB-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026"
|
||||
+ "\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049"
|
||||
+ "\\u2057\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094"
|
||||
+ "\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF\\u2100-\\u2103"
|
||||
+ "\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D"
|
||||
+ "\\u2120-\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-"
|
||||
+ "\\u2131\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2153-"
|
||||
+ "\\u217F\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
|
||||
+ "\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243"
|
||||
+ "\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276"
|
||||
+ "\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292"
|
||||
+ "\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460"
|
||||
+ "-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2D6F\\u2E9F\\u2EF3"
|
||||
+ "\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A"
|
||||
+ "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
|
||||
+ "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
|
||||
+ "\\u3075\\u3078\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD"
|
||||
+ "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
|
||||
+ "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
|
||||
+ "\\u30EF-\\u30F2\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F"
|
||||
+ "\\u3200-\\u321E\\u3220-\\u3243\\u3250-\\u327E\\u3280-\\u32FE"
|
||||
+ "\\u3300-\\u33FF\\uA806\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C"
|
||||
+ "\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88"
|
||||
+ "\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84"
|
||||
+ "\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80"
|
||||
+ "\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C"
|
||||
+ "\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178"
|
||||
+ "\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274"
|
||||
+ "\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370"
|
||||
+ "\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C"
|
||||
+ "\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568"
|
||||
+ "\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664"
|
||||
+ "\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760"
|
||||
+ "\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C"
|
||||
+ "\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958"
|
||||
+ "\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54"
|
||||
+ "\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50"
|
||||
+ "\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C"
|
||||
+ "\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48"
|
||||
+ "\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44"
|
||||
+ "\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40"
|
||||
+ "\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C"
|
||||
+ "\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138"
|
||||
+ "\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234"
|
||||
+ "\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330"
|
||||
+ "\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C"
|
||||
+ "\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528"
|
||||
+ "\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624"
|
||||
+ "\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720"
|
||||
+ "\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C"
|
||||
+ "\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918"
|
||||
+ "\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14"
|
||||
+ "\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10"
|
||||
+ "\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C"
|
||||
+ "\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08"
|
||||
+ "\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04"
|
||||
+ "\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00"
|
||||
+ "\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC"
|
||||
+ "\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8"
|
||||
+ "\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4"
|
||||
+ "\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0"
|
||||
+ "\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC"
|
||||
+ "\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8"
|
||||
+ "\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4"
|
||||
+ "\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0"
|
||||
+ "\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10"
|
||||
+ "\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-"
|
||||
+ "\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-"
|
||||
+ "\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41"
|
||||
+ "\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F"
|
||||
+ "\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE23"
|
||||
+ "\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B"
|
||||
+ "\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-"
|
||||
+ "\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-"
|
||||
+ "\\uFFE6\\uFFE8-\\uFFEE\\U00010A0D\\U00010A0F\\U00010A38-\\U00010"
|
||||
+ "A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
|
||||
+ "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
|
||||
+ "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
|
||||
+ "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
|
||||
+ "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
|
||||
+ "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
|
||||
+ "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
|
||||
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
|
||||
+ "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
|
||||
+ "\\U0002F800-\\U0002FA1D]", false);
|
||||
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
|
||||
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
|
||||
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
|
||||
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
|
||||
+ "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
|
||||
+ "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
|
||||
+ "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
|
||||
+ "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
|
||||
+ "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
|
||||
+ "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
|
||||
+ "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
|
||||
+ "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
|
||||
+ "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
|
||||
+ "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
|
||||
+ "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
+ "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
|
||||
+ "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
|
||||
+ "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487"
|
||||
+ "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
|
||||
+ "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627"
|
||||
+ "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
|
||||
+ "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
|
||||
+ "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0928\\u0930\\u0933"
|
||||
+ "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
|
||||
+ "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
|
||||
+ "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
|
||||
+ "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
|
||||
+ "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
|
||||
+ "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
|
||||
+ "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
|
||||
+ "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
|
||||
+ "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
|
||||
+ "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
|
||||
+ "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
|
||||
+ "\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u10FC\\u1100-\\u1112"
|
||||
+ "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
|
||||
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09"
|
||||
+ "\\u1B0B\\u1B0D\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F"
|
||||
+ "\\u1B42\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1D2C-\\u1D2E"
|
||||
+ "\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-"
|
||||
+ "\\u1DE6\\u1DFE-\\u1E03\\u1E0A-\\u1E0F\\u1E12-\\u1E1B\\u1E20-"
|
||||
+ "\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-\\u1E7D\\u1E80-"
|
||||
+ "\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B\\u1EA0-\\u1EF3\\u1EF6-"
|
||||
+ "\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-\\u1F31\\u1F38"
|
||||
+ "\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51\\u1F59\\u1F60-"
|
||||
+ "\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-\\u1F7D\\u1F80"
|
||||
+ "\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99\\u1FA0\\u1FA1"
|
||||
+ "\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1\\u1FC3\\u1FC6\\u1FC9"
|
||||
+ "\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-\\u1FDF\\u1FE3\\u1FEB"
|
||||
+ "\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000-"
|
||||
+ "\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036"
|
||||
+ "\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071"
|
||||
+ "\\u2074-\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1"
|
||||
+ "\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113"
|
||||
+ "\\u2115\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126"
|
||||
+ "\\u2128\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-"
|
||||
+ "\\u2140\\u2145-\\u2149\\u2153-\\u217F\\u2190\\u2192\\u2194"
|
||||
+ "\\u21D0\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225\\u222C"
|
||||
+ "\\u222D\\u222F\\u2230\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261"
|
||||
+ "\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282"
|
||||
+ "\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB"
|
||||
+ "\\u22B2-\\u22B5\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-"
|
||||
+ "\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3"
|
||||
+ "\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A"
|
||||
+ "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
|
||||
+ "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
|
||||
+ "\\u3075\\u3078\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD"
|
||||
+ "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
|
||||
+ "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
|
||||
+ "\\u30EF-\\u30F2\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F"
|
||||
+ "\\u3200-\\u321E\\u3220-\\u3243\\u3250-\\u327E\\u3280-\\u32FE"
|
||||
+ "\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D\\uA770\\uA806\\uA8C4\\uA92B"
|
||||
+ "-\\uA92D\\uA953\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8"
|
||||
+ "\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4"
|
||||
+ "\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0"
|
||||
+ "\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C"
|
||||
+ "\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098"
|
||||
+ "\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194"
|
||||
+ "\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290"
|
||||
+ "\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C"
|
||||
+ "\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488"
|
||||
+ "\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584"
|
||||
+ "\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680"
|
||||
+ "\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C"
|
||||
+ "\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878"
|
||||
+ "\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974"
|
||||
+ "\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70"
|
||||
+ "\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C"
|
||||
+ "\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68"
|
||||
+ "\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64"
|
||||
+ "\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60"
|
||||
+ "\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C"
|
||||
+ "\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058"
|
||||
+ "\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154"
|
||||
+ "\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234\\uC250"
|
||||
+ "\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C"
|
||||
+ "\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448"
|
||||
+ "\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544"
|
||||
+ "\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640"
|
||||
+ "\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C"
|
||||
+ "\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838"
|
||||
+ "\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934"
|
||||
+ "\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30"
|
||||
+ "\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C"
|
||||
+ "\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28"
|
||||
+ "\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24"
|
||||
+ "\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20"
|
||||
+ "\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C"
|
||||
+ "\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018"
|
||||
+ "\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114"
|
||||
+ "\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210"
|
||||
+ "\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C"
|
||||
+ "\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408"
|
||||
+ "\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504"
|
||||
+ "\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600"
|
||||
+ "\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC"
|
||||
+ "\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12"
|
||||
+ "\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D"
|
||||
+ "\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17"
|
||||
+ "\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43"
|
||||
+ "\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-"
|
||||
+ "\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26\\uFE30-"
|
||||
+ "\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B\\uFE70-"
|
||||
+ "\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-\\uFFC7"
|
||||
+ "\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6"
|
||||
+ "\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-"
|
||||
+ "\\U00010A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
|
||||
+ "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
|
||||
+ "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
|
||||
+ "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
|
||||
+ "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
|
||||
+ "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
|
||||
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
|
||||
+ "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
|
||||
+ "\\U0002F800-\\U0002FA1D]", false);
|
||||
|
||||
return skipSets;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2006, International Business Machines Corporation and *
|
||||
* Copyright (C) 2001-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -154,6 +154,8 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
|
||||
/**
|
||||
* Testing the methods first(), next(), next(int) and following() of RuleBasedBreakIterator
|
||||
* TODO: Most of this test should be retired, rule behavior is much better covered by
|
||||
* TestExtended, which is also easier to understand and maintain.
|
||||
**/
|
||||
public void TestFirstNextFollowing() {
|
||||
int p, q;
|
||||
@ -187,7 +189,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
errln("ERROR: next()/following() at last position returned #"
|
||||
+ p + " and " + q + " instead of" + testString.length() + "\n");
|
||||
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault());
|
||||
testString = "Write hindi here. \u092d\u093e\u0930\u0301 \u0938\u0941\u0902\u0926\u0930 \u0939\u094c\u0964";
|
||||
testString = "Write hindi here. ";
|
||||
logln("testing char iter - string:- \"" + testString + "\"");
|
||||
charIter1.setText(testString);
|
||||
p = charIter1.first();
|
||||
@ -207,18 +209,6 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
p = q;
|
||||
q = charIter1.next(6);
|
||||
doTest(testString, p, q, 17, " here.");
|
||||
// hindi starts here
|
||||
p = q;
|
||||
q = charIter1.next(4);
|
||||
doTest(testString, p, q, 22, " \u092d\u093e\u0930\u0301"); // Nonsense, but compatible between old and new rules.
|
||||
p = q;
|
||||
q = charIter1.next(2);
|
||||
doTest(testString, p, q, 26, " \u0938\u0941\u0902");
|
||||
|
||||
q = charIter1.following(24);
|
||||
doTest(testString, 24, q, 26, "\u0941\u0902");
|
||||
q = charIter1.following(20);
|
||||
doTest(testString, 20, q, 22, "\u0930\u0301");
|
||||
p = charIter1.following(charIter1.last());
|
||||
q = charIter1.next(charIter1.last());
|
||||
if (p != BreakIterator.DONE || q != BreakIterator.DONE)
|
||||
@ -325,31 +315,6 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
p = wordIter1.preceding(wordIter1.first());
|
||||
if (p != BreakIterator.DONE)
|
||||
errln("ERROR: preceding() at starting position returned #" + p + " instead of 0");
|
||||
testString = "Write hindi here. \u092d\u093e\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 \u0939\u0301\u0964";
|
||||
logln("testing character iteration for string \" " + testString + "\" \n");
|
||||
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault());
|
||||
charIter1.setText(testString);
|
||||
p = charIter1.last();
|
||||
if (p != testString.length())
|
||||
errln("ERROR: first() returned" + p + "instead of" + testString.length());
|
||||
q = charIter1.previous();
|
||||
doTest(testString, p, q, 31, "\u0964");
|
||||
p = q;
|
||||
q = charIter1.previous();
|
||||
doTest(testString, p, q, 29, "\u0939\u0301");
|
||||
q = charIter1.preceding(26);
|
||||
doTest(testString, 26, q, 23, "\u0938\u0941\u0902");
|
||||
q = charIter1.preceding(16);
|
||||
doTest(testString, 16, q, 15, "e");
|
||||
p = q;
|
||||
q = charIter1.previous();
|
||||
doTest(testString, p, q, 14, "r");
|
||||
charIter1.first();
|
||||
p = charIter1.previous();
|
||||
q = charIter1.preceding(charIter1.first());
|
||||
if (p != BreakIterator.DONE || q != BreakIterator.DONE)
|
||||
errln("ERROR: previous()/preceding() at starting position returned #"
|
||||
+ p + " and " + q + " instead of 0\n");
|
||||
testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000.";
|
||||
logln("testing sentence iter - String:- \"" + testString + "\"");
|
||||
RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(Locale.getDefault());
|
||||
|
@ -9,7 +9,7 @@ package com.ibm.icu.dev.test.rbbi;
|
||||
//Regression testing of RuleBasedBreakIterator
|
||||
//
|
||||
// TODO: These tests should be mostly retired.
|
||||
// Much of the test data that was originaly here was removed when the RBBI rules
|
||||
// Much of the test data that was originally here was removed when the RBBI rules
|
||||
// were updated to match the Unicode boundary TRs, and the data was found to be invalid.
|
||||
// Much of the remaining data has been moved into the rbbitst.txt test data file,
|
||||
// which is common between ICU4C and ICU4J. The remaining test data should also be moved,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2007 International Business Machines Corporation and *
|
||||
* Copyright (C) 2003-2008 International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -59,6 +59,11 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
// Find the next break position, starting from the specified position.
|
||||
// Return -1 after reaching end of string.
|
||||
abstract int next(int i);
|
||||
|
||||
// A Character Property, one of the constants defined in class UProperty.
|
||||
// The value of this property will be displayed for the characters
|
||||
// near any test failure.
|
||||
int fCharProperty;
|
||||
}
|
||||
|
||||
|
||||
@ -71,6 +76,13 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
UnicodeSet fCRLFSet;
|
||||
UnicodeSet fControlSet;
|
||||
UnicodeSet fExtendSet;
|
||||
UnicodeSet fPrependSet;
|
||||
UnicodeSet fSpacingSet;
|
||||
UnicodeSet fLSet;
|
||||
UnicodeSet fVSet;
|
||||
UnicodeSet fTSet;
|
||||
UnicodeSet fLVSet;
|
||||
UnicodeSet fLVTSet;
|
||||
UnicodeSet fHangulSet;
|
||||
UnicodeSet fAnySet;
|
||||
|
||||
@ -79,34 +91,149 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
RBBICharMonkey() {
|
||||
fText = null;
|
||||
fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
|
||||
fCRLFSet = new UnicodeSet("[\\r\\n]");
|
||||
fControlSet = new UnicodeSet("[[\\p{Zl}\\p{Zp}\\p{Cc}\\p{Cf}]-[\\n]-[\\r]]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Grapheme_Extend}]");
|
||||
fHangulSet = new UnicodeSet(
|
||||
"[\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=T}" +
|
||||
"\\p{Hangul_Syllable_Type=LV}\\p{Hangul_Syllable_Type=LVT}]");
|
||||
fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]");
|
||||
fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]");
|
||||
fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]");
|
||||
fLSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]");
|
||||
fVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]");
|
||||
fTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]");
|
||||
fLVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]");
|
||||
fLVTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]");
|
||||
fHangulSet = new UnicodeSet();
|
||||
fHangulSet.addAll(fLSet);
|
||||
fHangulSet.addAll(fVSet);
|
||||
fHangulSet.addAll(fTSet);
|
||||
fHangulSet.addAll(fLVSet);
|
||||
fHangulSet.addAll(fLVTSet);
|
||||
|
||||
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]");
|
||||
|
||||
fSets = new ArrayList();
|
||||
fSets.add(fCRLFSet);
|
||||
fSets.add(fControlSet);
|
||||
fSets.add(fExtendSet);
|
||||
fSets.add(fPrependSet);
|
||||
fSets.add(fSpacingSet);
|
||||
fSets.add(fHangulSet);
|
||||
fSets.add(fAnySet);
|
||||
}
|
||||
|
||||
|
||||
void setText(StringBuffer s) {
|
||||
fText = s;
|
||||
fText = s;
|
||||
}
|
||||
|
||||
List charClasses() {
|
||||
return fSets;
|
||||
}
|
||||
|
||||
int next(int i) {
|
||||
return nextGC(fText, i);
|
||||
}
|
||||
int next(int prevPos) {
|
||||
int p0, p1, p2, p3; // Indices of the significant code points around the
|
||||
// break position being tested. The candidate break
|
||||
// location is before p2.
|
||||
|
||||
int breakPos = -1;
|
||||
|
||||
int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3.
|
||||
|
||||
// Previous break at end of string. return DONE.
|
||||
if (prevPos >= fText.length()) {
|
||||
return -1;
|
||||
}
|
||||
p0 = p1 = p2 = p3 = prevPos;
|
||||
c3 = UTF16.charAt(fText, prevPos);
|
||||
c0 = c1 = c2 = 0;
|
||||
|
||||
// Loop runs once per "significant" character position in the input text.
|
||||
for (;;) {
|
||||
// Move all of the positions forward in the input string.
|
||||
p0 = p1; c0 = c1;
|
||||
p1 = p2; c1 = c2;
|
||||
p2 = p3; c2 = c3;
|
||||
|
||||
// Advance p3 by one codepoint
|
||||
p3 = moveIndex32(fText, p3, 1);
|
||||
c3 = (p3>=fText.length())? -1: UTF16.charAt(fText, p3);
|
||||
|
||||
if (p1 == p2) {
|
||||
// Still warming up the loop. (won't work with zero length strings, but we don't care)
|
||||
continue;
|
||||
}
|
||||
if (p2 == fText.length()) {
|
||||
// Reached end of string. Always a break position.
|
||||
break;
|
||||
}
|
||||
|
||||
// Rule GB3 CR x LF
|
||||
// No Extend or Format characters may appear between the CR and LF,
|
||||
// which requires the additional check for p2 immediately following p1.
|
||||
//
|
||||
if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB4). ( Control | CR | LF ) <break>
|
||||
if (fControlSet.contains(c1) ||
|
||||
c1 == 0x0D ||
|
||||
c1 == 0x0A) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Rule (GB5) <break> ( Control | CR | LF )
|
||||
//
|
||||
if (fControlSet.contains(c2) ||
|
||||
c2 == 0x0D ||
|
||||
c2 == 0x0A) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// Rule (GB6) L x ( L | V | LV | LVT )
|
||||
if (fLSet.contains(c1) &&
|
||||
(fLSet.contains(c2) ||
|
||||
fVSet.contains(c2) ||
|
||||
fLVSet.contains(c2) ||
|
||||
fLVTSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB7) ( LV | V ) x ( V | T )
|
||||
if ((fLVSet.contains(c1) || fVSet.contains(c1)) &&
|
||||
(fVSet.contains(c2) || fTSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB8) ( LVT | T) x T
|
||||
if ((fLVTSet.contains(c1) || fTSet.contains(c1)) &&
|
||||
fTSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB9) x Extend
|
||||
if (fExtendSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB9a) x SpacingMark
|
||||
if (fSpacingSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB9b) Prepend x
|
||||
if (fPrependSet.contains(c1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB10) Any <break> Any
|
||||
break;
|
||||
}
|
||||
|
||||
breakPos = p2;
|
||||
return breakPos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -121,8 +248,12 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
List fSets;
|
||||
StringBuffer fText;
|
||||
|
||||
UnicodeSet fCRSet;
|
||||
UnicodeSet fLFSet;
|
||||
UnicodeSet fNewlineSet;
|
||||
UnicodeSet fKatakanaSet;
|
||||
UnicodeSet fALetterSet;
|
||||
UnicodeSet fMidNumLetSet;
|
||||
UnicodeSet fMidLetterSet;
|
||||
UnicodeSet fMidNumSet;
|
||||
UnicodeSet fNumericSet;
|
||||
@ -133,22 +264,26 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
|
||||
RBBIWordMonkey() {
|
||||
fSets = new ArrayList();
|
||||
fCharProperty = UProperty.WORD_BREAK;
|
||||
|
||||
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}" +
|
||||
"[\\p{Line_Break = Complex_Context}" +
|
||||
"-\\p{Grapheme_Cluster_Break = Extend}" +
|
||||
"-\\p{Grapheme_Cluster_Break = Control}]]");
|
||||
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}-[\\uff9e\\uff9f]]");
|
||||
fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]");
|
||||
fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]");
|
||||
fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]");
|
||||
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]");
|
||||
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]");
|
||||
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
|
||||
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
|
||||
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]");
|
||||
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]");
|
||||
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]");
|
||||
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}\\uff9e\\uff9f]");
|
||||
fOtherSet = new UnicodeSet();
|
||||
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]");
|
||||
|
||||
fOtherSet = new UnicodeSet();
|
||||
fOtherSet.complement();
|
||||
fOtherSet.removeAll(fCRSet);
|
||||
fOtherSet.removeAll(fLFSet);
|
||||
fOtherSet.removeAll(fNewlineSet);
|
||||
fOtherSet.removeAll(fALetterSet);
|
||||
fOtherSet.removeAll(fKatakanaSet);
|
||||
fOtherSet.removeAll(fMidLetterSet);
|
||||
@ -157,10 +292,17 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fOtherSet.removeAll(fFormatSet);
|
||||
fOtherSet.removeAll(fExtendSet);
|
||||
fOtherSet.removeAll(fExtendNumLetSet);
|
||||
// Inhibit dictionary characters from being tested at all.
|
||||
fOtherSet.removeAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
|
||||
|
||||
fSets = new ArrayList();
|
||||
fSets.add(fCRSet);
|
||||
fSets.add(fLFSet);
|
||||
fSets.add(fNewlineSet);
|
||||
fSets.add(fALetterSet);
|
||||
fSets.add(fKatakanaSet);
|
||||
fSets.add(fMidLetterSet);
|
||||
fSets.add(fMidNumLetSet);
|
||||
fSets.add(fMidNumSet);
|
||||
fSets.add(fNumericSet);
|
||||
fSets.add(fFormatSet);
|
||||
@ -186,7 +328,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3.
|
||||
|
||||
// Prev break at end of string. return DONE.
|
||||
// Previous break at end of string. return DONE.
|
||||
if (prevPos >= fText.length()) {
|
||||
return -1;
|
||||
}
|
||||
@ -204,6 +346,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
p2 = p3; c2 = c3;
|
||||
|
||||
// Advance p3 by X(Extend | Format)* Rule 4
|
||||
// But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
|
||||
do {
|
||||
p3 = moveIndex32(fText, p3, 1);
|
||||
c3 = -1;
|
||||
@ -211,7 +354,10 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
break;
|
||||
}
|
||||
c3 = UTF16.charAt(fText, p3);
|
||||
}
|
||||
if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3));
|
||||
|
||||
if (p1 == p2) {
|
||||
@ -227,28 +373,37 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
// No Extend or Format characters may appear between the CR and LF,
|
||||
// which requires the additional check for p2 immediately following p1.
|
||||
//
|
||||
if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
|
||||
if (c1==0x0D && c2==0x0A) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (3a) Break before and after newlines (including CR and LF)
|
||||
//
|
||||
if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) {
|
||||
break;
|
||||
};
|
||||
if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
|
||||
break;
|
||||
};
|
||||
|
||||
// Rule (5). ALetter x ALetter
|
||||
if (fALetterSet.contains(c1) &&
|
||||
fALetterSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (6) ALetter x MidLetter ALetter
|
||||
// Rule (6) ALetter x (MidLetter | MidNumLet) ALetter
|
||||
//
|
||||
if ( fALetterSet.contains(c1) &&
|
||||
fMidLetterSet.contains(c2) &&
|
||||
(fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
|
||||
setContains(fALetterSet, c3)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Rule (7) ALetter MidLetter x ALetter
|
||||
// Rule (7) ALetter (MidLetter | MidNumLet) x ALetter
|
||||
if (fALetterSet.contains(c0) &&
|
||||
fMidLetterSet.contains(c1) &&
|
||||
(fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1)) &&
|
||||
fALetterSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
@ -273,14 +428,14 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
// Rule (11) Numeric (MidNum | MidNumLet) x Numeric
|
||||
if ( fNumericSet.contains(c0) &&
|
||||
fMidNumSet.contains(c1) &&
|
||||
(fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1)) &&
|
||||
fNumericSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (12) Numeric x (MidNum | MidNumLet) Numeric
|
||||
if (fNumericSet.contains(c1) &&
|
||||
fMidNumSet.contains(c2) &&
|
||||
(fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
|
||||
setContains(fNumericSet, c3)) {
|
||||
continue;
|
||||
}
|
||||
@ -363,6 +518,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
RBBILineMonkey()
|
||||
{
|
||||
fCharProperty = UProperty.LINE_BREAK;
|
||||
fSets = new ArrayList();
|
||||
|
||||
fBK = new UnicodeSet("[\\p{Line_Break=BK}]");
|
||||
@ -401,6 +557,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fH3 = new UnicodeSet("[\\p{Line_break=H3}]");
|
||||
fSG = new UnicodeSet("[\\ud800-\\udfff]");
|
||||
fXX = new UnicodeSet("[\\p{Line_break=XX}]");
|
||||
|
||||
|
||||
fAL.addAll(fXX); // Default behavior for XX is identical to AL
|
||||
fAL.addAll(fAI); // Default behavior for AI is identical to AL
|
||||
@ -590,13 +747,20 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
|
||||
// LB 12
|
||||
// (!SP) x GL
|
||||
// GL x
|
||||
if ((!fSP.contains(prevChar)) && fGL.contains(thisChar) ||
|
||||
fGL.contains(prevChar)) {
|
||||
if (fGL.contains(prevChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 12a
|
||||
// [^SP BA HY] x GL
|
||||
if (!(fSP.contains(prevChar) ||
|
||||
fBA.contains(prevChar) ||
|
||||
fHY.contains(prevChar) ) && fGL.contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// LB 13 Don't break before closings.
|
||||
// NU x CL and NU x IS are not matched here so that they will
|
||||
@ -611,7 +775,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
// LB 14 Don't break after OP SP*
|
||||
// Scan backwards, checking for this sequence.
|
||||
// The OP char could include combining marks, so we acually check for
|
||||
// The OP char could include combining marks, so we actually check for
|
||||
// OP CM* SP* x
|
||||
tPos = prevPos;
|
||||
if (fSP.contains(prevChar)) {
|
||||
@ -626,7 +790,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 15 Do not break withing "[
|
||||
// LB 15 Do not break within "[
|
||||
// QU CM* SP* x OP
|
||||
if (fOP.contains(thisChar)) {
|
||||
// Scan backwards from prevChar to see if it is preceded by QU CM* SP*
|
||||
@ -796,19 +960,8 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 30 Do not break between letters, numbers or oridnary symbols and
|
||||
// opening or closing punctuation.
|
||||
// (AL | NU) x OP
|
||||
// CL x (AL | NU)
|
||||
if ((fAL.contains(prevChar) || fNU.contains(prevChar)) &&
|
||||
fOP.contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
if (fCL.contains(prevChar) &&
|
||||
(fAL.contains(thisChar) || fNU.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 30 (Withdrawn as of Unicode 5.1)
|
||||
|
||||
// LB 31 Break everywhere else
|
||||
break;
|
||||
}
|
||||
@ -980,6 +1133,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
UnicodeSet fOLetterSet;
|
||||
UnicodeSet fNumericSet;
|
||||
UnicodeSet fATermSet;
|
||||
UnicodeSet fSContinueSet;
|
||||
UnicodeSet fSTermSet;
|
||||
UnicodeSet fCloseSet;
|
||||
UnicodeSet fOtherSet;
|
||||
@ -988,19 +1142,25 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
|
||||
RBBISentenceMonkey() {
|
||||
fCharProperty = UProperty.SENTENCE_BREAK;
|
||||
|
||||
fSets = new ArrayList();
|
||||
|
||||
fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep}]");
|
||||
// Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator
|
||||
// set and made into character classes of their own. For the monkey impl,
|
||||
// they remain in SEP, since Sep always appears with CR and LF in the rules.
|
||||
fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]");
|
||||
fFormatSet = new UnicodeSet("[\\p{Sentence_Break = Format}]");
|
||||
fSpSet = new UnicodeSet("[\\p{Sentence_Break = Sp}]");
|
||||
fLowerSet = new UnicodeSet("[\\p{Sentence_Break = Lower}]");
|
||||
fUpperSet = new UnicodeSet("[\\p{Sentence_Break = Upper}]");
|
||||
fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}-[\\uff9e\\uff9f]]");
|
||||
fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}]");
|
||||
fNumericSet = new UnicodeSet("[\\p{Sentence_Break = Numeric}]");
|
||||
fATermSet = new UnicodeSet("[\\p{Sentence_Break = ATerm}]");
|
||||
fSContinueSet = new UnicodeSet("[\\p{Sentence_Break = SContinue}]");
|
||||
fSTermSet = new UnicodeSet("[\\p{Sentence_Break = STerm}]");
|
||||
fCloseSet = new UnicodeSet("[\\p{Sentence_Break = Close}]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Grapheme_Extend}\\uff9e\\uff9f]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Sentence_Break = Extend}]");
|
||||
fOtherSet = new UnicodeSet();
|
||||
|
||||
|
||||
@ -1013,6 +1173,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fOtherSet.removeAll(fOLetterSet);
|
||||
fOtherSet.removeAll(fNumericSet);
|
||||
fOtherSet.removeAll(fATermSet);
|
||||
fOtherSet.removeAll(fSContinueSet);
|
||||
fOtherSet.removeAll(fSTermSet);
|
||||
fOtherSet.removeAll(fCloseSet);
|
||||
fOtherSet.removeAll(fExtendSet);
|
||||
@ -1026,6 +1187,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fSets.add(fOLetterSet);
|
||||
fSets.add(fNumericSet);
|
||||
fSets.add(fATermSet);
|
||||
fSets.add(fSContinueSet);
|
||||
fSets.add(fSTermSet);
|
||||
fSets.add(fCloseSet);
|
||||
fSets.add(fOtherSet);
|
||||
@ -1170,8 +1332,8 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 8a (STerm | ATerm) Close* Sp* x (Sterm | ATerm)
|
||||
if (fSTermSet.contains(c2) || fATermSet.contains(c2)) {
|
||||
// Rule 8a (STerm | ATerm) Close* Sp* x (SContinue | STerm | ATerm)
|
||||
if (fSContinueSet.contains(c2) || fSTermSet.contains(c2) || fATermSet.contains(c2)) {
|
||||
p8 = p1;
|
||||
while (setContains(fSpSet, cAt(p8))) {
|
||||
p8 = moveBack(p8);
|
||||
@ -1186,7 +1348,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
}
|
||||
|
||||
|
||||
// Rule (9) (STerm | ATerm) Close* x (Close | Sp | Sep)
|
||||
// Rule (9) (STerm | ATerm) Close* x (Close | Sp | Sep | CR | LF)
|
||||
int p9 = p1;
|
||||
while (p9>0 && fCloseSet.contains(cAt(p9))) {
|
||||
p9 = moveBack(p9);
|
||||
@ -1198,7 +1360,7 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
// Rule (10) (Sterm | ATerm) Close* Sp* x (Sp | Sep)
|
||||
// Rule (10) (STerm | ATerm) Close* Sp* x (Sp | Sep | CR | LF)
|
||||
int p10 = p1;
|
||||
while (p10>0 && fSpSet.contains(cAt(p10))) {
|
||||
p10 = moveBack(p10);
|
||||
@ -1214,6 +1376,9 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
|
||||
// Rule (11) (STerm | ATerm) Close* Sp* <break>
|
||||
int p11 = p1;
|
||||
if (p11>0 && fSepSet.contains(cAt(p11))) {
|
||||
p11 = moveBack(p11);
|
||||
}
|
||||
while (p11>0 && fSpSet.contains(cAt(p11))) {
|
||||
p11 = moveBack(p11);
|
||||
}
|
||||
@ -1317,175 +1482,12 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// The following UnicodeSets are used in matching a Grapheme Cluster
|
||||
//
|
||||
private static UnicodeSet GC_Control;
|
||||
|
||||
private static UnicodeSet GC_Extend ;
|
||||
|
||||
private static UnicodeSet GC_L ;
|
||||
|
||||
private static UnicodeSet GC_V ;
|
||||
|
||||
private static UnicodeSet GC_T ;
|
||||
|
||||
private static UnicodeSet GC_LV;
|
||||
|
||||
private static UnicodeSet GC_LVT ;
|
||||
|
||||
protected void init()throws Exception{
|
||||
GC_Control = new UnicodeSet("[[:Zl:][:Zp:][:Cc:][:Cf:]-[\\u000d\\u000a]-[\\p{Grapheme_Cluster_Break=Extend}]]");
|
||||
|
||||
GC_Extend = new UnicodeSet("[\\p{Grapheme_Cluster_Break=Extend}]");
|
||||
|
||||
GC_L = new UnicodeSet("[[:Hangul_Syllable_Type=L:]]");
|
||||
|
||||
GC_V = new UnicodeSet("[[:Hangul_Syllable_Type=V:]]");
|
||||
|
||||
GC_T = new UnicodeSet("[[:Hangul_Syllable_Type=T:]]");
|
||||
|
||||
GC_LV = new UnicodeSet("[[:Hangul_Syllable_Type=LV:]]");
|
||||
|
||||
GC_LVT = new UnicodeSet("[[:Hangul_Syllable_Type=LVT:]]");
|
||||
}
|
||||
/**
|
||||
* Find the end of the extent of a grapheme cluster.
|
||||
* This is the reference implementation used by the monkey test for comparison
|
||||
* with the RBBI results.
|
||||
* @param s The string containing the text to be analyzed
|
||||
* @param i The index of the start of the grapheme cluster.
|
||||
* @return The index of the first code point following the grapheme cluster
|
||||
* @internal
|
||||
*/
|
||||
private static int nextGC(StringBuffer s, int i) {
|
||||
if (i >= s.length() || i == -1 ) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int c = UTF16.charAt(s, i);
|
||||
int pos = i;
|
||||
|
||||
if (c == 0x0d) {
|
||||
pos = nextCP(s, i);
|
||||
if (pos >= s.length()) {
|
||||
return pos;
|
||||
}
|
||||
c = UTF16.charAt(s, pos);
|
||||
if (c == 0x0a) {
|
||||
pos = nextCP(s, pos);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
if (GC_Control.contains(c) || c == 0x0a) {
|
||||
pos = nextCP(s, pos);
|
||||
return pos;
|
||||
}
|
||||
|
||||
// Little state machine to consume Hangul Syllables
|
||||
int hangulState = 1;
|
||||
state_loop: for (;;) {
|
||||
switch (hangulState) {
|
||||
case 1:
|
||||
if (GC_L.contains(c)) {
|
||||
hangulState = 2;
|
||||
break;
|
||||
}
|
||||
if (GC_V.contains(c) || GC_LV.contains(c)) {
|
||||
hangulState = 3;
|
||||
break;
|
||||
}
|
||||
if (GC_T.contains(c) || GC_LVT.contains(c)) {
|
||||
hangulState = 4;
|
||||
break;
|
||||
}
|
||||
break state_loop;
|
||||
case 2:
|
||||
if (GC_L.contains(c)) {
|
||||
// continue in state 2.
|
||||
break;
|
||||
}
|
||||
if (GC_V.contains(c) || GC_LV.contains(c)) {
|
||||
hangulState = 3;
|
||||
break;
|
||||
}
|
||||
if (GC_LVT.contains(c)) {
|
||||
hangulState = 4;
|
||||
break;
|
||||
}
|
||||
if (GC_Extend.contains(c)) {
|
||||
hangulState = 5;
|
||||
break;
|
||||
}
|
||||
break state_loop;
|
||||
case 3:
|
||||
if (GC_V.contains(c)) {
|
||||
// continue in state 3;
|
||||
break;
|
||||
}
|
||||
if (GC_T.contains(c)) {
|
||||
hangulState = 4;
|
||||
break;
|
||||
}
|
||||
if (GC_Extend.contains(c)) {
|
||||
hangulState = 5;
|
||||
break;
|
||||
}
|
||||
break state_loop;
|
||||
case 4:
|
||||
if (GC_T.contains(c)) {
|
||||
// continue in state 4
|
||||
break;
|
||||
}
|
||||
if (GC_Extend.contains(c)) {
|
||||
hangulState = 5;
|
||||
break;
|
||||
}
|
||||
break state_loop;
|
||||
case 5:
|
||||
if (GC_Extend.contains(c)) {
|
||||
hangulState = 5;
|
||||
break;
|
||||
}
|
||||
break state_loop;
|
||||
}
|
||||
// We have exited the switch statement, but are still in the loop.
|
||||
// Still in a Hangul Syllable, advance to the next code point.
|
||||
pos = nextCP(s, pos);
|
||||
if (pos >= s.length()) {
|
||||
break;
|
||||
}
|
||||
c = UTF16.charAt(s, pos);
|
||||
} // end of loop
|
||||
|
||||
if (hangulState != 1) {
|
||||
// We found a Hangul. We're done.
|
||||
return pos;
|
||||
}
|
||||
|
||||
// Ordinary characters. Consume one codepoint unconditionally, then any following Extends.
|
||||
for (;;) {
|
||||
pos = nextCP(s, pos);
|
||||
if (pos >= s.length()) {
|
||||
break;
|
||||
}
|
||||
c = UTF16.charAt(s, pos);
|
||||
if (GC_Extend.contains(c) == false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* random number generator. Not using Java's built-in Randoms for two reasons:
|
||||
* 1. Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
|
||||
* 2. We need to get and restore the seed from values occuring in the middle
|
||||
* 2. We need to get and restore the seed from values occurring in the middle
|
||||
* of a long sequence, to more easily reproduce failing cases.
|
||||
*/
|
||||
private static int m_seed = 1;
|
||||
@ -1495,6 +1497,42 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
return (int)(m_seed >>> 16) % 32768;
|
||||
}
|
||||
|
||||
// Helper function for formatting error output.
|
||||
// Append a string into a fixed-size field in a StringBuffer.
|
||||
// Blank-pad the string if it is shorter than the field.
|
||||
// Truncate the source string if it is too long.
|
||||
//
|
||||
private static void appendToBuf(StringBuilder dest, String src, int fieldLen) {
|
||||
int appendLen = src.length();
|
||||
if (appendLen >= fieldLen) {
|
||||
appendLen = fieldLen;
|
||||
}
|
||||
dest.append(src, 0, appendLen);
|
||||
while (appendLen < fieldLen) {
|
||||
dest.append(' ');
|
||||
appendLen++;
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function for formatting error output.
|
||||
// Display a code point in "\\uxxxx" or "\Uxxxxxxxx" format
|
||||
private static void appendCharToBuf(StringBuilder dest, int c, int fieldLen) {
|
||||
String hexChars = "0123456789abcdef";
|
||||
if (c < 0x10000) {
|
||||
dest.append("\\u");
|
||||
for (int bn=12; bn>=0; bn-=4) {
|
||||
dest.append(hexChars.charAt((((int)c)>>bn)&0xf));
|
||||
}
|
||||
appendToBuf(dest, " ", fieldLen-6);
|
||||
} else {
|
||||
dest.append("\\U");
|
||||
for (int bn=28; bn>=0; bn-=4) {
|
||||
dest.append(hexChars.charAt((((int)c)>>bn)&0xf));
|
||||
}
|
||||
appendToBuf(dest, " ", fieldLen-10);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a RBBI monkey test. Common routine, for all break iterator types.
|
||||
@ -1748,33 +1786,31 @@ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int
|
||||
}
|
||||
|
||||
// Format looks like "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
|
||||
StringBuffer errorText = new StringBuffer();
|
||||
errorText.append("<data>");
|
||||
StringBuilder errorText = new StringBuilder();
|
||||
|
||||
String hexChars = "0123456789abcdef";
|
||||
int c; // Char from test data
|
||||
int bn;
|
||||
for (ci = startContext; ci <= endContext && ci != -1; ci = nextCP(testText, ci)) {
|
||||
if (ci == i) {
|
||||
// This is the location of the error.
|
||||
errorText.append("<?>");
|
||||
errorText.append("<?>---------------------------------\n");
|
||||
} else if (expectedBreaks[ci]) {
|
||||
// This is a non-error expected break position.
|
||||
errorText.append("<>");
|
||||
errorText.append("------------------------------------\n");
|
||||
}
|
||||
if (ci < testText.length()) {
|
||||
c = UTF16.charAt(testText, ci);
|
||||
if (c < 0x10000) {
|
||||
errorText.append("\\u");
|
||||
for (bn=12; bn>=0; bn-=4) {
|
||||
errorText.append(hexChars.charAt((((int)c)>>bn)&0xf));
|
||||
}
|
||||
} else {
|
||||
errorText.append("\\U");
|
||||
for (bn=28; bn>=0; bn-=4) {
|
||||
errorText.append(hexChars.charAt((((int)c)>>bn)&0xf));
|
||||
}
|
||||
}
|
||||
appendCharToBuf(errorText, c, 11);
|
||||
String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
|
||||
appendToBuf(errorText, gc, 8);
|
||||
int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
|
||||
String extraPropValue =
|
||||
UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
|
||||
appendToBuf(errorText, extraPropValue, 20);
|
||||
|
||||
String charName = UCharacter.getExtendedName(c);
|
||||
appendToBuf(errorText, charName, 40);
|
||||
errorText.append('\n');
|
||||
}
|
||||
}
|
||||
if (ci == testText.length() && ci != -1) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2001-2006 International Business Machines
|
||||
# Copyright (c) 2001-2008 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# RBBI Test Data
|
||||
@ -75,14 +75,14 @@ Hi! •This is a simple sample sentence. •It does not have to make any sense a
|
||||
|
||||
|
||||
# Hindi combining chars. (An old test)
|
||||
<data>•भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•
|
||||
•\u0939•\u094c•\u0964•</data>
|
||||
<data>•\u0916\u0947•\u0938\u0941\u0902•\u0926•\u0930•\u0939•\u094c•\u0964•</data>
|
||||
# TODO: Update these tests for Unicode 5.1 Extended Grapheme clusters
|
||||
#<data>•भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•
|
||||
#•\u0939•\u094c•\u0964•</data>
|
||||
#<data>•\u0916\u0947•\u0938\u0941\u0902•\u0926•\u0930•\u0939•\u094c•\u0964•</data>
|
||||
|
||||
|
||||
# Bug 1587. Tamil. \u0baa\u0bc1 should be two separate characters, even though
|
||||
# Hyangmi would perfer that it be one.
|
||||
<data>•\u0baa•\u0bc1•\u0baa•\u0bc1•</data>
|
||||
# Bug 1587. Tamil. \u0baa\u0bc1 is an Extended Grapheme Cluster
|
||||
<data>•\u0baa\u0bc1•\u0baa\u0bc1•</data>
|
||||
|
||||
# Regression test for bug 1889
|
||||
<data>•\u0f40\u0f7d•\u0000•\u0f7e•</data>
|
||||
@ -485,7 +485,10 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
||||
<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
|
||||
|
||||
# Regression for bug 836
|
||||
<data>•AAA(AAA •</data>
|
||||
# Note: Unicode 5.1 changed this behavior
|
||||
# ICU will want to change it back before releasing,
|
||||
# so there is no break preceding the '('
|
||||
<data>•AAA•(AAA •</data>
|
||||
|
||||
# Try some words from other scripts.
|
||||
# Greek, Cyrillic, Hebrew, Arabic, Arabic, Georgian, Latin
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -200,15 +200,15 @@ public class UnicodeSetTest extends TestFmwk {
|
||||
// Cover applyPattern, applyPropertyAlias
|
||||
s.clear();
|
||||
s.applyPattern("[ab ]", true);
|
||||
expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab"});
|
||||
expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
|
||||
s.clear();
|
||||
s.applyPattern("[ab ]", false);
|
||||
expectToPattern(s, "[\\\u0020ab]", new String[] {"a", "\u0020", NOT, "ab"});
|
||||
expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
|
||||
|
||||
s.clear();
|
||||
s.applyPropertyAlias("nv", "0.5");
|
||||
expectToPattern(s, "[\\u00BD\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]", null);
|
||||
// Unicode 4.1 adds \u2CFD\U00010141\U00010175\U00010176 with numeric value 1/2
|
||||
expectToPattern(s, "[\\u00BD\\u0D74\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]", null);
|
||||
// Unicode 5.1 adds Malayalam 1/2 (\u0D74)
|
||||
|
||||
s.clear();
|
||||
s.applyPropertyAlias("gc", "Lu");
|
||||
@ -1252,7 +1252,7 @@ public class UnicodeSetTest extends TestFmwk {
|
||||
// selector, input, output
|
||||
CASE,
|
||||
"[aq\u00DF{Bc}{bC}{Fi}]",
|
||||
"[aAqQ\u00DF\uFB01{ss}{bc}{fi}]",
|
||||
"[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
|
||||
|
||||
CASE,
|
||||
"[\u01F1]", // 'DZ'
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -318,15 +318,15 @@ public final class UCharacterProperty
|
||||
new BinaryProperties( 1, ( 1 << XID_CONTINUE_PROPERTY_) ),
|
||||
new BinaryProperties( 1, ( 1 << XID_START_PROPERTY_) ),
|
||||
new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CASE_SENSITIVE */
|
||||
new BinaryProperties( 2, ( 1 << V2_S_TERM_PROPERTY_) ),
|
||||
new BinaryProperties( 2, ( 1 << V2_VARIATION_SELECTOR_PROPERTY_) ),
|
||||
new BinaryProperties( 1, ( 1 << S_TERM_PROPERTY_) ),
|
||||
new BinaryProperties( 1, ( 1 << VARIATION_SELECTOR_PROPERTY_) ),
|
||||
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFD_INERT */
|
||||
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFKD_INERT */
|
||||
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFC_INERT */
|
||||
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFKC_INERT */
|
||||
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_SEGMENT_STARTER */
|
||||
new BinaryProperties( 2, ( 1 << V2_PATTERN_SYNTAX) ),
|
||||
new BinaryProperties( 2, ( 1 << V2_PATTERN_WHITE_SPACE) ),
|
||||
new BinaryProperties( 1, ( 1 << PATTERN_SYNTAX) ),
|
||||
new BinaryProperties( 1, ( 1 << PATTERN_WHITE_SPACE) ),
|
||||
new BinaryProperties( SRC_CHAR_AND_PROPSVEC, 0 ), /* UCHAR_POSIX_ALNUM */
|
||||
new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_BLANK */
|
||||
new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_GRAPH */
|
||||
@ -798,38 +798,36 @@ public final class UCharacterProperty
|
||||
* ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
|
||||
*/
|
||||
private static final int WHITE_SPACE_PROPERTY_ = 0;
|
||||
//private static final int BIDI_CONTROL_PROPERTY_ = 1;
|
||||
//private static final int JOIN_CONTROL_PROPERTY_ = 2;
|
||||
private static final int DASH_PROPERTY_ = 3;
|
||||
private static final int HYPHEN_PROPERTY_ = 4;
|
||||
private static final int QUOTATION_MARK_PROPERTY_ = 5;
|
||||
private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 6;
|
||||
private static final int MATH_PROPERTY_ = 7;
|
||||
private static final int HEX_DIGIT_PROPERTY_ = 8;
|
||||
private static final int ASCII_HEX_DIGIT_PROPERTY_ = 9;
|
||||
private static final int ALPHABETIC_PROPERTY_ = 10;
|
||||
private static final int IDEOGRAPHIC_PROPERTY_ = 11;
|
||||
private static final int DIACRITIC_PROPERTY_ = 12;
|
||||
private static final int EXTENDER_PROPERTY_ = 13;
|
||||
//private static final int LOWERCASE_PROPERTY_ = 14;
|
||||
//private static final int UPPERCASE_PROPERTY_ = 15;
|
||||
private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 16;
|
||||
private static final int GRAPHEME_EXTEND_PROPERTY_ = 17;
|
||||
private static final int GRAPHEME_LINK_PROPERTY_ = 18;
|
||||
private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 19;
|
||||
private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 20;
|
||||
private static final int RADICAL_PROPERTY_ = 21;
|
||||
private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 22;
|
||||
private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 23;
|
||||
private static final int DEPRECATED_PROPERTY_ = 24;
|
||||
//private static final int SOFT_DOTTED_PROPERTY_ = 25;
|
||||
private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 26;
|
||||
private static final int XID_START_PROPERTY_ = 27;
|
||||
private static final int XID_CONTINUE_PROPERTY_ = 28;
|
||||
private static final int ID_START_PROPERTY_ = 29;
|
||||
private static final int ID_CONTINUE_PROPERTY_ = 30;
|
||||
private static final int GRAPHEME_BASE_PROPERTY_ = 31;
|
||||
//private static final int BINARY_1_TOP_PROPERTY_ = 32;
|
||||
private static final int DASH_PROPERTY_ = 1;
|
||||
private static final int HYPHEN_PROPERTY_ = 2;
|
||||
private static final int QUOTATION_MARK_PROPERTY_ = 3;
|
||||
private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
|
||||
private static final int MATH_PROPERTY_ = 5;
|
||||
private static final int HEX_DIGIT_PROPERTY_ = 6;
|
||||
private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
|
||||
private static final int ALPHABETIC_PROPERTY_ = 8;
|
||||
private static final int IDEOGRAPHIC_PROPERTY_ = 9;
|
||||
private static final int DIACRITIC_PROPERTY_ = 10;
|
||||
private static final int EXTENDER_PROPERTY_ = 11;
|
||||
private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
|
||||
private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
|
||||
private static final int GRAPHEME_LINK_PROPERTY_ = 14;
|
||||
private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
|
||||
private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
|
||||
private static final int RADICAL_PROPERTY_ = 17;
|
||||
private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
|
||||
private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
|
||||
private static final int DEPRECATED_PROPERTY_ = 20;
|
||||
private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
|
||||
private static final int XID_START_PROPERTY_ = 22;
|
||||
private static final int XID_CONTINUE_PROPERTY_ = 23;
|
||||
private static final int ID_START_PROPERTY_ = 24;
|
||||
private static final int ID_CONTINUE_PROPERTY_ = 25;
|
||||
private static final int GRAPHEME_BASE_PROPERTY_ = 26;
|
||||
private static final int S_TERM_PROPERTY_ = 27;
|
||||
private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
|
||||
private static final int PATTERN_SYNTAX = 29; /* new in ICU 3.4 and Unicode 4.1 */
|
||||
private static final int PATTERN_WHITE_SPACE = 30;
|
||||
|
||||
/**
|
||||
* First nibble shift
|
||||
@ -844,11 +842,6 @@ public final class UCharacterProperty
|
||||
*/
|
||||
private static final int AGE_SHIFT_ = 24;
|
||||
|
||||
// boolean properties in vector word 2
|
||||
private static final int V2_S_TERM_PROPERTY_ = 24;
|
||||
private static final int V2_VARIATION_SELECTOR_PROPERTY_ = 25;
|
||||
private static final int V2_PATTERN_SYNTAX = 26; /* new in ICU 3.4 and Unicode 4.1 */
|
||||
private static final int V2_PATTERN_WHITE_SPACE = 27;
|
||||
|
||||
// private constructors --------------------------------------------------
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2006, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -157,7 +157,7 @@ final class UCharacterPropertyReader implements ICUBinary.Authenticate
|
||||
* Format version; this code works with all versions with the same major
|
||||
* version number and the same Trie bit distribution.
|
||||
*/
|
||||
private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x4, (byte)0,
|
||||
private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x5, (byte)0,
|
||||
(byte)Trie.INDEX_STAGE_1_SHIFT_,
|
||||
(byte)Trie.INDEX_STAGE_2_SHIFT_};
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ed7c7aef0520ae3182b6c7b9ab01ecc3c40e2af9ed46f68943abbec87ce85300
|
||||
size 5412788
|
||||
oid sha256:3e092ba77dd3f34ebab38fdb9a23ebbb8f23089cba6323f2d941d40c92c59cfe
|
||||
size 5521414
|
||||
|
@ -854,11 +854,113 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
|
||||
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int SUNDANESE_ID = 155; /* [1B80] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int LEPCHA_ID = 156; /* [1C00] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int OL_CHIKI_ID = 157; /* [1C50] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int VAI_ID = 159; /* [A500] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int SAURASHTRA_ID = 161; /* [A880] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int KAYAH_LI_ID = 162; /* [A900] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int REJANG_ID = 163; /* [A930] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int CHAM_ID = 164; /* [AA00] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int LYCIAN_ID = 167; /* [10280] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int CARIAN_ID = 168; /* [102A0] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int LYDIAN_ID = 169; /* [10920] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int DOMINO_TILES_ID = 171; /* [1F030] */
|
||||
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int COUNT = 155;
|
||||
public static final int COUNT = 172;
|
||||
|
||||
// blocks objects ---------------------------------------------------
|
||||
|
||||
@ -1674,7 +1776,108 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
|
||||
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
|
||||
|
||||
/**
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
@ -1752,13 +1955,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
for (int i = 0; i < BLOCKS_.length; ++i) {
|
||||
UnicodeBlock b = BLOCKS_[i];
|
||||
String name = getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG);
|
||||
m.put(name.toUpperCase(), b);
|
||||
m.put(name.replace('_',' ').toUpperCase(), b);
|
||||
m.put(b.toString().toUpperCase(), b);
|
||||
name = name.toUpperCase().replace(" ", "").replace("_", "").replace("-", "");
|
||||
m.put(name, b);
|
||||
}
|
||||
mref = new SoftReference(m);
|
||||
}
|
||||
UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase());
|
||||
UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase().replace(" ", "").replace("_", "").replace("-", ""));
|
||||
if (b == null) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
@ -1875,7 +2077,26 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
PHOENICIAN,
|
||||
CUNEIFORM,
|
||||
CUNEIFORM_NUMBERS_AND_PUNCTUATION,
|
||||
COUNTING_ROD_NUMERALS
|
||||
COUNTING_ROD_NUMERALS,
|
||||
|
||||
/* New blocks in Unicode 5.1 */
|
||||
SUNDANESE,
|
||||
LEPCHA,
|
||||
OL_CHIKI,
|
||||
CYRILLIC_EXTENDED_A,
|
||||
VAI,
|
||||
CYRILLIC_EXTENDED_B,
|
||||
SAURASHTRA,
|
||||
KAYAH_LI,
|
||||
REJANG,
|
||||
CHAM,
|
||||
ANCIENT_SYMBOLS,
|
||||
PHAISTOS_DISC,
|
||||
LYCIAN,
|
||||
CARIAN,
|
||||
LYDIAN,
|
||||
MAHJONG_TILES,
|
||||
DOMINO_TILES,
|
||||
};
|
||||
|
||||
static {
|
||||
@ -2285,11 +2506,15 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
/**
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
public static final int ZHAIN =53;
|
||||
public static final int ZHAIN = 53;
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 54;
|
||||
public static final int BURUSHASKI_YEH_BARREE = 54;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 55;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2338,10 +2563,18 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
public static final int V = 9;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int SPACING_MARK = 10;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int PREPEND = 11;
|
||||
/**
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
public static final int COUNT = 10;
|
||||
public static final int COUNT = 12;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2383,9 +2616,29 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
*/
|
||||
public static final int EXTENDNUMLET = 7;
|
||||
/**
|
||||
* @stable ICU 3.8
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 8;
|
||||
public static final int CR = 8;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int EXTEND = 9;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int LF = 10;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int MIDNUMLET = 11;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int NEWLINE = 12;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 13;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2439,9 +2692,25 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
*/
|
||||
public static final int UPPER = 10;
|
||||
/**
|
||||
* @stable ICU 3.8
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 11;
|
||||
public static final int CR = 11;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int EXTEND = 12;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int LF = 13;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int SCONTINUE = 14;
|
||||
/**
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
public static final int COUNT = 15;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -5014,7 +5283,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
case UProperty.JOINING_TYPE:
|
||||
return gBdp.getJoiningType(ch);
|
||||
case UProperty.LINE_BREAK:
|
||||
return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_;
|
||||
return (int)(PROPERTY_.getAdditional(ch, LB_VWORD)& LB_MASK)>>LB_SHIFT;
|
||||
case UProperty.NUMERIC_TYPE:
|
||||
type=getNumericType(PROPERTY_.getProperty(ch));
|
||||
if(type>NumericType.NUMERIC) {
|
||||
@ -5196,7 +5465,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
case UProperty.GENERAL_CATEGORY:
|
||||
return UCharacterCategory.CHAR_CATEGORY_COUNT - 1;
|
||||
case UProperty.LINE_BREAK:
|
||||
return (PROPERTY_.getMaxValues(0) & LINE_BREAK_MASK_) >> LINE_BREAK_SHIFT_;
|
||||
return (PROPERTY_.getMaxValues(LB_VWORD) & LB_MASK) >> LB_SHIFT;
|
||||
case UProperty.NUMERIC_TYPE:
|
||||
return NumericType.COUNT - 1;
|
||||
case UProperty.SCRIPT:
|
||||
@ -6148,17 +6417,21 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
/*
|
||||
* Properties in vector word 2
|
||||
* Bits
|
||||
* 31..24 More binary properties (see UCharacterProperty)
|
||||
* 23..19 reserved
|
||||
* 18..14 Sentence Break
|
||||
* 13..10 Word Break
|
||||
* 31..26 reserved
|
||||
* 25..20 Line Break
|
||||
* 19..15 Sentence Break
|
||||
* 14..10 Word Break
|
||||
* 9.. 5 Grapheme Cluster Break
|
||||
* 4.. 0 Decomposition Type
|
||||
*/
|
||||
private static final int SB_MASK = 0x0007c000;
|
||||
private static final int SB_SHIFT = 14;
|
||||
private static final int LB_MASK = 0x03f00000;
|
||||
private static final int LB_SHIFT = 20;
|
||||
private static final int LB_VWORD = 2;
|
||||
|
||||
private static final int WB_MASK = 0x00003c00;
|
||||
private static final int SB_MASK = 0x000f8000;
|
||||
private static final int SB_SHIFT = 15;
|
||||
|
||||
private static final int WB_MASK = 0x00007c00;
|
||||
private static final int WB_SHIFT = 10;
|
||||
|
||||
private static final int GCB_MASK = 0x000003e0;
|
||||
@ -6173,48 +6446,38 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
/*
|
||||
* Properties in vector word 0
|
||||
* Bits
|
||||
* 31..24 DerivedAge version major/minor one nibble each (see UCharacterProperty)
|
||||
* 23..18 Line Break
|
||||
* 17..15 East Asian Width
|
||||
* 14.. 7 UBlockCode
|
||||
* 6.. 0 UScriptCode
|
||||
* 31..24 DerivedAge version major/minor one nibble each
|
||||
* 23..20 reserved
|
||||
* 19..17 East Asian Width
|
||||
* 16.. 8 UBlockCode
|
||||
* 7.. 0 UScriptCode
|
||||
*/
|
||||
|
||||
/**
|
||||
* Integer properties mask and shift values for East Asian cell width.
|
||||
* Equivalent to icu4c UPROPS_EA_MASK
|
||||
*/
|
||||
private static final int EAST_ASIAN_MASK_ = 0x00038000;
|
||||
private static final int EAST_ASIAN_MASK_ = 0x000e0000;
|
||||
/**
|
||||
* Integer properties mask and shift values for East Asian cell width.
|
||||
* Equivalent to icu4c UPROPS_EA_SHIFT
|
||||
*/
|
||||
private static final int EAST_ASIAN_SHIFT_ = 15;
|
||||
/**
|
||||
* Integer properties mask and shift values for line breaks.
|
||||
* Equivalent to icu4c UPROPS_LB_MASK
|
||||
*/
|
||||
private static final int LINE_BREAK_MASK_ = 0x00FC0000;
|
||||
/**
|
||||
* Integer properties mask and shift values for line breaks.
|
||||
* Equivalent to icu4c UPROPS_LB_SHIFT
|
||||
*/
|
||||
private static final int LINE_BREAK_SHIFT_ = 18;
|
||||
private static final int EAST_ASIAN_SHIFT_ = 17;
|
||||
/**
|
||||
* Integer properties mask and shift values for blocks.
|
||||
* Equivalent to icu4c UPROPS_BLOCK_MASK
|
||||
*/
|
||||
private static final int BLOCK_MASK_ = 0x00007f80;
|
||||
private static final int BLOCK_MASK_ = 0x0001ff00;
|
||||
/**
|
||||
* Integer properties mask and shift values for blocks.
|
||||
* Equivalent to icu4c UPROPS_BLOCK_SHIFT
|
||||
*/
|
||||
private static final int BLOCK_SHIFT_ = 7;
|
||||
private static final int BLOCK_SHIFT_ = 8;
|
||||
/**
|
||||
* Integer properties mask and shift values for scripts.
|
||||
* Equivalent to icu4c UPROPS_SCRIPT_MASK
|
||||
*/
|
||||
private static final int SCRIPT_MASK_ = 0x0000007f;
|
||||
private static final int SCRIPT_MASK_ = 0x000000ff;
|
||||
|
||||
// private constructor -----------------------------------------------
|
||||
///CLOVER:OFF
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2007 International Business Machines Corporation and *
|
||||
* Copyright (C) 2001-2008 International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -689,11 +689,108 @@ public final class UScript {
|
||||
*/
|
||||
public static final int MEITEI_MAYEK = 115;/* Mtei */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int IMPERIAL_ARAMAIC = 116;/* Armi */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int AVESTAN = 117;/* Avst */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int CHAKMA = 118;/* Cakm */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int KOREAN = 119;/* Kore */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int KAITHI = 120;/* Kthi */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int MANICHAEAN = 121;/* Mani */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int PSALTER_PAHLAVI = 123;/* Phlp */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int BOOK_PAHLAVI = 124;/* Phlv */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int INSCRIPTIONAL_PARTHIAN = 125;/* Prti */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int SAMARITAN = 126;/* Samr */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int TAI_VIET = 127;/* Tavt */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */
|
||||
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @draft ICU 4.0
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int SYMBOLS = 129;/* Zsym */
|
||||
/**
|
||||
* Limit
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int CODE_LIMIT = 116;
|
||||
public static final int CODE_LIMIT = 130;
|
||||
|
||||
private static final int SCRIPT_MASK = 0x0000007f;
|
||||
private static final UCharacterProperty prop= UCharacterProperty.getInstance();
|
||||
@ -851,4 +948,3 @@ public final class UScript {
|
||||
private UScript(){}
|
||||
///CLOVER:ON
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -485,6 +485,7 @@ final class CollationParsedRuleBuilder
|
||||
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
|
||||
m_utilElement_.m_prefix_ = 0;
|
||||
m_utilElement_.m_CELength_ = 0;
|
||||
m_utilElement_.m_prefixChars_ = null;
|
||||
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
|
||||
while (CE != CollationElementIterator.NULLORDER) {
|
||||
CE = m_utilColEIter_.next();
|
||||
@ -610,7 +611,8 @@ final class CollationParsedRuleBuilder
|
||||
int offset = 0;
|
||||
while (conts[offset] != 0) {
|
||||
// tailoredCE = ucmpe32_get(t.m_mapping, *conts);
|
||||
int tailoredCE = t.m_mapping_.getValue(conts[offset]);
|
||||
int tailoredCE = t.m_mapping_.getValue(conts[offset]);
|
||||
Elements prefixElm = null;
|
||||
if (tailoredCE != CE_NOT_FOUND_) {
|
||||
boolean needToAdd = true;
|
||||
if (isContractionTableElement(tailoredCE)) {
|
||||
@ -618,6 +620,22 @@ final class CollationParsedRuleBuilder
|
||||
conts, offset + 1) == true) {
|
||||
needToAdd = false;
|
||||
}
|
||||
}
|
||||
if (!needToAdd && isPrefix(tailoredCE) && conts[offset+1]==0) {
|
||||
// pre-context character in UCA
|
||||
// The format for pre-context character is
|
||||
// conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
|
||||
Elements elm = new Elements();
|
||||
elm.m_cPoints_=m_utilElement_.m_uchars_;
|
||||
elm.m_CELength_=0;
|
||||
elm.m_uchars_= UCharacter.toString(conts[offset]);
|
||||
elm.m_prefixChars_=UCharacter.toString(conts[offset+2]);
|
||||
elm.m_prefix_=0; // TODO(claireho) : confirm!
|
||||
prefixElm = (Elements)t.m_prefixLookup_.get(elm);
|
||||
if ((prefixElm== null) ||
|
||||
(prefixElm.m_prefixChars_.charAt(0)!= conts[offset+2])) {
|
||||
needToAdd = true;
|
||||
}
|
||||
}
|
||||
if(m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(conts[offset])) {
|
||||
needToAdd = false;
|
||||
@ -626,23 +644,53 @@ final class CollationParsedRuleBuilder
|
||||
|
||||
if (needToAdd == true) {
|
||||
// we need to add if this contraction is not tailored.
|
||||
m_utilElement_.m_prefix_ = 0;
|
||||
m_utilElement_.m_prefixChars_ = null;
|
||||
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
|
||||
str.delete(0, str.length());
|
||||
str.append(conts[offset]);
|
||||
str.append(conts[offset + 1]);
|
||||
if (conts[offset + 2] != 0) {
|
||||
str.append(conts[offset + 2]);
|
||||
}
|
||||
m_utilElement_.m_uchars_ = str.toString();
|
||||
m_utilElement_.m_CELength_ = 0;
|
||||
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
|
||||
if (conts[offset+1]!=0) { // not precontext
|
||||
m_utilElement_.m_prefix_ = 0;
|
||||
m_utilElement_.m_prefixChars_ = null;
|
||||
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
|
||||
str.delete(0, str.length());
|
||||
str.append(conts[offset]);
|
||||
str.append(conts[offset + 1]);
|
||||
if (conts[offset + 2] != 0) {
|
||||
str.append(conts[offset + 2]);
|
||||
}
|
||||
m_utilElement_.m_uchars_ = str.toString();
|
||||
m_utilElement_.m_CELength_ = 0;
|
||||
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
|
||||
}
|
||||
else { // add a pre-context element
|
||||
int preKeyLen=0;
|
||||
str.delete(0, str.length()); // clean up
|
||||
m_utilElement_.m_cPoints_ = UCharacter.toString(conts[offset]);
|
||||
m_utilElement_.m_CELength_ = 0;
|
||||
m_utilElement_.m_uchars_ = UCharacter.toString(conts[offset]);
|
||||
m_utilElement_.m_prefixChars_ = UCharacter.toString(conts[offset+2]);
|
||||
if (prefixElm==null) {
|
||||
m_utilElement_.m_prefix_=0;
|
||||
}
|
||||
else { // TODO (claireho): confirm!
|
||||
m_utilElement_.m_prefix_= m_utilElement_.m_prefix_;
|
||||
// m_utilElement_.m_prefix_= prefixElm.m_prefix_;
|
||||
}
|
||||
m_utilColEIter_.setText(m_utilElement_.m_prefixChars_);
|
||||
while (m_utilColEIter_.next()!=CollationElementIterator.NULLORDER) {
|
||||
// count number of keys for pre-context char.
|
||||
preKeyLen++;
|
||||
}
|
||||
str.append(conts[offset+2]);
|
||||
str.append(conts[offset]);
|
||||
m_utilColEIter_.setText(str.toString());
|
||||
// Skip the keys for prefix character, then copy the rest to el.
|
||||
while ((preKeyLen-->0) &&
|
||||
m_utilColEIter_.next()!= CollationElementIterator.NULLORDER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
while (true) {
|
||||
int CE = m_utilColEIter_.next();
|
||||
if (CE != CollationElementIterator.NULLORDER) {
|
||||
m_utilElement_.m_CEs_[m_utilElement_.m_CELength_
|
||||
++] = CE;
|
||||
m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
@ -1584,14 +1632,18 @@ final class CollationParsedRuleBuilder
|
||||
s --;
|
||||
if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
|
||||
if (strength == Collator.SECONDARY) {
|
||||
low = RuleBasedCollator.COMMON_TOP_2_ << 24;
|
||||
high = 0xFFFFFFFF;
|
||||
if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) {
|
||||
// Override if low range is less than UCOL_COMMON_TOP2.
|
||||
low = RuleBasedCollator.COMMON_TOP_2_ << 24;
|
||||
}
|
||||
high = 0xFFFFFFFF;
|
||||
}
|
||||
else {
|
||||
// low = 0x02000000;
|
||||
// This needs to be checked - what if low is
|
||||
// not good...
|
||||
high = 0x40000000;
|
||||
else {
|
||||
if ( low < RuleBasedCollator.COMMON_BOTTOM_3<<24 ) {
|
||||
// Override if low range is less than UCOL_COMMON_BOT3.
|
||||
low = RuleBasedCollator.COMMON_BOTTOM_3 <<24;
|
||||
}
|
||||
high = 0x40000000;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -2864,6 +2916,19 @@ final class CollationParsedRuleBuilder
|
||||
t.m_mapping_.setValue(element.m_cPoints_.charAt(
|
||||
element.m_cPointsOffset_),
|
||||
element.m_mapCE_);
|
||||
if (element.m_prefixChars_ != null &&
|
||||
element.m_prefixChars_.length()>0 &&
|
||||
getCETag(CE) != CE_IMPLICIT_TAG_) {
|
||||
// Add CE for standalone precontext char.
|
||||
Elements origElem = new Elements();
|
||||
origElem.m_prefixChars_ = null;
|
||||
origElem.m_uchars_ = element.m_cPoints_;
|
||||
origElem.m_cPoints_ = origElem.m_uchars_;
|
||||
origElem.m_CEs_[0] = CE;
|
||||
origElem.m_mapCE_ = CE;
|
||||
origElem.m_CELength_ = 1;
|
||||
finalizeAddition(t, origElem);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -1,7 +1,7 @@
|
||||
//##header J2SE15
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -1531,6 +1531,7 @@ public final class RuleBasedCollator extends Collator
|
||||
static final byte BYTE_COMMON_ = (byte)0x05;
|
||||
static final int COMMON_TOP_2_ = 0x86; // int for unsignedness
|
||||
static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;
|
||||
static final int COMMON_BOTTOM_3 = 0x05;
|
||||
/**
|
||||
* Case strength mask
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user