ICU-5696 Unicode 5.1 Update

X-SVN-Rev: 23763
This commit is contained in:
Andy Heninger 2008-04-04 23:55:41 +00:00
parent 71bf003171
commit 39ff2eff25
23 changed files with 30498 additions and 8754 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af6554a0d9bdc0c4f73faaff17ba6e1becdba35b86f1e6ac5efa8415ab562d69
size 758184
oid sha256:354535a77f8a69d732d81bfa18a5b1d8ac3e034cf51289ec97b934c054404404
size 757711

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2002-2006, International Business Machines Corporation and *
* Copyright (C) 2002-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -97,13 +97,23 @@ public class CollationAPITest extends TestFmwk {
col.setStrength(Collator.IDENTICAL);
byte key2compat[] = {
// 3.9 key, UCA 5.1
(byte) 0x2c, (byte) 0x2e, (byte) 0x30,
(byte) 0x32, (byte) 0x2c, (byte) 0x01,
(byte) 0x09, (byte) 0x01, (byte) 0x09,
(byte) 0x01, (byte) 0x2b, (byte) 0x01,
(byte) 0x92, (byte) 0x93, (byte) 0x94,
(byte) 0x95, (byte) 0x92, (byte) 0x00
// 3.6 key, UCA 5.0
/*
(byte) 0x29, (byte) 0x2b, (byte) 0x2d,
(byte) 0x2f, (byte) 0x29, (byte) 0x01,
(byte) 0x09, (byte) 0x01, (byte) 0x09,
(byte) 0x01, (byte) 0x28, (byte) 0x01,
(byte) 0x92, (byte) 0x93, (byte) 0x94,
(byte) 0x95, (byte) 0x92, (byte) 0x00
*/
// 3.4 key UCA 4.1
/*
@ -501,7 +511,7 @@ public class CollationAPITest extends TestFmwk {
doAssert(col.getVersion().equals(expectedVersion), "Expected version "+expectedVersion.toString()+" got "+col.getVersion().toString());
logln("Test getUCAVersion");
VersionInfo expectedUCAVersion = VersionInfo.getInstance(0x05, 0, 0, 0);
VersionInfo expectedUCAVersion = VersionInfo.getInstance(0x05, 1, 0, 0);
doAssert(col.getUCAVersion().equals(expectedUCAVersion), "Expected UCA version "+expectedUCAVersion.toString()+" got "+col.getUCAVersion().toString());
doAssert((col.compare("ab", "abc") < 0), "ab < abc comparison failed");

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2002-2007, International Business Machines Corporation and *
* Copyright (C) 2002-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -285,7 +285,13 @@ public class CollationMiscTest extends TestFmwk {
String target = "[";
for (i = 0; i < bytes.length; i++) {
target += Integer.toHexString(bytes[i]);
String numStr = Integer.toHexString(bytes[i]);
if (numStr.length()>2) {
target += numStr.substring(numStr.length()-2);
}
else {
target += numStr;
}
target += " ";
}
target += "]";
@ -2281,4 +2287,204 @@ public class CollationMiscTest extends TestFmwk {
}
}
/**
 * Checks tailorings that reset on the "[last/first primary ignorable]" and
 * "[last/first secondary ignorable]" positions.
 *
 * For rule 0 the sort keys of "a" and "b" are compared against the keys of
 * the characters this test treats as the last primary ignorable
 * (U+101FD, written as a surrogate pair), the first primary ignorable
 * (U+0332) and the first variable (U+0009).  For rule 1 the leading bytes of
 * the keys are compared against a fixed secondary-ignorable key prefix.
 *
 * If a collator cannot be built from a rule the test warns and stops instead
 * of going on to dereference a null collator.
 */
public void Test6179()
{
    String rules[] = {
        "&[last primary ignorable]<< a &[first primary ignorable]<<b ",
        "&[last secondary ignorable]<<< a &[first secondary ignorable]<<<b",
    };
    // defined in UCA5.1
    String firstPrimIgn = "\u0332";
    String lastPrimIgn = "\uD800\uDDFD";
    String firstVariable = "\u0009";
    byte[] secIgnKey = {1,1,4,0};

    int i=0;
    {
        RuleBasedCollator coll = null;
        try {
            coll = new RuleBasedCollator(rules[i]);
        } catch (Exception e) {
            warnln("Unable to open collator with rules " + rules[i]);
            // Nothing below can run without a collator.
            return;
        }

        logln("Test rule["+i+"]"+rules[i]);

        CollationKey keyA = coll.getCollationKey("a");
        logln("Key for \"a\":"+ prettify(keyA));
        if (keyA.compareTo(coll.getCollationKey(lastPrimIgn))<=0) {
            CollationKey key = coll.getCollationKey(lastPrimIgn);
            logln("Collation key for 0xD800 0xDDFD: "+prettify(key));
            errln("Error! String \"a\" must be greater than \uD800\uDDFD -"+
                  "[Last Primary Ignorable]");
        }
        if (keyA.compareTo(coll.getCollationKey(firstVariable))>=0) {
            CollationKey key = coll.getCollationKey(firstVariable);
            logln("Collation key for 0x0009: "+prettify(key));
            errln("Error! String \"a\" must be less than 0x0009 - [First Variable]");
        }

        CollationKey keyB = coll.getCollationKey("b");
        logln("Key for \"b\":"+ prettify(keyB));
        if (keyB.compareTo(coll.getCollationKey(firstPrimIgn))<=0) {
            CollationKey key = coll.getCollationKey(firstPrimIgn);
            logln("Collation key for 0x0332: "+prettify(key));
            errln("Error! String \"b\" must be greater than 0x0332 -"+
                  "[First Primary Ignorable]");
        }
        if (keyB.compareTo(coll.getCollationKey(firstVariable))>=0) {
            CollationKey key = coll.getCollationKey(firstVariable);
            logln("Collation key for 0x0009: "+prettify(key));
            errln("Error! String \"b\" must be less than 0x0009 - [First Variable]");
        }
    }
    {
        i=1;
        RuleBasedCollator coll = null;
        try {
            coll = new RuleBasedCollator(rules[i]);
        } catch (Exception e) {
            warnln("Unable to open collator with rules " + rules[i]);
            // Nothing below can run without a collator.
            return;
        }

        logln("Test rule["+i+"]"+rules[i]);

        CollationKey keyA = coll.getCollationKey("a");
        logln("Key for \"a\":"+ prettify(keyA));
        byte[] keyAInBytes = keyA.toByteArray();
        // Only the first differing byte decides the ordering; bytes are
        // compared as unsigned values.
        for (int j=0; j<keyAInBytes.length && j<secIgnKey.length; j++) {
            if (keyAInBytes[j]!=secIgnKey[j]) {
                if ((keyAInBytes[j] & 0xFF) <= (secIgnKey[j] & 0xFF)) {
                    // NOTE(review): this is only logged, while the matching
                    // check for "b" below reports an error - confirm whether
                    // the difference is intentional.
                    logln("Error! String \"a\" must be greater than [Last Secondary Ignorable]");
                }
                break;
            }
        }
        if (keyA.compareTo(coll.getCollationKey(firstVariable))>=0) {
            errln("Error! String \"a\" must be less than 0x0009 - [First Variable]");
            CollationKey key = coll.getCollationKey(firstVariable);
            logln("Collation key for 0x0009: "+prettify(key));
        }

        CollationKey keyB = coll.getCollationKey("b");
        logln("Key for \"b\":"+ prettify(keyB));
        byte[] keyBInBytes = keyB.toByteArray();
        for (int j=0; j<keyBInBytes.length && j<secIgnKey.length; j++) {
            if (keyBInBytes[j]!=secIgnKey[j]) {
                if ((keyBInBytes[j] & 0xFF) <= (secIgnKey[j] & 0xFF)) {
                    errln("Error! String \"b\" must be greater than [Last Secondary Ignorable]");
                }
                break;
            }
        }
        if (keyB.compareTo(coll.getCollationKey(firstVariable))>=0) {
            CollationKey key = coll.getCollationKey(firstVariable);
            logln("Collation key for 0x0009: "+prettify(key));
            errln("Error! String \"b\" must be less than 0x0009 - [First Variable]");
        }
    }
}
/**
 * Checks the ordering of a fixed list of strings built around the middle
 * dot (U+00B7) and the Greek ano teleia (U+0387).
 *
 * The list must be in non-decreasing sort-key order under the "en" and "ja"
 * locale collators and under each tailoring in <code>rules</code>.  The one
 * exception is the second tailoring, where entry 3 ("l") must not sort after
 * entry 2 ("a").
 *
 * A locale collator that cannot be obtained ends the test with a warning; a
 * tailoring that cannot be built is skipped with a warning.
 */
public void TestUCAPrecontext()
{
    String rules[] = {
        "& \u00B7<a ",
        "& L\u00B7 << a", // 'a' is an expansion.
    };
    String cases[] = {
        "\u00B7",
        "\u0387",
        "a",
        "l",
        "L\u0332",
        "l\u00B7",
        "l\u0387",
        "L\u0387",
        "la\u0387",
        "La\u00b7",
    };

    // Test en sort
    if (!checkLocaleSortOrder("en", "EN", "English", cases)) {
        return;
    }

    // Test ja sort
    if (!checkLocaleSortOrder("ja", "JA", "Japanese", cases)) {
        return;
    }

    for(int i = 0; i < rules.length; i++) {
        RuleBasedCollator coll = null;
        logln("Tailoring rule:"+rules[i]);
        try {
            coll = new RuleBasedCollator(rules[i]);
        } catch (Exception e) {
            warnln("Unable to open collator with rules " + rules[i]);
            // Without a collator there is nothing to check for this rule.
            continue;
        }

        for (int j=0; j<cases.length; j++) {
            CollationKey key = coll.getCollationKey(cases[j]);
            if (j>0) {
                CollationKey prevKey = coll.getCollationKey(cases[j-1]);
                if (i==1 && j==3) {
                    // Inverted expectation: under the second rule "l" must
                    // not sort after the tailored "a".
                    if (key.compareTo(prevKey)>0) {
                        errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
                              cases[j]+"is greater than previous test.");
                    }
                }
                else {
                    if (key.compareTo(prevKey)<0) {
                        errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
                              cases[j]+"is not greater than previous test.");
                    }
                }
            }
            logln("String:"+cases[j]+" Key:"+ prettify(key));
        }
    }
}

/**
 * Verifies that <code>cases</code> is in non-decreasing sort-key order under
 * the collator of the given language, logging every key.
 *
 * @param language     language code passed to <code>Locale</code>
 * @param tag          short label used in log and error messages
 * @param languageName language name used in the warning on failure
 * @param cases        strings expected to be in non-decreasing order
 * @return false if an exception was thrown while obtaining or using the
 *         collator (a warning is issued), true otherwise
 */
private boolean checkLocaleSortOrder(String language, String tag,
                                     String languageName, String[] cases)
{
    logln(tag+" sort:");
    try {
        RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(
                                                new Locale(language, ""));
        for (int j=0; j<cases.length; j++) {
            CollationKey key = coll.getCollationKey(cases[j]);
            if (j>0) {
                CollationKey prevKey = coll.getCollationKey(cases[j-1]);
                if (key.compareTo(prevKey)<0) {
                    errln("Error! "+tag+" test["+j+"]:"+"source:" + cases[j]+
                          "is not greater than previous test.");
                }
            }
            logln("String:"+cases[j]+" Key:"+ prettify(key));
        }
    } catch (Exception e) {
        warnln("Error creating "+languageName+" collator");
        return false;
    }
    return true;
}
}

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -361,12 +361,16 @@ public class TestUScript extends TestFmwk {
*/
String[] expectedLong = new String[]{
"Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
"Lepc", "Lina", "Mand", "Maya", "Mero", "Nko", "Orkh", "Perm", "Phags_Pa", "Phoenician",
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Cuneiform",
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kayah_Li", "Latf", "Latg",
"Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Orkh", "Perm", "Phags_Pa", "Phoenician",
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
"Zxxx", "Unknown",
"Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
"Carian", "Jpan", "Lana", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
"Moon", "Mtei",
// ICU 4.0
"Armi", "Avst", "Cakm", "Kore", "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti",
"Samr", "Tavt", "Zmth", "Zsym",
};
String[] expectedShort = new String[]{
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
@ -376,6 +380,10 @@ public class TestUScript extends TestFmwk {
"Zxxx", "Zzzz",
"Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
"Moon", "Mtei",
// ICU 4.0
"Armi", "Avst", "Cakm", "Kore", "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti",
"Samr", "Tavt", "Zmth", "Zsym",
};
int j = 0;
int i = 0;

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -15,7 +15,9 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.impl.UCaseProps;
import com.ibm.icu.impl.Utility;
import java.util.Locale;
import java.io.BufferedReader;
@ -289,6 +291,22 @@ public final class UCharacterCaseTest extends TestFmwk
}
}
/**
 * Regression test for titlecasing text that contains an apostrophe.
 *
 * First confirms that the case properties report the apostrophe as
 * case-ignorable (-1), then logs the set of every code point reported that
 * way, and finally checks that an all-uppercase English sentence is
 * titlecased word by word without capitalizing the letter after the
 * apostrophe.
 *
 * @throws java.io.IOException declared because constructing the case
 *         properties may throw it
 */
public void TestTitleRegression() throws java.io.IOException {
    final UCaseProps caseProps = new UCaseProps();

    // should be case-ignorable (-1)
    final int apostropheType = caseProps.getTypeOrIgnorable('\'');
    assertEquals("Case Ignorable check", -1, apostropheType);

    // Gather every code point with a negative type, for the log only.
    final UnicodeSet ignorables = new UnicodeSet();
    int codePoint = 0;
    while (codePoint <= 0x10FFFF) {
        if (caseProps.getTypeOrIgnorable(codePoint) < 0) {
            ignorables.add(codePoint);
        }
        ++codePoint;
    }
    logln(ignorables.toString());

    final String upperCased = "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.";
    final String titleCased = "The Quick Brown Fox Can't Jump Over The Lazy Dogs.";
    assertEquals("Titlecase check",
                 titleCased,
                 UCharacter.toTitleCase(ULocale.ENGLISH, upperCased, null));
}
public void TestTitle()
{
try{
@ -912,5 +930,3 @@ public final class UCharacterCaseTest extends TestFmwk
return result;
}
}

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 2004-2005, International Business Machines Corporation and *
* Copyright (C) 2004-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -22,11 +22,15 @@ public final class UCharacterSurrogateTest extends TestFmwk {
}
public void TestUnicodeBlockForName() {
String[] names = {"Optical Character Recognition",
"CJK Unified Ideographs Extension A", "Supplemental Arrows-B",
"Supplementary Private Use Area-B",
"supplementary_Private_Use_Area-b",
"supplementary_PRIVATE_Use_Area_b"};
String[] names = {"Latin-1 Supplement",
"Optical Character Recognition",
"CJK Unified Ideographs Extension A",
"Supplemental Arrows-B",
"Supplemental arrows b",
"supp-lement-al arrowsb",
"Supplementary Private Use Area-B",
"supplementary_Private_Use_Area-b",
"supplementary_PRIVATE_Use_Area_b"};
for (int i = 0; i < names.length; ++i) {
try {
UCharacter.UnicodeBlock b = UCharacter.UnicodeBlock
@ -416,4 +420,3 @@ public final class UCharacterSurrogateTest extends TestFmwk {
test.test(s, 2, 1, 2, 1, 3, true);
}
}

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -44,7 +44,7 @@ public final class UCharacterTest extends TestFmwk
/**
* ICU4J data version number
*/
private final VersionInfo VERSION_ = VersionInfo.getInstance("5.0.0.0");
private final VersionInfo VERSION_ = VersionInfo.getInstance("5.1.0.0");
// constructor ===================================================
@ -1616,14 +1616,16 @@ public final class UCharacterTest extends TestFmwk
{ 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 },
{ 0x0341, UProperty.DEPRECATED, 1 },
{ 0xe0041, UProperty.DEPRECATED, 0 },
{ 0xe0041, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */
{ 0x00a0, UProperty.GRAPHEME_BASE, 1 },
{ 0x0a4d, UProperty.GRAPHEME_BASE, 0 },
{ 0xff9f, UProperty.GRAPHEME_BASE, 1 }, /* changed from Unicode 3.2 to 4 */
{ 0xff9d, UProperty.GRAPHEME_BASE, 1 },
{ 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
{ 0x0300, UProperty.GRAPHEME_EXTEND, 1 },
{ 0xff9f, UProperty.GRAPHEME_EXTEND, 0 }, /* changed from Unicode 3.2 to 4 */
{ 0xff9d, UProperty.GRAPHEME_EXTEND, 0 },
{ 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
{ 0x0603, UProperty.GRAPHEME_EXTEND, 0 },
{ 0x0a4d, UProperty.GRAPHEME_LINK, 1 },
@ -1671,7 +1673,7 @@ public final class UCharacterTest extends TestFmwk
{ 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
{ 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
{ 0x0606, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
{ 0x0605, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
{ 0x061c, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
{ 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
{ 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -2292,63 +2292,64 @@ public class BasicTest extends TestFmwk {
}
skipSets[D].applyPattern(
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
+ "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
+ "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
+ "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
+ "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
+ "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
+ "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
+ "-\\u045E\\u0476\\u0477\\u0483-\\u0486\\u04C1\\u04C2\\u04D0-"
+ "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
+ "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
+ "\\u05C5\\u05C7\\u0610-\\u0615\\u0622-\\u0626\\u064B-\\u065E"
+ "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
+ "\\u07F3\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
+ "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
+ "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
+ "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
+ "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
+ "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
+ "\\u0FC6\\u1026\\u1037\\u1039\\u135F\\u1714\\u1734\\u17D2\\u17DD"
+ "\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08\\u1B0A\\u1B0C"
+ "\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44"
+ "\\u1B6B-\\u1B73\\u1DC0-\\u1DCA\\u1DFE-\\u1E99\\u1E9B\\u1EA0-"
+ "\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-"
+ "\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D"
+ "\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-"
+ "\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-"
+ "\\u1FFD\\u2000\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF"
+ "\\u2126\\u212A\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204"
+ "\\u2209\\u220C\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260"
+ "\\u2262\\u226D-\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281"
+ "\\u2284\\u2285\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3"
+ "\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC\\u302A-\\u302F\\u304C"
+ "\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E"
+ "\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074"
+ "\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094\\u3099\\u309A"
+ "\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA"
+ "\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1"
+ "\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4"
+ "\\u30F7-\\u30FA\\u30FE\\uA806\\uAC00-\\uD7A3\\uF900-\\uFA0D"
+ "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
+ "-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
+ "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
+ "-\\uFB4E\\uFE20-\\uFE23\\U00010A0D\\U00010A0F\\U00010A38-\\U0001"
+ "0A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
+ "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
+ "F800-\\U0002FA1D]", false);
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
+ "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
+ "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
+ "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
+ "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
+ "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
+ "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
+ "-\\u045E\\u0476\\u0477\\u0483-\\u0487\\u04C1\\u04C2\\u04D0-"
+ "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
+ "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
+ "\\u05C5\\u05C7\\u0610-\\u061A\\u0622-\\u0626\\u064B-\\u065E"
+ "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
+ "\\u07F3\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
+ "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
+ "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
+ "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
+ "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
+ "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
+ "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u135F\\u1714\\u1734"
+ "\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08"
+ "\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41"
+ "\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1DC0-\\u1DE6"
+ "\\u1DFE-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-"
+ "\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59"
+ "\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC"
+ "\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-"
+ "\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000\\u2001\\u20D0-"
+ "\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A\\u212B\\u219A\\u219B"
+ "\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u2241"
+ "\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275"
+ "\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289\\u22AC-"
+ "\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC"
+ "\\u2DE0-\\u2DFF\\u302A-\\u302F\\u304C\\u304E\\u3050\\u3052"
+ "\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065"
+ "\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079"
+ "\\u307A\\u307C\\u307D\\u3094\\u3099\\u309A\\u309E\\u30AC\\u30AE"
+ "\\u30B0\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0"
+ "\\u30C2\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6"
+ "\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE"
+ "\\uA66F\\uA67C\\uA67D\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953\\uAC00"
+ "-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
+ "\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E"
+ "\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\uFE20-\\uFE26"
+ "\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010"
+ "A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
+ "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001"
+ "D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002F800-\\U0002FA1D]", false);
skipSets[C].applyPattern(
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
+ "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
+ "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124"
+ "\\u0125\\u0128-\\u012D\\u0130\\u0139\\u013A\\u013D\\u013E\\u0143"
@ -2362,9 +2363,9 @@ public class BasicTest extends TestFmwk {
+ "\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-\\u03CB\\u03CE\\u03D2\\u0406"
+ "\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B"
+ "\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447"
+ "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486\\u04D8\\u04D9"
+ "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487\\u04D8\\u04D9"
+ "\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5"
+ "\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627\\u0648\\u064A-"
+ "\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627\\u0648\\u064A-"
+ "\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4"
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
+ "\\u07F3\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
@ -2378,303 +2379,309 @@ public class BasicTest extends TestFmwk {
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
+ "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u1100-\\u1112\\u1161-"
+ "\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2\\u17DD\\u18A9"
+ "\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D"
+ "\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44"
+ "\\u1B6B-\\u1B73\\u1DC0-\\u1DCA\\u1DFE-\\u1E03\\u1E0A-\\u1E0F"
+ "\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53"
+ "\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99"
+ "\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19"
+ "\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50"
+ "\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79"
+ "\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98"
+ "\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC"
+ "\\u1FBE\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB"
+ "\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE"
+ "\\u2000\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF\\u2126"
+ "\\u212A\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203"
+ "\\u2208\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D"
+ "\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D"
+ "\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9"
+ "\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u302A-\\u302F"
+ "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
+ "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
+ "\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D\\u30A6\\u30AB\\u30AD"
+ "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
+ "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
+ "\\u30EF-\\u30F2\\u30FD\\uA806\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70"
+ "\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C"
+ "\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68"
+ "\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64"
+ "\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060"
+ "\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C"
+ "\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258"
+ "\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354"
+ "\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450"
+ "\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C"
+ "\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648"
+ "\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744"
+ "\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840"
+ "\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C"
+ "\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38"
+ "\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34"
+ "\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30"
+ "\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C"
+ "\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28"
+ "\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24"
+ "\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020"
+ "\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C"
+ "\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218"
+ "\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314"
+ "\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410"
+ "\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C"
+ "\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608"
+ "\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704"
+ "\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800"
+ "\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC"
+ "\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8"
+ "\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4"
+ "\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0"
+ "\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC"
+ "\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8"
+ "\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4"
+ "\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0"
+ "\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC"
+ "\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8"
+ "\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4"
+ "\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0"
+ "\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC"
+ "\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8"
+ "\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4"
+ "\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D"
+ "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
+ "-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
+ "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
+ "-\\uFB4E\\uFE20-\\uFE23\\U00010A0D\\U00010A0F\\U00010A38-\\U0001"
+ "0A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
+ "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
+ "F800-\\U0002FA1D]", false);
+ "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u1100-\\u1112"
+ "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09"
+ "\\u1B0B\\u1B0D\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F"
+ "\\u1B42\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1DC0-\\u1DE6"
+ "\\u1DFE-\\u1E03\\u1E0A-\\u1E0F\\u1E12-\\u1E1B\\u1E20-\\u1E27"
+ "\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-\\u1E7D\\u1E80-\\u1E87"
+ "\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9"
+ "\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-\\u1F31\\u1F38\\u1F39"
+ "\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51\\u1F59\\u1F60-\\u1F71"
+ "\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-\\u1F7D\\u1F80\\u1F81"
+ "\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99\\u1FA0\\u1FA1\\u1FA8"
+ "\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE\\u1FBF\\u1FC3\\u1FC6"
+ "\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3\\u1FEB\\u1FEE\\u1FEF"
+ "\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000\\u2001\\u20D0-"
+ "\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A\\u212B\\u2190\\u2192"
+ "\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225"
+ "\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272"
+ "\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287"
+ "\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329"
+ "\\u232A\\u2ADC\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3046\\u304B"
+ "\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D"
+ "\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078"
+ "\\u307B\\u3099\\u309A\\u309D\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1"
+ "\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4"
+ "\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2"
+ "\\u30FD\\uA66F\\uA67C\\uA67D\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953"
+ "\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0"
+ "\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC"
+ "\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8"
+ "\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4"
+ "\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0"
+ "\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC"
+ "\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8"
+ "\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4"
+ "\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0"
+ "\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC"
+ "\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8"
+ "\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4"
+ "\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0"
+ "\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC"
+ "\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8"
+ "\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4"
+ "\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0"
+ "\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C"
+ "\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98"
+ "\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94"
+ "\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090"
+ "\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C"
+ "\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288"
+ "\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384"
+ "\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480"
+ "\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C"
+ "\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678"
+ "\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774"
+ "\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870"
+ "\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C"
+ "\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68"
+ "\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64"
+ "\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60"
+ "\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C"
+ "\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58"
+ "\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54"
+ "\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050"
+ "\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C"
+ "\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248"
+ "\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344"
+ "\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440"
+ "\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C"
+ "\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638"
+ "\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734"
+ "\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-"
+ "\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-"
+ "\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36\\uFB38-"
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\uFE20"
+ "-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-\\U00010A3A"
+ "\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001"
+ "D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD"
+ "\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002F800-"
+ "\\U0002FA1D]", false);
skipSets[KD].applyPattern(
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
+ "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
+ "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
+ "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
+ "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
+ "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
+ "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
+ "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
+ "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
+ "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
+ "-\\u0486\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
+ "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
+ "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
+ "-\\u0615\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
+ "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
+ "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
+ "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
+ "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
+ "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
+ "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
+ "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
+ "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
+ "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
+ "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
+ "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
+ "\\u0FC6\\u1026\\u1037\\u1039\\u10FC\\u135F\\u1714\\u1734\\u17D2"
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08\\u1B0A"
+ "\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43"
+ "\\u1B44\\u1B6B-\\u1B73\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-"
+ "\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DCA\\u1DFE-\\u1E9B"
+ "\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45"
+ "\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-"
+ "\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-"
+ "\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-"
+ "\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036"
+ "\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071"
+ "\\u2074-\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1"
+ "\\u20E5-\\u20EF\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113"
+ "\\u2115\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126"
+ "\\u2128\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-"
+ "\\u2140\\u2145-\\u2149\\u2153-\\u217F\\u219A\\u219B\\u21AE"
+ "\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D"
+ "\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
+ "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
+ "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
+ "\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC"
+ "\\u2D6F\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F"
+ "\\u3036\\u3038-\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056"
+ "\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069"
+ "\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C"
+ "\\u307D\\u3094\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0"
+ "\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2"
+ "\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7"
+ "\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF"
+ "\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3243"
+ "\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA806\\uAC00-"
+ "\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
+ "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
+ "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
+ "\\uFE19\\uFE20-\\uFE23\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
+ "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
+ "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
+ "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U00010A0D\\U00010"
+ "A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U0001D15E-\\U0001D169"
+ "\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
+ "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001"
+ "D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C"
+ "\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4"
+ "A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
+ "\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001"
+ "D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
+ "D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB"
+ "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
+ "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
+ "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
+ "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
+ "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
+ "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
+ "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
+ "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
+ "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
+ "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
+ "-\\u0487\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
+ "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
+ "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
+ "-\\u061A\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
+ "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
+ "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
+ "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
+ "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
+ "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
+ "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
+ "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
+ "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
+ "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
+ "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
+ "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
+ "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u10FC\\u135F\\u1714"
+ "\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06"
+ "\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40"
+ "\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1D2C-"
+ "\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78"
+ "\\u1D9B-\\u1DE6\\u1DFE-\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15"
+ "\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57"
+ "\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-"
+ "\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-"
+ "\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-"
+ "\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-"
+ "\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-"
+ "\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-"
+ "\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-"
+ "\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D"
+ "\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149"
+ "\\u2153-\\u217F\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204"
+ "\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D\\u222F\\u2230\\u2241"
+ "\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275"
+ "\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289\\u22AC-"
+ "\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2460-"
+ "\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2D6F\\u2DE0"
+ "-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F"
+ "\\u3036\\u3038-\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056"
+ "\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069"
+ "\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C"
+ "\\u307D\\u3094\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0"
+ "\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2"
+ "\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7"
+ "\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF"
+ "\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3243"
+ "\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C"
+ "\\uA67D\\uA770\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953\\uAC00-"
+ "\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
+ "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
+ "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
+ "\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
+ "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
+ "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
+ "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010"
+ "A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U0001D15E-"
+ "\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001"
+ "D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0"
+ "\\U0001D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-"
+ "\\U0001D49C\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4"
+ "A6\\U0001D4A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB"
+ "\\U0001D4BD-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-"
+ "\\U0001D50A\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001"
+ "D51E-\\U0001D539\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544"
+ "\\U0001D546\\U0001D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001"
+ "D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
skipSets[KC].applyPattern(
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
+ "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
+ "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
+ "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
+ "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
+ "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
+ "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
+ "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
+ "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
+ "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
+ "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
+ "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
+ "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
+ "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
+ "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486"
+ "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
+ "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627"
+ "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
+ "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
+ "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0928\\u0930\\u0933"
+ "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
+ "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
+ "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
+ "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
+ "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
+ "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
+ "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
+ "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
+ "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
+ "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
+ "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
+ "\\u1025\\u102E\\u1037\\u1039\\u10FC\\u1100-\\u1112\\u1161-"
+ "\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2\\u17DD\\u18A9"
+ "\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D"
+ "\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44"
+ "\\u1B6B-\\u1B73\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D"
+ "\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DCA\\u1DFE-\\u1E03\\u1E0A-"
+ "\\u1E0F\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-"
+ "\\u1E53\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-"
+ "\\u1E9B\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18"
+ "\\u1F19\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49"
+ "\\u1F50\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77"
+ "\\u1F79\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91"
+ "\\u1F98\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-"
+ "\\u1FC1\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD"
+ "-\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9"
+ "\\u1FFB-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026"
+ "\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049"
+ "\\u2057\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094"
+ "\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF\\u2100-\\u2103"
+ "\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D"
+ "\\u2120-\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-"
+ "\\u2131\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2153-"
+ "\\u217F\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
+ "\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243"
+ "\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276"
+ "\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292"
+ "\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460"
+ "-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2D6F\\u2E9F\\u2EF3"
+ "\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A"
+ "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
+ "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
+ "\\u3075\\u3078\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD"
+ "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
+ "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
+ "\\u30EF-\\u30F2\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F"
+ "\\u3200-\\u321E\\u3220-\\u3243\\u3250-\\u327E\\u3280-\\u32FE"
+ "\\u3300-\\u33FF\\uA806\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C"
+ "\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88"
+ "\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84"
+ "\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80"
+ "\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C"
+ "\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178"
+ "\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274"
+ "\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370"
+ "\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C"
+ "\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568"
+ "\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664"
+ "\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760"
+ "\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C"
+ "\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958"
+ "\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54"
+ "\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50"
+ "\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C"
+ "\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48"
+ "\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44"
+ "\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40"
+ "\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C"
+ "\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138"
+ "\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234"
+ "\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330"
+ "\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C"
+ "\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528"
+ "\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624"
+ "\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720"
+ "\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C"
+ "\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918"
+ "\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14"
+ "\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10"
+ "\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C"
+ "\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08"
+ "\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04"
+ "\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00"
+ "\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC"
+ "\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8"
+ "\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4"
+ "\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0"
+ "\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC"
+ "\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8"
+ "\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4"
+ "\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0"
+ "\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10"
+ "\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-"
+ "\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-"
+ "\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41"
+ "\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F"
+ "\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE23"
+ "\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B"
+ "\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-"
+ "\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-"
+ "\\uFFE6\\uFFE8-\\uFFEE\\U00010A0D\\U00010A0F\\U00010A38-\\U00010"
+ "A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
+ "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
+ "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
+ "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
+ "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
+ "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
+ "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
+ "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
+ "\\U0002F800-\\U0002FA1D]", false);
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
+ "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
+ "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
+ "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
+ "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
+ "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
+ "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
+ "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
+ "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
+ "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
+ "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
+ "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
+ "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
+ "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
+ "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487"
+ "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
+ "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627"
+ "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
+ "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
+ "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0928\\u0930\\u0933"
+ "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
+ "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
+ "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
+ "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
+ "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
+ "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
+ "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
+ "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
+ "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
+ "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
+ "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
+ "\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u10FC\\u1100-\\u1112"
+ "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09"
+ "\\u1B0B\\u1B0D\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F"
+ "\\u1B42\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1D2C-\\u1D2E"
+ "\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-"
+ "\\u1DE6\\u1DFE-\\u1E03\\u1E0A-\\u1E0F\\u1E12-\\u1E1B\\u1E20-"
+ "\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-\\u1E7D\\u1E80-"
+ "\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B\\u1EA0-\\u1EF3\\u1EF6-"
+ "\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-\\u1F31\\u1F38"
+ "\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51\\u1F59\\u1F60-"
+ "\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-\\u1F7D\\u1F80"
+ "\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99\\u1FA0\\u1FA1"
+ "\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1\\u1FC3\\u1FC6\\u1FC9"
+ "\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-\\u1FDF\\u1FE3\\u1FEB"
+ "\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000-"
+ "\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036"
+ "\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071"
+ "\\u2074-\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1"
+ "\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113"
+ "\\u2115\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126"
+ "\\u2128\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-"
+ "\\u2140\\u2145-\\u2149\\u2153-\\u217F\\u2190\\u2192\\u2194"
+ "\\u21D0\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225\\u222C"
+ "\\u222D\\u222F\\u2230\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261"
+ "\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282"
+ "\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB"
+ "\\u22B2-\\u22B5\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-"
+ "\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3"
+ "\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A"
+ "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
+ "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
+ "\\u3075\\u3078\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD"
+ "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
+ "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
+ "\\u30EF-\\u30F2\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F"
+ "\\u3200-\\u321E\\u3220-\\u3243\\u3250-\\u327E\\u3280-\\u32FE"
+ "\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D\\uA770\\uA806\\uA8C4\\uA92B"
+ "-\\uA92D\\uA953\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8"
+ "\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4"
+ "\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0"
+ "\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C"
+ "\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098"
+ "\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194"
+ "\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290"
+ "\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C"
+ "\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488"
+ "\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584"
+ "\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680"
+ "\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C"
+ "\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878"
+ "\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974"
+ "\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70"
+ "\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C"
+ "\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68"
+ "\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64"
+ "\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60"
+ "\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C"
+ "\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058"
+ "\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154"
+ "\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234\\uC250"
+ "\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C"
+ "\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448"
+ "\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544"
+ "\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640"
+ "\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C"
+ "\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838"
+ "\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934"
+ "\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30"
+ "\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C"
+ "\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28"
+ "\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24"
+ "\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20"
+ "\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C"
+ "\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018"
+ "\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114"
+ "\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210"
+ "\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C"
+ "\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408"
+ "\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504"
+ "\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600"
+ "\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC"
+ "\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12"
+ "\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D"
+ "\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17"
+ "\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43"
+ "\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-"
+ "\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26\\uFE30-"
+ "\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B\\uFE70-"
+ "\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-\\uFFC7"
+ "\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6"
+ "\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-"
+ "\\U00010A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
+ "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
+ "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
+ "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
+ "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
+ "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
+ "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
+ "\\U0002F800-\\U0002FA1D]", false);
return skipSets;
}

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2001-2006, International Business Machines Corporation and *
* Copyright (C) 2001-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -154,6 +154,8 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
/**
* Testing the methods first(), next(), next(int) and following() of RuleBasedBreakIterator
* TODO: Most of this test should be retired, rule behavior is much better covered by
* TestExtended, which is also easier to understand and maintain.
**/
public void TestFirstNextFollowing() {
int p, q;
@ -187,7 +189,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
errln("ERROR: next()/following() at last position returned #"
+ p + " and " + q + " instead of" + testString.length() + "\n");
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault());
testString = "Write hindi here. \u092d\u093e\u0930\u0301 \u0938\u0941\u0902\u0926\u0930 \u0939\u094c\u0964";
testString = "Write hindi here. ";
logln("testing char iter - string:- \"" + testString + "\"");
charIter1.setText(testString);
p = charIter1.first();
@ -207,18 +209,6 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
p = q;
q = charIter1.next(6);
doTest(testString, p, q, 17, " here.");
// hindi starts here
p = q;
q = charIter1.next(4);
doTest(testString, p, q, 22, " \u092d\u093e\u0930\u0301"); // Nonsense, but compatible between old and new rules.
p = q;
q = charIter1.next(2);
doTest(testString, p, q, 26, " \u0938\u0941\u0902");
q = charIter1.following(24);
doTest(testString, 24, q, 26, "\u0941\u0902");
q = charIter1.following(20);
doTest(testString, 20, q, 22, "\u0930\u0301");
p = charIter1.following(charIter1.last());
q = charIter1.next(charIter1.last());
if (p != BreakIterator.DONE || q != BreakIterator.DONE)
@ -325,31 +315,6 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
p = wordIter1.preceding(wordIter1.first());
if (p != BreakIterator.DONE)
errln("ERROR: preceding() at starting position returned #" + p + " instead of 0");
testString = "Write hindi here. \u092d\u093e\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 \u0939\u0301\u0964";
logln("testing character iteration for string \" " + testString + "\" \n");
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault());
charIter1.setText(testString);
p = charIter1.last();
if (p != testString.length())
errln("ERROR: first() returned" + p + "instead of" + testString.length());
q = charIter1.previous();
doTest(testString, p, q, 31, "\u0964");
p = q;
q = charIter1.previous();
doTest(testString, p, q, 29, "\u0939\u0301");
q = charIter1.preceding(26);
doTest(testString, 26, q, 23, "\u0938\u0941\u0902");
q = charIter1.preceding(16);
doTest(testString, 16, q, 15, "e");
p = q;
q = charIter1.previous();
doTest(testString, p, q, 14, "r");
charIter1.first();
p = charIter1.previous();
q = charIter1.preceding(charIter1.first());
if (p != BreakIterator.DONE || q != BreakIterator.DONE)
errln("ERROR: previous()/preceding() at starting position returned #"
+ p + " and " + q + " instead of 0\n");
testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000.";
logln("testing sentence iter - String:- \"" + testString + "\"");
RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(Locale.getDefault());

View File

@ -9,7 +9,7 @@ package com.ibm.icu.dev.test.rbbi;
//Regression testing of RuleBasedBreakIterator
//
// TODO: These tests should be mostly retired.
// Much of the test data that was originaly here was removed when the RBBI rules
// Much of the test data that was originally here was removed when the RBBI rules
// were updated to match the Unicode boundary TRs, and the data was found to be invalid.
// Much of the remaining data has been moved into the rbbitst.txt test data file,
// which is common between ICU4C and ICU4J. The remaining test data should also be moved,

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2003-2007 International Business Machines Corporation and *
* Copyright (C) 2003-2008 International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -59,6 +59,11 @@ public class RBBITestMonkey extends TestFmwk {
// Find the next break position, starting from the specified position.
// Return -1 after reaching end of string.
abstract int next(int i);
// A Character Property, one of the constants defined in class UProperty.
// The value of this property will be displayed for the characters
// near any test failure.
int fCharProperty;
}
@ -71,6 +76,13 @@ public class RBBITestMonkey extends TestFmwk {
UnicodeSet fCRLFSet;
UnicodeSet fControlSet;
UnicodeSet fExtendSet;
UnicodeSet fPrependSet;
UnicodeSet fSpacingSet;
UnicodeSet fLSet;
UnicodeSet fVSet;
UnicodeSet fTSet;
UnicodeSet fLVSet;
UnicodeSet fLVTSet;
UnicodeSet fHangulSet;
UnicodeSet fAnySet;
@ -79,18 +91,32 @@ public class RBBITestMonkey extends TestFmwk {
RBBICharMonkey() {
fText = null;
fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
fCRLFSet = new UnicodeSet("[\\r\\n]");
fControlSet = new UnicodeSet("[[\\p{Zl}\\p{Zp}\\p{Cc}\\p{Cf}]-[\\n]-[\\r]]");
fExtendSet = new UnicodeSet("[\\p{Grapheme_Extend}]");
fHangulSet = new UnicodeSet(
"[\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=T}" +
"\\p{Hangul_Syllable_Type=LV}\\p{Hangul_Syllable_Type=LVT}]");
fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]");
fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]");
fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]");
fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]");
fLSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]");
fVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]");
fTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]");
fLVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]");
fLVTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]");
fHangulSet = new UnicodeSet();
fHangulSet.addAll(fLSet);
fHangulSet.addAll(fVSet);
fHangulSet.addAll(fTSet);
fHangulSet.addAll(fLVSet);
fHangulSet.addAll(fLVTSet);
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]");
fSets = new ArrayList();
fSets.add(fCRLFSet);
fSets.add(fControlSet);
fSets.add(fExtendSet);
fSets.add(fPrependSet);
fSets.add(fSpacingSet);
fSets.add(fHangulSet);
fSets.add(fAnySet);
}
@ -104,9 +130,110 @@ public class RBBITestMonkey extends TestFmwk {
return fSets;
}
int next(int i) {
return nextGC(fText, i);
}
/**
 * Reference implementation of "find the next grapheme cluster boundary",
 * used for comparison against the break iterator under test.
 *
 * Starting from a previous boundary, the method slides a window of code
 * point positions (p0..p3) forward over fText and applies the boundary
 * rules below in order at each candidate position p2; the first rule that
 * matches decides whether to keep going ("x", continue) or to stop
 * ("<break>", break).
 *
 * @param prevPos index in fText of the previous boundary.
 * @return the index of the next boundary after prevPos, or -1 if prevPos
 *         is already at or beyond the end of fText.
 */
int next(int prevPos) {
int p0, p1, p2, p3; // Indices of the significant code points around the
// break position being tested. The candidate break
// location is before p2.
int breakPos = -1;
int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3.
// Previous break at end of string. return DONE.
if (prevPos >= fText.length()) {
return -1;
}
// All four positions start on prevPos; the loop below shifts them apart.
p0 = p1 = p2 = p3 = prevPos;
c3 = UTF16.charAt(fText, prevPos);
c0 = c1 = c2 = 0;
// Loop runs once per "significant" character position in the input text.
for (;;) {
// Move all of the positions forward in the input string.
// (p0/c0 are carried along but are not consulted by any rule in this method.)
p0 = p1; c0 = c1;
p1 = p2; c1 = c2;
p2 = p3; c2 = c3;
// Advance p3 by one codepoint
// (presumably what moveIndex32(..., 1) does; the helper is defined elsewhere).
// c3 is -1 once p3 has run off the end of the text.
p3 = moveIndex32(fText, p3, 1);
c3 = (p3>=fText.length())? -1: UTF16.charAt(fText, p3);
if (p1 == p2) {
// Still warming up the loop. (won't work with zero length strings, but we don't care)
continue;
}
if (p2 == fText.length()) {
// Reached end of string. Always a break position.
break;
}
// Rule GB3 CR x LF
// No Extend or Format characters may appear between the CR and LF,
// which requires the additional check for p2 immediately following p1.
//
if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
continue;
}
// Rule (GB4). ( Control | CR | LF ) <break>
if (fControlSet.contains(c1) ||
c1 == 0x0D ||
c1 == 0x0A) {
break;
}
// Rule (GB5) <break> ( Control | CR | LF )
//
if (fControlSet.contains(c2) ||
c2 == 0x0D ||
c2 == 0x0A) {
break;
}
// Rule (GB6) L x ( L | V | LV | LVT )
if (fLSet.contains(c1) &&
(fLSet.contains(c2) ||
fVSet.contains(c2) ||
fLVSet.contains(c2) ||
fLVTSet.contains(c2))) {
continue;
}
// Rule (GB7) ( LV | V ) x ( V | T )
if ((fLVSet.contains(c1) || fVSet.contains(c1)) &&
(fVSet.contains(c2) || fTSet.contains(c2))) {
continue;
}
// Rule (GB8) ( LVT | T) x T
if ((fLVTSet.contains(c1) || fTSet.contains(c1)) &&
fTSet.contains(c2)) {
continue;
}
// Rule (GB9) x Extend
// Only the following character (c2) is examined; the preceding one is unconstrained.
if (fExtendSet.contains(c2)) {
continue;
}
// Rule (GB9a) x SpacingMark
if (fSpacingSet.contains(c2)) {
continue;
}
// Rule (GB9b) Prepend x
if (fPrependSet.contains(c1)) {
continue;
}
// Rule (GB10) Any <break> Any
break;
}
// Every exit from the loop leaves the boundary immediately before p2.
breakPos = p2;
return breakPos;
}
}
@ -121,8 +248,12 @@ public class RBBITestMonkey extends TestFmwk {
List fSets;
StringBuffer fText;
UnicodeSet fCRSet;
UnicodeSet fLFSet;
UnicodeSet fNewlineSet;
UnicodeSet fKatakanaSet;
UnicodeSet fALetterSet;
UnicodeSet fMidNumLetSet;
UnicodeSet fMidLetterSet;
UnicodeSet fMidNumSet;
UnicodeSet fNumericSet;
@ -133,22 +264,26 @@ public class RBBITestMonkey extends TestFmwk {
RBBIWordMonkey() {
fSets = new ArrayList();
fCharProperty = UProperty.WORD_BREAK;
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}" +
"[\\p{Line_Break = Complex_Context}" +
"-\\p{Grapheme_Cluster_Break = Extend}" +
"-\\p{Grapheme_Cluster_Break = Control}]]");
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}-[\\uff9e\\uff9f]]");
fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]");
fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]");
fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]");
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]");
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]");
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]");
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]");
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]");
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}\\uff9e\\uff9f]");
fOtherSet = new UnicodeSet();
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]");
fOtherSet = new UnicodeSet();
fOtherSet.complement();
fOtherSet.removeAll(fCRSet);
fOtherSet.removeAll(fLFSet);
fOtherSet.removeAll(fNewlineSet);
fOtherSet.removeAll(fALetterSet);
fOtherSet.removeAll(fKatakanaSet);
fOtherSet.removeAll(fMidLetterSet);
@ -157,10 +292,17 @@ public class RBBITestMonkey extends TestFmwk {
fOtherSet.removeAll(fFormatSet);
fOtherSet.removeAll(fExtendSet);
fOtherSet.removeAll(fExtendNumLetSet);
// Inhibit dictionary characters from being tested at all.
fOtherSet.removeAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
fSets = new ArrayList();
fSets.add(fCRSet);
fSets.add(fLFSet);
fSets.add(fNewlineSet);
fSets.add(fALetterSet);
fSets.add(fKatakanaSet);
fSets.add(fMidLetterSet);
fSets.add(fMidNumLetSet);
fSets.add(fMidNumSet);
fSets.add(fNumericSet);
fSets.add(fFormatSet);
@ -186,7 +328,7 @@ public class RBBITestMonkey extends TestFmwk {
int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3.
// Prev break at end of string. return DONE.
// Previous break at end of string. return DONE.
if (prevPos >= fText.length()) {
return -1;
}
@ -204,6 +346,7 @@ public class RBBITestMonkey extends TestFmwk {
p2 = p3; c2 = c3;
// Advance p3 by X(Extend | Format)* Rule 4
// But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
do {
p3 = moveIndex32(fText, p3, 1);
c3 = -1;
@ -211,7 +354,10 @@ public class RBBITestMonkey extends TestFmwk {
break;
}
c3 = UTF16.charAt(fText, p3);
}
if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
break;
}
}
while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3));
if (p1 == p2) {
@ -227,28 +373,37 @@ public class RBBITestMonkey extends TestFmwk {
// No Extend or Format characters may appear between the CR and LF,
// which requires the additional check for p2 immediately following p1.
//
if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
if (c1==0x0D && c2==0x0A) {
continue;
}
// Rule (3a) Break before and after newlines (including CR and LF)
//
if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) {
break;
};
if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
break;
};
// Rule (5). ALetter x ALetter
if (fALetterSet.contains(c1) &&
fALetterSet.contains(c2)) {
continue;
}
// Rule (6) ALetter x MidLetter ALetter
// Rule (6) ALetter x (MidLetter | MidNumLet) ALetter
//
if ( fALetterSet.contains(c1) &&
fMidLetterSet.contains(c2) &&
(fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
setContains(fALetterSet, c3)) {
continue;
}
// Rule (7) ALetter MidLetter x ALetter
// Rule (7) ALetter (MidLetter | MidNumLet) x ALetter
if (fALetterSet.contains(c0) &&
fMidLetterSet.contains(c1) &&
(fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1)) &&
fALetterSet.contains(c2)) {
continue;
}
@ -273,14 +428,14 @@ public class RBBITestMonkey extends TestFmwk {
// Rule (11) Numeric (MidNum | MidNumLet) x Numeric
if ( fNumericSet.contains(c0) &&
fMidNumSet.contains(c1) &&
(fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1)) &&
fNumericSet.contains(c2)) {
continue;
}
// Rule (12) Numeric x (MidNum | MidNumLet) Numeric
if (fNumericSet.contains(c1) &&
fMidNumSet.contains(c2) &&
(fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
setContains(fNumericSet, c3)) {
continue;
}
@ -363,6 +518,7 @@ public class RBBITestMonkey extends TestFmwk {
RBBILineMonkey()
{
fCharProperty = UProperty.LINE_BREAK;
fSets = new ArrayList();
fBK = new UnicodeSet("[\\p{Line_Break=BK}]");
@ -402,6 +558,7 @@ public class RBBITestMonkey extends TestFmwk {
fSG = new UnicodeSet("[\\ud800-\\udfff]");
fXX = new UnicodeSet("[\\p{Line_break=XX}]");
fAL.addAll(fXX); // Default behavior for XX is identical to AL
fAL.addAll(fAI); // Default behavior for AI is identical to AL
fAL.addAll(fSA); // Default behavior for SA is XX, which defaults to AL
@ -590,13 +747,20 @@ public class RBBITestMonkey extends TestFmwk {
// LB 12
// (!SP) x GL
// GL x
if ((!fSP.contains(prevChar)) && fGL.contains(thisChar) ||
fGL.contains(prevChar)) {
if (fGL.contains(prevChar)) {
continue;
}
// LB 12a
// [^SP BA HY] x GL
if (!(fSP.contains(prevChar) ||
fBA.contains(prevChar) ||
fHY.contains(prevChar) ) && fGL.contains(thisChar)) {
continue;
}
// LB 13 Don't break before closings.
// NU x CL and NU x IS are not matched here so that they will
@ -611,7 +775,7 @@ public class RBBITestMonkey extends TestFmwk {
// LB 14 Don't break after OP SP*
// Scan backwards, checking for this sequence.
// The OP char could include combining marks, so we acually check for
// The OP char could include combining marks, so we actually check for
// OP CM* SP* x
tPos = prevPos;
if (fSP.contains(prevChar)) {
@ -626,7 +790,7 @@ public class RBBITestMonkey extends TestFmwk {
continue;
}
// LB 15 Do not break withing "[
// LB 15 Do not break within "[
// QU CM* SP* x OP
if (fOP.contains(thisChar)) {
// Scan backwards from prevChar to see if it is preceded by QU CM* SP*
@ -796,18 +960,7 @@ public class RBBITestMonkey extends TestFmwk {
continue;
}
// LB 30 Do not break between letters, numbers or oridnary symbols and
// opening or closing punctuation.
// (AL | NU) x OP
// CL x (AL | NU)
if ((fAL.contains(prevChar) || fNU.contains(prevChar)) &&
fOP.contains(thisChar)) {
continue;
}
if (fCL.contains(prevChar) &&
(fAL.contains(thisChar) || fNU.contains(thisChar))) {
continue;
}
// LB 30 (Withdrawn as of Unicode 5.1)
// LB 31 Break everywhere else
break;
@ -980,6 +1133,7 @@ public class RBBITestMonkey extends TestFmwk {
UnicodeSet fOLetterSet;
UnicodeSet fNumericSet;
UnicodeSet fATermSet;
UnicodeSet fSContinueSet;
UnicodeSet fSTermSet;
UnicodeSet fCloseSet;
UnicodeSet fOtherSet;
@ -988,19 +1142,25 @@ public class RBBITestMonkey extends TestFmwk {
RBBISentenceMonkey() {
fCharProperty = UProperty.SENTENCE_BREAK;
fSets = new ArrayList();
fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep}]");
// Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator
// set and made into character classes of their own. For the monkey impl,
// they remain in SEP, since Sep always appears with CR and LF in the rules.
fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]");
fFormatSet = new UnicodeSet("[\\p{Sentence_Break = Format}]");
fSpSet = new UnicodeSet("[\\p{Sentence_Break = Sp}]");
fLowerSet = new UnicodeSet("[\\p{Sentence_Break = Lower}]");
fUpperSet = new UnicodeSet("[\\p{Sentence_Break = Upper}]");
fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}-[\\uff9e\\uff9f]]");
fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}]");
fNumericSet = new UnicodeSet("[\\p{Sentence_Break = Numeric}]");
fATermSet = new UnicodeSet("[\\p{Sentence_Break = ATerm}]");
fSContinueSet = new UnicodeSet("[\\p{Sentence_Break = SContinue}]");
fSTermSet = new UnicodeSet("[\\p{Sentence_Break = STerm}]");
fCloseSet = new UnicodeSet("[\\p{Sentence_Break = Close}]");
fExtendSet = new UnicodeSet("[\\p{Grapheme_Extend}\\uff9e\\uff9f]");
fExtendSet = new UnicodeSet("[\\p{Sentence_Break = Extend}]");
fOtherSet = new UnicodeSet();
@ -1013,6 +1173,7 @@ public class RBBITestMonkey extends TestFmwk {
fOtherSet.removeAll(fOLetterSet);
fOtherSet.removeAll(fNumericSet);
fOtherSet.removeAll(fATermSet);
fOtherSet.removeAll(fSContinueSet);
fOtherSet.removeAll(fSTermSet);
fOtherSet.removeAll(fCloseSet);
fOtherSet.removeAll(fExtendSet);
@ -1026,6 +1187,7 @@ public class RBBITestMonkey extends TestFmwk {
fSets.add(fOLetterSet);
fSets.add(fNumericSet);
fSets.add(fATermSet);
fSets.add(fSContinueSet);
fSets.add(fSTermSet);
fSets.add(fCloseSet);
fSets.add(fOtherSet);
@ -1170,8 +1332,8 @@ public class RBBITestMonkey extends TestFmwk {
}
}
// Rule 8a (STerm | ATerm) Close* Sp* x (Sterm | ATerm)
if (fSTermSet.contains(c2) || fATermSet.contains(c2)) {
// Rule 8a (STerm | ATerm) Close* Sp* x (SContinue | Sterm | ATerm)
if (fSContinueSet.contains(c2) || fSTermSet.contains(c2) || fATermSet.contains(c2)) {
p8 = p1;
while (setContains(fSpSet, cAt(p8))) {
p8 = moveBack(p8);
@ -1186,7 +1348,7 @@ public class RBBITestMonkey extends TestFmwk {
}
// Rule (9) (STerm | ATerm) Close* x (Close | Sp | Sep)
// Rule (9) (STerm | ATerm) Close* x (Close | Sp | Sep | CR | LF)
int p9 = p1;
while (p9>0 && fCloseSet.contains(cAt(p9))) {
p9 = moveBack(p9);
@ -1198,7 +1360,7 @@ public class RBBITestMonkey extends TestFmwk {
}
}
// Rule (10) (Sterm | ATerm) Close* Sp* x (Sp | Sep)
// Rule (10) (Sterm | ATerm) Close* Sp* x (Sp | Sep | CR | LF)
int p10 = p1;
while (p10>0 && fSpSet.contains(cAt(p10))) {
p10 = moveBack(p10);
@ -1214,6 +1376,9 @@ public class RBBITestMonkey extends TestFmwk {
// Rule (11) (STerm | ATerm) Close* Sp* <break>
int p11 = p1;
if (p11>0 && fSepSet.contains(cAt(p11))) {
p11 = moveBack(p11);
}
while (p11>0 && fSpSet.contains(cAt(p11))) {
p11 = moveBack(p11);
}
@ -1319,173 +1484,10 @@ public class RBBITestMonkey extends TestFmwk {
}
//
// The following UnicodeSets are used in matching a Grapheme Cluster
//
private static UnicodeSet GC_Control;
private static UnicodeSet GC_Extend ;
private static UnicodeSet GC_L ;
private static UnicodeSet GC_V ;
private static UnicodeSet GC_T ;
private static UnicodeSet GC_LV;
private static UnicodeSet GC_LVT ;
protected void init()throws Exception{
GC_Control = new UnicodeSet("[[:Zl:][:Zp:][:Cc:][:Cf:]-[\\u000d\\u000a]-[\\p{Grapheme_Cluster_Break=Extend}]]");
GC_Extend = new UnicodeSet("[\\p{Grapheme_Cluster_Break=Extend}]");
GC_L = new UnicodeSet("[[:Hangul_Syllable_Type=L:]]");
GC_V = new UnicodeSet("[[:Hangul_Syllable_Type=V:]]");
GC_T = new UnicodeSet("[[:Hangul_Syllable_Type=T:]]");
GC_LV = new UnicodeSet("[[:Hangul_Syllable_Type=LV:]]");
GC_LVT = new UnicodeSet("[[:Hangul_Syllable_Type=LVT:]]");
}
/**
* Find the end of the extent of a grapheme cluster.
* This is the reference implementation used by the monkey test for comparison
* with the RBBI results.
* @param s The string containing the text to be analyzed
* @param i The index of the start of the grapheme cluster.
* @return The index of the first code point following the grapheme cluster
* @internal
*/
private static int nextGC(StringBuffer s, int i) {
if (i >= s.length() || i == -1 ) {
return -1;
}
int c = UTF16.charAt(s, i);
int pos = i;
if (c == 0x0d) {
pos = nextCP(s, i);
if (pos >= s.length()) {
return pos;
}
c = UTF16.charAt(s, pos);
if (c == 0x0a) {
pos = nextCP(s, pos);
}
return pos;
}
if (GC_Control.contains(c) || c == 0x0a) {
pos = nextCP(s, pos);
return pos;
}
// Little state machine to consume Hangul Syllables
int hangulState = 1;
state_loop: for (;;) {
switch (hangulState) {
case 1:
if (GC_L.contains(c)) {
hangulState = 2;
break;
}
if (GC_V.contains(c) || GC_LV.contains(c)) {
hangulState = 3;
break;
}
if (GC_T.contains(c) || GC_LVT.contains(c)) {
hangulState = 4;
break;
}
break state_loop;
case 2:
if (GC_L.contains(c)) {
// continue in state 2.
break;
}
if (GC_V.contains(c) || GC_LV.contains(c)) {
hangulState = 3;
break;
}
if (GC_LVT.contains(c)) {
hangulState = 4;
break;
}
if (GC_Extend.contains(c)) {
hangulState = 5;
break;
}
break state_loop;
case 3:
if (GC_V.contains(c)) {
// continue in state 3;
break;
}
if (GC_T.contains(c)) {
hangulState = 4;
break;
}
if (GC_Extend.contains(c)) {
hangulState = 5;
break;
}
break state_loop;
case 4:
if (GC_T.contains(c)) {
// continue in state 4
break;
}
if (GC_Extend.contains(c)) {
hangulState = 5;
break;
}
break state_loop;
case 5:
if (GC_Extend.contains(c)) {
hangulState = 5;
break;
}
break state_loop;
}
// We have exited the switch statement, but are still in the loop.
// Still in a Hangul Syllable, advance to the next code point.
pos = nextCP(s, pos);
if (pos >= s.length()) {
break;
}
c = UTF16.charAt(s, pos);
} // end of loop
if (hangulState != 1) {
// We found a Hangul. We're done.
return pos;
}
// Ordinary characters. Consume one codepoint unconditionally, then any following Extends.
for (;;) {
pos = nextCP(s, pos);
if (pos >= s.length()) {
break;
}
c = UTF16.charAt(s, pos);
if (GC_Extend.contains(c) == false) {
break;
}
}
return pos;
}
/**
* random number generator. Not using Java's built-in Randoms for two reasons:
* 1. Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
* 2. We need to get and restore the seed from values occuring in the middle
* 2. We need to get and restore the seed from values occurring in the middle
* of a long sequence, to more easily reproduce failing cases.
*/
private static int m_seed = 1;
@ -1495,6 +1497,42 @@ public class RBBITestMonkey extends TestFmwk {
return (int)(m_seed >>> 16) % 32768;
}
/**
 * Helper function for formatting error output.
 * Appends a string into a fixed-size field at the end of a StringBuilder:
 * the string is truncated if it is longer than the field and padded on the
 * right with blanks if it is shorter, so exactly {@code fieldLen}
 * characters are appended.
 *
 * @param dest     buffer to append to.
 * @param src      text to place in the field.
 * @param fieldLen width of the field in chars; a negative width is treated
 *                 as zero (nothing is appended) instead of throwing.
 */
private static void appendToBuf(StringBuilder dest, String src, int fieldLen) {
    // Clamp first: a negative end index would make append(CharSequence, int, int) throw.
    int width = Math.max(fieldLen, 0);
    int copyLen = Math.min(src.length(), width);
    dest.append(src, 0, copyLen);
    // Blank-pad whatever part of the field the source text did not fill.
    for (int filled = copyLen; filled < width; filled++) {
        dest.append(' ');
    }
}
/**
 * Helper function for formatting error output.
 * Appends a code point as an escape: backslash, 'u' and four lowercase hex
 * digits when {@code c < 0x10000}, otherwise backslash, 'U' and eight
 * lowercase hex digits. The escape is then blank-padded on the right up to
 * {@code fieldLen} characters.
 *
 * @param dest     buffer to append to.
 * @param c        the code point to display.
 * @param fieldLen total width of the field; if it is smaller than the
 *                 escape itself (6 or 10 chars) the escape is written
 *                 untruncated with no padding, rather than throwing.
 */
private static void appendCharToBuf(StringBuilder dest, int c, int fieldLen) {
    final boolean fourDigit = c < 0x10000;
    final int hexDigits = fourDigit ? 4 : 8;
    dest.append(fourDigit ? "\\u" : "\\U");
    // Emit the nibbles most-significant first.
    for (int shift = (hexDigits - 1) * 4; shift >= 0; shift -= 4) {
        dest.append(Character.forDigit((c >> shift) & 0xf, 16));
    }
    // Two chars of prefix plus the digits are already in the field; pad the rest.
    for (int used = hexDigits + 2; used < fieldLen; used++) {
        dest.append(' ');
    }
}
/**
* Run a RBBI monkey test. Common routine, for all break iterator types.
@ -1748,33 +1786,31 @@ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int
}
// Format looks like "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
StringBuffer errorText = new StringBuffer();
errorText.append("<data>");
StringBuilder errorText = new StringBuilder();
String hexChars = "0123456789abcdef";
int c; // Char from test data
int bn;
for (ci = startContext; ci <= endContext && ci != -1; ci = nextCP(testText, ci)) {
if (ci == i) {
// This is the location of the error.
errorText.append("<?>");
errorText.append("<?>---------------------------------\n");
} else if (expectedBreaks[ci]) {
// This a non-error expected break position.
errorText.append("<>");
errorText.append("------------------------------------\n");
}
if (ci < testText.length()) {
c = UTF16.charAt(testText, ci);
if (c < 0x10000) {
errorText.append("\\u");
for (bn=12; bn>=0; bn-=4) {
errorText.append(hexChars.charAt((((int)c)>>bn)&0xf));
}
} else {
errorText.append("\\U");
for (bn=28; bn>=0; bn-=4) {
errorText.append(hexChars.charAt((((int)c)>>bn)&0xf));
}
}
appendCharToBuf(errorText, c, 11);
String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
appendToBuf(errorText, gc, 8);
int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
String extraPropValue =
UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
appendToBuf(errorText, extraPropValue, 20);
String charName = UCharacter.getExtendedName(c);
appendToBuf(errorText, charName, 40);
errorText.append('\n');
}
}
if (ci == testText.length() && ci != -1) {

View File

@ -1,4 +1,4 @@
# Copyright (c) 2001-2006 International Business Machines
# Copyright (c) 2001-2008 International Business Machines
# Corporation and others. All Rights Reserved.
#
# RBBI Test Data
@ -75,14 +75,14 @@ Hi! •This is a simple sample sentence. •It does not have to make any sense a
# Hindi combining chars. (An old test)
<data>•भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•
•\u0939•\u094c•\u0964•</data>
<data>•\u0916\u0947•\u0938\u0941\u0902•\u0926•\u0930•\u0939•\u094c•\u0964•</data>
# TODO: Update these tests for Unicode 5.1 Extended Grapheme clusters
#<data>•भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•
#•\u0939•\u094c•\u0964•</data>
#<data>•\u0916\u0947•\u0938\u0941\u0902•\u0926•\u0930•\u0939•\u094c•\u0964•</data>
# Bug 1587. Tamil. \u0baa\u0bc1 should be two separate characters, even though
# Hyangmi would perfer that it be one.
<data>•\u0baa•\u0bc1•\u0baa•\u0bc1•</data>
# Bug 1587. Tamil. \u0baa\u0bc1 is an Extended Grapheme Cluster
<data>•\u0baa\u0bc1•\u0baa\u0bc1•</data>
# Regression test for bug 1889
<data>•\u0f40\u0f7d•\u0000•\u0f7e•</data>
@ -485,7 +485,10 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
# Regression for bug 836
<data>•AAA(AAA •</data>
# Note: Unicode 5.1 changed this behavior
# ICU will want to change it back before releasing,
# so there is no break preceding the '('
<data>•AAA•(AAA •</data>
# Try some words from other scripts.
# Greek, Cyrillic, Hebrew, Arabic, Arabic, Georgian, Latin

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -200,15 +200,15 @@ public class UnicodeSetTest extends TestFmwk {
// Cover applyPattern, applyPropertyAlias
s.clear();
s.applyPattern("[ab ]", true);
expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab"});
expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
s.clear();
s.applyPattern("[ab ]", false);
expectToPattern(s, "[\\\u0020ab]", new String[] {"a", "\u0020", NOT, "ab"});
expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
s.clear();
s.applyPropertyAlias("nv", "0.5");
expectToPattern(s, "[\\u00BD\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]", null);
// Unicode 4.1 adds \u2CFD\U00010141\U00010175\U00010176 with numeric value 1/2
expectToPattern(s, "[\\u00BD\\u0D74\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]", null);
// Unicode 5.1 adds Malayalam 1/2 (\u0D74)
s.clear();
s.applyPropertyAlias("gc", "Lu");
@ -1252,7 +1252,7 @@ public class UnicodeSetTest extends TestFmwk {
// selector, input, output
CASE,
"[aq\u00DF{Bc}{bC}{Fi}]",
"[aAqQ\u00DF\uFB01{ss}{bc}{fi}]",
"[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
CASE,
"[\u01F1]", // 'DZ'

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -318,15 +318,15 @@ public final class UCharacterProperty
new BinaryProperties( 1, ( 1 << XID_CONTINUE_PROPERTY_) ),
new BinaryProperties( 1, ( 1 << XID_START_PROPERTY_) ),
new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CASE_SENSITIVE */
new BinaryProperties( 2, ( 1 << V2_S_TERM_PROPERTY_) ),
new BinaryProperties( 2, ( 1 << V2_VARIATION_SELECTOR_PROPERTY_) ),
new BinaryProperties( 1, ( 1 << S_TERM_PROPERTY_) ),
new BinaryProperties( 1, ( 1 << VARIATION_SELECTOR_PROPERTY_) ),
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFD_INERT */
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFKD_INERT */
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFC_INERT */
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_NFKC_INERT */
new BinaryProperties( SRC_NORM, 0 ), /* UCHAR_SEGMENT_STARTER */
new BinaryProperties( 2, ( 1 << V2_PATTERN_SYNTAX) ),
new BinaryProperties( 2, ( 1 << V2_PATTERN_WHITE_SPACE) ),
new BinaryProperties( 1, ( 1 << PATTERN_SYNTAX) ),
new BinaryProperties( 1, ( 1 << PATTERN_WHITE_SPACE) ),
new BinaryProperties( SRC_CHAR_AND_PROPSVEC, 0 ), /* UCHAR_POSIX_ALNUM */
new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_BLANK */
new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_GRAPH */
@ -798,38 +798,36 @@ public final class UCharacterProperty
* ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
*/
private static final int WHITE_SPACE_PROPERTY_ = 0;
//private static final int BIDI_CONTROL_PROPERTY_ = 1;
//private static final int JOIN_CONTROL_PROPERTY_ = 2;
private static final int DASH_PROPERTY_ = 3;
private static final int HYPHEN_PROPERTY_ = 4;
private static final int QUOTATION_MARK_PROPERTY_ = 5;
private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 6;
private static final int MATH_PROPERTY_ = 7;
private static final int HEX_DIGIT_PROPERTY_ = 8;
private static final int ASCII_HEX_DIGIT_PROPERTY_ = 9;
private static final int ALPHABETIC_PROPERTY_ = 10;
private static final int IDEOGRAPHIC_PROPERTY_ = 11;
private static final int DIACRITIC_PROPERTY_ = 12;
private static final int EXTENDER_PROPERTY_ = 13;
//private static final int LOWERCASE_PROPERTY_ = 14;
//private static final int UPPERCASE_PROPERTY_ = 15;
private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 16;
private static final int GRAPHEME_EXTEND_PROPERTY_ = 17;
private static final int GRAPHEME_LINK_PROPERTY_ = 18;
private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 19;
private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 20;
private static final int RADICAL_PROPERTY_ = 21;
private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 22;
private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 23;
private static final int DEPRECATED_PROPERTY_ = 24;
//private static final int SOFT_DOTTED_PROPERTY_ = 25;
private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 26;
private static final int XID_START_PROPERTY_ = 27;
private static final int XID_CONTINUE_PROPERTY_ = 28;
private static final int ID_START_PROPERTY_ = 29;
private static final int ID_CONTINUE_PROPERTY_ = 30;
private static final int GRAPHEME_BASE_PROPERTY_ = 31;
//private static final int BINARY_1_TOP_PROPERTY_ = 32;
private static final int DASH_PROPERTY_ = 1;
private static final int HYPHEN_PROPERTY_ = 2;
private static final int QUOTATION_MARK_PROPERTY_ = 3;
private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
private static final int MATH_PROPERTY_ = 5;
private static final int HEX_DIGIT_PROPERTY_ = 6;
private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
private static final int ALPHABETIC_PROPERTY_ = 8;
private static final int IDEOGRAPHIC_PROPERTY_ = 9;
private static final int DIACRITIC_PROPERTY_ = 10;
private static final int EXTENDER_PROPERTY_ = 11;
private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
private static final int GRAPHEME_LINK_PROPERTY_ = 14;
private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
private static final int RADICAL_PROPERTY_ = 17;
private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
private static final int DEPRECATED_PROPERTY_ = 20;
private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
private static final int XID_START_PROPERTY_ = 22;
private static final int XID_CONTINUE_PROPERTY_ = 23;
private static final int ID_START_PROPERTY_ = 24;
private static final int ID_CONTINUE_PROPERTY_ = 25;
private static final int GRAPHEME_BASE_PROPERTY_ = 26;
private static final int S_TERM_PROPERTY_ = 27;
private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
private static final int PATTERN_SYNTAX = 29; /* new in ICU 3.4 and Unicode 4.1 */
private static final int PATTERN_WHITE_SPACE = 30;
/**
* First nibble shift
@ -844,11 +842,6 @@ public final class UCharacterProperty
*/
private static final int AGE_SHIFT_ = 24;
// boolean properties in vector word 2
private static final int V2_S_TERM_PROPERTY_ = 24;
private static final int V2_VARIATION_SELECTOR_PROPERTY_ = 25;
private static final int V2_PATTERN_SYNTAX = 26; /* new in ICU 3.4 and Unicode 4.1 */
private static final int V2_PATTERN_WHITE_SPACE = 27;
// private constructors --------------------------------------------------

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2006, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -157,7 +157,7 @@ final class UCharacterPropertyReader implements ICUBinary.Authenticate
* Format version; this code works with all versions with the same major
* version number and the same Trie bit distribution.
*/
private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x4, (byte)0,
private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x5, (byte)0,
(byte)Trie.INDEX_STAGE_1_SHIFT_,
(byte)Trie.INDEX_STAGE_2_SHIFT_};
}

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed7c7aef0520ae3182b6c7b9ab01ecc3c40e2af9ed46f68943abbec87ce85300
size 5412788
oid sha256:3e092ba77dd3f34ebab38fdb9a23ebbb8f23089cba6323f2d941d40c92c59cfe
size 5521414

View File

@ -855,10 +855,112 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int SUNDANESE_ID = 155; /* [1B80] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int LEPCHA_ID = 156; /* [1C00] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int OL_CHIKI_ID = 157; /* [1C50] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int VAI_ID = 159; /* [A500] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int SAURASHTRA_ID = 161; /* [A880] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int KAYAH_LI_ID = 162; /* [A900] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int REJANG_ID = 163; /* [A930] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int CHAM_ID = 164; /* [AA00] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int LYCIAN_ID = 167; /* [10280] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int CARIAN_ID = 168; /* [102A0] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int LYDIAN_ID = 169; /* [10920] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int DOMINO_TILES_ID = 171; /* [1F030] */
/**
* @stable ICU 2.4
*/
public static final int COUNT = 155;
public static final int COUNT = 172;
// blocks objects ---------------------------------------------------
@ -1675,6 +1777,107 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
/**
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
/**
* @stable ICU 2.4
*/
@ -1752,13 +1955,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
for (int i = 0; i < BLOCKS_.length; ++i) {
UnicodeBlock b = BLOCKS_[i];
String name = getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG);
m.put(name.toUpperCase(), b);
m.put(name.replace('_',' ').toUpperCase(), b);
m.put(b.toString().toUpperCase(), b);
name = name.toUpperCase().replace(" ", "").replace("_", "").replace("-", "");
m.put(name, b);
}
mref = new SoftReference(m);
}
UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase());
UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase().replace(" ", "").replace("_", "").replace("-", ""));
if (b == null) {
throw new IllegalArgumentException();
}
@ -1875,7 +2077,26 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
PHOENICIAN,
CUNEIFORM,
CUNEIFORM_NUMBERS_AND_PUNCTUATION,
COUNTING_ROD_NUMERALS
COUNTING_ROD_NUMERALS,
/* New blocks in Unicode 5.1 */
SUNDANESE,
LEPCHA,
OL_CHIKI,
CYRILLIC_EXTENDED_A,
VAI,
CYRILLIC_EXTENDED_B,
SAURASHTRA,
KAYAH_LI,
REJANG,
CHAM,
ANCIENT_SYMBOLS,
PHAISTOS_DISC,
LYCIAN,
CARIAN,
LYDIAN,
MAHJONG_TILES,
DOMINO_TILES,
};
static {
@ -2285,11 +2506,15 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
/**
* @stable ICU 2.6
*/
public static final int ZHAIN =53;
public static final int ZHAIN = 53;
/**
* @stable ICU 2.4
* @stable ICU 4.0
*/
public static final int COUNT = 54;
public static final int BURUSHASKI_YEH_BARREE = 54;
/**
* @stable ICU 4.0
*/
public static final int COUNT = 55;
}
/**
@ -2338,10 +2563,18 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 3.4
*/
public static final int V = 9;
/**
* @stable ICU 4.0
*/
public static final int SPACING_MARK = 10;
/**
* @stable ICU 4.0
*/
public static final int PREPEND = 11;
/**
* @stable ICU 3.4
*/
public static final int COUNT = 10;
public static final int COUNT = 12;
}
/**
@ -2383,9 +2616,29 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static final int EXTENDNUMLET = 7;
/**
* @stable ICU 3.8
* @stable ICU 4.0
*/
public static final int COUNT = 8;
public static final int CR = 8;
/**
* @stable ICU 4.0
*/
public static final int EXTEND = 9;
/**
* @stable ICU 4.0
*/
public static final int LF = 10;
/**
* @stable ICU 4.0
*/
public static final int MIDNUMLEFT = 11;
/**
* @stable ICU 4.0
*/
public static final int NEWLINE = 12;
/**
* @stable ICU 4.0
*/
public static final int COUNT = 13;
}
/**
@ -2439,9 +2692,25 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static final int UPPER = 10;
/**
* @stable ICU 3.8
* @stable ICU 4.0
*/
public static final int COUNT = 11;
public static final int CR = 11;
/**
* @stable ICU 4.0
*/
public static final int EXTEND = 12;
/**
* @stable ICU 4.0
*/
public static final int LF = 13;
/**
* @stable ICU 4.0
*/
public static final int SCONTINUE = 14;
/**
* @stable ICU 4.0
*/
public static final int COUNT = 15;
}
/**
@ -5014,7 +5283,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
case UProperty.JOINING_TYPE:
return gBdp.getJoiningType(ch);
case UProperty.LINE_BREAK:
return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_;
return (int)(PROPERTY_.getAdditional(ch, LB_VWORD)& LB_MASK)>>LB_SHIFT;
case UProperty.NUMERIC_TYPE:
type=getNumericType(PROPERTY_.getProperty(ch));
if(type>NumericType.NUMERIC) {
@ -5196,7 +5465,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
case UProperty.GENERAL_CATEGORY:
return UCharacterCategory.CHAR_CATEGORY_COUNT - 1;
case UProperty.LINE_BREAK:
return (PROPERTY_.getMaxValues(0) & LINE_BREAK_MASK_) >> LINE_BREAK_SHIFT_;
return (PROPERTY_.getMaxValues(LB_VWORD) & LB_MASK) >> LB_SHIFT;
case UProperty.NUMERIC_TYPE:
return NumericType.COUNT - 1;
case UProperty.SCRIPT:
@ -6148,17 +6417,21 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
/*
* Properties in vector word 2
* Bits
* 31..24 More binary properties (see UCharacterProperty)
* 23..19 reserved
* 18..14 Sentence Break
* 13..10 Word Break
* 31..26 reserved
* 25..20 Line Break
* 19..15 Sentence Break
* 14..10 Word Break
* 9.. 5 Grapheme Cluster Break
* 4.. 0 Decomposition Type
*/
private static final int SB_MASK = 0x0007c000;
private static final int SB_SHIFT = 14;
private static final int LB_MASK = 0x03f00000;
private static final int LB_SHIFT = 20;
private static final int LB_VWORD = 2;
private static final int WB_MASK = 0x00003c00;
private static final int SB_MASK = 0x000f8000;
private static final int SB_SHIFT = 15;
private static final int WB_MASK = 0x00007c00;
private static final int WB_SHIFT = 10;
private static final int GCB_MASK = 0x000003e0;
@ -6173,48 +6446,38 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
/*
* Properties in vector word 0
* Bits
* 31..24 DerivedAge version major/minor one nibble each (see UCharacterProperty)
* 23..18 Line Break
* 17..15 East Asian Width
* 14.. 7 UBlockCode
* 6.. 0 UScriptCode
* 31..24 DerivedAge version major/minor one nibble each
* 23..20 reserved
* 19..17 East Asian Width
* 16.. 8 UBlockCode
* 7.. 0 UScriptCode
*/
/**
* Integer properties mask and shift values for East Asian cell width.
* Equivalent to icu4c UPROPS_EA_MASK
*/
private static final int EAST_ASIAN_MASK_ = 0x00038000;
private static final int EAST_ASIAN_MASK_ = 0x000e0000;
/**
* Integer properties mask and shift values for East Asian cell width.
* Equivalent to icu4c UPROPS_EA_SHIFT
*/
private static final int EAST_ASIAN_SHIFT_ = 15;
/**
* Integer properties mask and shift values for line breaks.
* Equivalent to icu4c UPROPS_LB_MASK
*/
private static final int LINE_BREAK_MASK_ = 0x00FC0000;
/**
* Integer properties mask and shift values for line breaks.
* Equivalent to icu4c UPROPS_LB_SHIFT
*/
private static final int LINE_BREAK_SHIFT_ = 18;
private static final int EAST_ASIAN_SHIFT_ = 17;
/**
* Integer properties mask and shift values for blocks.
* Equivalent to icu4c UPROPS_BLOCK_MASK
*/
private static final int BLOCK_MASK_ = 0x00007f80;
private static final int BLOCK_MASK_ = 0x0001ff00;
/**
* Integer properties mask and shift values for blocks.
* Equivalent to icu4c UPROPS_BLOCK_SHIFT
*/
private static final int BLOCK_SHIFT_ = 7;
private static final int BLOCK_SHIFT_ = 8;
/**
* Integer properties mask and shift values for scripts.
* Equivalent to icu4c UPROPS_SCRIPT_MASK
*/
private static final int SCRIPT_MASK_ = 0x0000007f;
private static final int SCRIPT_MASK_ = 0x000000ff;
// private constructor -----------------------------------------------
///CLOVER:OFF

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 2001-2007 International Business Machines Corporation and *
* Copyright (C) 2001-2008 International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -689,11 +689,108 @@ public final class UScript {
*/
public static final int MEITEI_MAYEK = 115;/* Mtei */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int IMPERIAL_ARAMAIC = 116;/* Armi */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int AVESTAN = 117;/* Avst */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int CHAKMA = 118;/* Cakm */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int KOREAN = 119;/* Kore */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int KAITHI = 120;/* Kthi */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int MANICHAEAN = 121;/* Mani */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int PSALTER_PAHLAVI = 123;/* Phlp */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int BOOK_PAHLAVI = 124;/* Phlv */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int INSCRIPTIONAL_PARTHIAN = 125;/* Prti */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int SAMARITAN = 126;/* Samr */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int TAI_VIET = 127;/* Tavt */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */
/**
* ISO 15924 script code
* @draft ICU 4.0
* @provisional This API might change or be removed in a future release.
*/
public static final int SYMBOLS = 129;/* Zsym */
/**
* Limit
* @stable ICU 2.4
*/
public static final int CODE_LIMIT = 116;
public static final int CODE_LIMIT = 130;
// Mask selecting the script code from a properties-vector word.
// Widened from 7 to 8 bits: CODE_LIMIT is now 130, which no longer fits in
// 0x7f, and the Unicode 5.1 data layout stores the script code in bits 7..0
// (UCharacter uses the same 0xff mask for scripts).
// NOTE(review): the use site of this mask is not visible here - presumably it
// is applied to vector word 0; confirm against getScript().
private static final int SCRIPT_MASK = 0x000000ff;
private static final UCharacterProperty prop= UCharacterProperty.getInstance();
@ -851,4 +948,3 @@ public final class UScript {
private UScript(){}
///CLOVER:ON
}

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -485,6 +485,7 @@ final class CollationParsedRuleBuilder
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
m_utilElement_.m_prefix_ = 0;
m_utilElement_.m_CELength_ = 0;
m_utilElement_.m_prefixChars_ = null;
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
while (CE != CollationElementIterator.NULLORDER) {
CE = m_utilColEIter_.next();
@ -610,7 +611,8 @@ final class CollationParsedRuleBuilder
int offset = 0;
while (conts[offset] != 0) {
// tailoredCE = ucmpe32_get(t.m_mapping, *conts);
int tailoredCE = t.m_mapping_.getValue(conts[offset]);
int tailoredCE = t.m_mapping_.getValue(conts[offset]);
Elements prefixElm = null;
if (tailoredCE != CE_NOT_FOUND_) {
boolean needToAdd = true;
if (isContractionTableElement(tailoredCE)) {
@ -618,6 +620,22 @@ final class CollationParsedRuleBuilder
conts, offset + 1) == true) {
needToAdd = false;
}
}
if (!needToAdd && isPrefix(tailoredCE) && conts[offset+1]==0) {
// pre-context character in UCA
// The format for pre-context character is
// conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
Elements elm = new Elements();
elm.m_cPoints_=m_utilElement_.m_uchars_;
elm.m_CELength_=0;
elm.m_uchars_= UCharacter.toString(conts[offset]);
elm.m_prefixChars_=UCharacter.toString(conts[offset+2]);
elm.m_prefix_=0; // TODO(claireho) : confirm!
prefixElm = (Elements)t.m_prefixLookup_.get(elm);
if ((prefixElm== null) ||
(prefixElm.m_prefixChars_.charAt(0)!= conts[offset+2])) {
needToAdd = true;
}
}
if(m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(conts[offset])) {
needToAdd = false;
@ -626,23 +644,53 @@ final class CollationParsedRuleBuilder
if (needToAdd == true) {
// we need to add if this contraction is not tailored.
m_utilElement_.m_prefix_ = 0;
m_utilElement_.m_prefixChars_ = null;
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
str.delete(0, str.length());
str.append(conts[offset]);
str.append(conts[offset + 1]);
if (conts[offset + 2] != 0) {
str.append(conts[offset + 2]);
}
m_utilElement_.m_uchars_ = str.toString();
m_utilElement_.m_CELength_ = 0;
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
if (conts[offset+1]!=0) { // not precontext
m_utilElement_.m_prefix_ = 0;
m_utilElement_.m_prefixChars_ = null;
m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
str.delete(0, str.length());
str.append(conts[offset]);
str.append(conts[offset + 1]);
if (conts[offset + 2] != 0) {
str.append(conts[offset + 2]);
}
m_utilElement_.m_uchars_ = str.toString();
m_utilElement_.m_CELength_ = 0;
m_utilColEIter_.setText(m_utilElement_.m_uchars_);
}
else { // add a pre-context element
int preKeyLen=0;
str.delete(0, str.length()); // clean up
m_utilElement_.m_cPoints_ = UCharacter.toString(conts[offset]);
m_utilElement_.m_CELength_ = 0;
m_utilElement_.m_uchars_ = UCharacter.toString(conts[offset]);
m_utilElement_.m_prefixChars_ = UCharacter.toString(conts[offset+2]);
if (prefixElm==null) {
m_utilElement_.m_prefix_=0;
}
else { // TODO (claireho): confirm!
m_utilElement_.m_prefix_= m_utilElement_.m_prefix_;
// m_utilElement_.m_prefix_= prefixElm.m_prefix_;
}
m_utilColEIter_.setText(m_utilElement_.m_prefixChars_);
while (m_utilColEIter_.next()!=CollationElementIterator.NULLORDER) {
// count number of keys for pre-context char.
preKeyLen++;
}
str.append(conts[offset+2]);
str.append(conts[offset]);
m_utilColEIter_.setText(str.toString());
// Skip the keys for prefix character, then copy the rest to el.
while ((preKeyLen-->0) &&
m_utilColEIter_.next()!= CollationElementIterator.NULLORDER) {
continue;
}
}
while (true) {
int CE = m_utilColEIter_.next();
if (CE != CollationElementIterator.NULLORDER) {
m_utilElement_.m_CEs_[m_utilElement_.m_CELength_
++] = CE;
m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
}
else {
break;
@ -1584,14 +1632,18 @@ final class CollationParsedRuleBuilder
s --;
if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
if (strength == Collator.SECONDARY) {
low = RuleBasedCollator.COMMON_TOP_2_ << 24;
high = 0xFFFFFFFF;
if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) {
// Override if low range is less than UCOL_COMMON_TOP2.
low = RuleBasedCollator.COMMON_TOP_2_ << 24;
}
high = 0xFFFFFFFF;
}
else {
// low = 0x02000000;
// This needs to be checked - what if low is
// not good...
high = 0x40000000;
else {
if ( low < RuleBasedCollator.COMMON_BOTTOM_3<<24 ) {
// Override if low range is less than UCOL_COMMON_BOT3.
low = RuleBasedCollator.COMMON_BOTTOM_3 <<24;
}
high = 0x40000000;
}
break;
}
@ -2864,6 +2916,19 @@ final class CollationParsedRuleBuilder
t.m_mapping_.setValue(element.m_cPoints_.charAt(
element.m_cPointsOffset_),
element.m_mapCE_);
if (element.m_prefixChars_ != null &&
element.m_prefixChars_.length()>0 &&
getCETag(CE) != CE_IMPLICIT_TAG_) {
// Add CE for standalone precontext char.
Elements origElem = new Elements();
origElem.m_prefixChars_ = null;
origElem.m_uchars_ = element.m_cPoints_;
origElem.m_cPoints_ = origElem.m_uchars_;
origElem.m_CEs_[0] = CE;
origElem.m_mapCE_ = CE;
origElem.m_CELength_ = 1;
finalizeAddition(t, origElem);
}
}
}
else {

View File

@ -1,7 +1,7 @@
//##header J2SE15
/**
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -1531,6 +1531,7 @@ public final class RuleBasedCollator extends Collator
// Byte value 0x05; COMMON_BOTTOM_2_ below is defined in terms of it.
static final byte BYTE_COMMON_ = (byte)0x05;
// Declared as int rather than byte so that 0x86 is held as a non-negative value.
static final int COMMON_TOP_2_ = 0x86; // int for unsignedness
// Same value as BYTE_COMMON_, widened to int.
static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;
// Same numeric value (0x05) as COMMON_BOTTOM_2_.
// NOTE(review): unlike the neighbouring constants this name has no trailing
// underscore - confirm whether that is intentional.
static final int COMMON_BOTTOM_3 = 0x05;
/**
* Case strength mask
*/