ICU-12450 move com.ibm.icu.dev.util.BagFormatter, CaseIterator, FileUtilities, ICUPropertyFactory, TransliteratorUtilities, UnicodeProperty, UnicodePropertySymbolTable to org.unicode.cldr.util
X-SVN-Rev: 38623
This commit is contained in:
parent
9d12e081bc
commit
0ba7b2e17e
@ -1,119 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.translit;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.util.ICUPropertyFactory;
|
||||
import com.ibm.icu.dev.util.UnicodeProperty;
|
||||
import com.ibm.icu.dev.util.UnicodeProperty.Factory;
|
||||
import com.ibm.icu.dev.util.UnicodePropertySymbolTable;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* @author markdavis
|
||||
*
|
||||
*/
|
||||
public class TestUnicodeProperty extends TestFmwk{
|
||||
public static void main(String[] args) {
|
||||
new TestUnicodeProperty().run(args);
|
||||
}
|
||||
static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]");
|
||||
static final UnicodeSet letter = new UnicodeSet("[:gc=L:]");
|
||||
|
||||
|
||||
public void TestBasic() {
|
||||
Factory factory = ICUPropertyFactory.make();
|
||||
UnicodeProperty property = factory.getProperty("gc");
|
||||
List values = property.getAvailableValues();
|
||||
assertTrue("Values contain GC values", values.contains("Unassigned"));
|
||||
final UnicodeSet lu = property.getSet("Lu");
|
||||
if (!assertTrue("Gc=L contains 'A'", lu.contains('A'))) {
|
||||
errln("Contents:\t" + lu.complement().complement().toPattern(false));
|
||||
}
|
||||
}
|
||||
|
||||
public void TestSymbolTable() {
|
||||
Factory factory = ICUPropertyFactory.make();
|
||||
UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
|
||||
UnicodeSet.setDefaultXSymbolTable(upst);
|
||||
try {
|
||||
final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");
|
||||
assertTrue("Gc=L contains 'A'", luSet.contains('A'));
|
||||
assertTrue("Gc=L contains 'Z'", luSet.contains('Z'));
|
||||
assertFalse("Gc=L contains 'a'", luSet.contains('1'));
|
||||
UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
|
||||
assertEquals("gc=lc are equal", casedLetter, casedLetter2);
|
||||
} finally {
|
||||
// restore the world
|
||||
UnicodeSet.setDefaultXSymbolTable(null);
|
||||
}
|
||||
}
|
||||
|
||||
public void TestSymbolTable2() {
|
||||
Factory factory = new MyUnicodePropertyFactory();
|
||||
UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
|
||||
UnicodeSet.setDefaultXSymbolTable(upst);
|
||||
try {
|
||||
final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");
|
||||
assertFalse("Gc=L contains 'A'", luSet.contains('A'));
|
||||
if (!assertTrue("Gc=L contains 'Z'", luSet.contains('Z'))) {
|
||||
errln("Contents:\t" + luSet.complement().complement().toPattern(false));
|
||||
}
|
||||
assertFalse("Gc=L contains 'a'", luSet.contains('1'));
|
||||
UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
|
||||
assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2);
|
||||
} finally {
|
||||
// restore the world
|
||||
UnicodeSet.setDefaultXSymbolTable(null);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For testing, override to set A-M to Cn.
|
||||
*/
|
||||
static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty {
|
||||
UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc");
|
||||
{
|
||||
setName(icuProperty.getName());
|
||||
setType(icuProperty.getType());
|
||||
}
|
||||
@Override
|
||||
protected String _getValue(int codepoint) {
|
||||
if (codepoint >= 'A' && codepoint <= 'M') {
|
||||
return "Unassigned";
|
||||
} else {
|
||||
return icuProperty.getValue(codepoint);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
protected List _getValueAliases(String valueAlias, List result) {
|
||||
return icuProperty.getValueAliases(valueAlias, result);
|
||||
}
|
||||
@Override
|
||||
public List _getNameAliases(List result) {
|
||||
return icuProperty.getNameAliases();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing, a factory whose Gc property is overridden to set A-M to Cn.
|
||||
*/
|
||||
static class MyUnicodePropertyFactory extends ICUPropertyFactory {
|
||||
private MyUnicodePropertyFactory() {
|
||||
add(new MyUnicodeGCProperty());
|
||||
}
|
||||
}
|
||||
|
||||
static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable {
|
||||
public MyUnicodePropertySymbolTable(Factory factory) {
|
||||
super(factory);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2015, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.translit;
|
||||
@ -10,17 +10,26 @@ import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.TestBoilerplate;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.util.CollectionUtilities;
|
||||
import com.ibm.icu.dev.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.util.UnicodeMapIterator;
|
||||
import com.ibm.icu.dev.util.UnicodeMap.EntryRange;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
@ -294,4 +303,265 @@ public class UnicodeMapTest extends TestFmwk {
|
||||
assertNull("original-def", test.get("def"));
|
||||
assertEquals("copy-def", (Integer) 4, copy.get("def"));
|
||||
}
|
||||
|
||||
private static final int LIMIT = 0x15; // limit to make testing more realistic in terms of collisions
|
||||
private static final int ITERATIONS = 1000000;
|
||||
private static final boolean SHOW_PROGRESS = false;
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
SortedSet<String> log = new TreeSet<String>();
|
||||
static String[] TEST_VALUES = {"A", "B", "C", "D", "E", "F"};
|
||||
static Random random = new Random(12345);
|
||||
|
||||
public void TestUnicodeMapRandom() {
|
||||
// do random change to both, then compare
|
||||
random.setSeed(12345); // reproducible results
|
||||
logln("Comparing against HashMap");
|
||||
UnicodeMap<String> map1 = new UnicodeMap();
|
||||
Map<Integer, String> map2 = new HashMap<Integer, String>();
|
||||
for (int counter = 0; counter < ITERATIONS; ++counter) {
|
||||
int start = random.nextInt(LIMIT);
|
||||
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
|
||||
String logline = Utility.hex(start) + "\t" + value;
|
||||
if (SHOW_PROGRESS) logln(counter + "\t" + logline);
|
||||
log.add(logline);
|
||||
if (DEBUG && counter == 144) {
|
||||
System.out.println(" debug");
|
||||
}
|
||||
map1.put(start, value);
|
||||
map2.put(start, value);
|
||||
check(map1, map2, counter);
|
||||
}
|
||||
checkNext(map1, map2, LIMIT);
|
||||
}
|
||||
|
||||
private static final int SET_LIMIT = 0x10FFFF;
|
||||
private static final int propEnum = UProperty.GENERAL_CATEGORY;
|
||||
|
||||
public void TestUnicodeMapGeneralCategory() {
|
||||
logln("Setting General Category");
|
||||
UnicodeMap<String> map1 = new UnicodeMap();
|
||||
Map<Integer, String> map2 = new HashMap<Integer, String>();
|
||||
//Map<Integer, String> map3 = new TreeMap<Integer, String>();
|
||||
map1 = new UnicodeMap<String>();
|
||||
map2 = new TreeMap<Integer,String>();
|
||||
for (int cp = 0; cp <= SET_LIMIT; ++cp) {
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
//if (enumValue <= 0) continue; // for smaller set
|
||||
String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
map1.put(cp, value);
|
||||
map2.put(cp, value);
|
||||
}
|
||||
checkNext(map1, map2, Integer.MAX_VALUE);
|
||||
|
||||
logln("Comparing General Category");
|
||||
check(map1, map2, -1);
|
||||
logln("Comparing Values");
|
||||
Set<String> values1 = map1.getAvailableValues(new TreeSet<String>());
|
||||
Set<String> values2 = new TreeSet<String>(map2.values());
|
||||
if (!TestBoilerplate.verifySetsIdentical(this, values1, values2)) {
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
logln("Comparing Sets");
|
||||
for (Iterator<String> it = values1.iterator(); it.hasNext();) {
|
||||
String value = it.next();
|
||||
logln(value == null ? "null" : value);
|
||||
UnicodeSet set1 = map1.keySet(value);
|
||||
UnicodeSet set2 = TestBoilerplate.getSet(map2, value);
|
||||
if (!TestBoilerplate.verifySetsIdentical(this, set1, set2)) {
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBoilerplate() {
|
||||
// check boilerplate
|
||||
List argList = new ArrayList();
|
||||
argList.add("TestMain");
|
||||
if (params.verbose) argList.add("-verbose");
|
||||
String[] args = new String[argList.size()];
|
||||
argList.toArray(args);
|
||||
new UnicodeMapBoilerplate().run(args);
|
||||
// TODO: the following is not being reached
|
||||
new UnicodeSetBoilerplate().run(args);
|
||||
}
|
||||
|
||||
public void TestAUnicodeMap2() {
|
||||
UnicodeMap foo = new UnicodeMap();
|
||||
@SuppressWarnings("unused")
|
||||
int hash = foo.hashCode(); // make sure doesn't NPE
|
||||
@SuppressWarnings("unused")
|
||||
Set fii = foo.stringKeys(); // make sure doesn't NPE
|
||||
}
|
||||
|
||||
public void TestAUnicodeMapInverse() {
|
||||
UnicodeMap<Character> foo1 = new UnicodeMap<Character>()
|
||||
.putAll('a', 'z', 'b')
|
||||
.put("ab", 'c')
|
||||
.put('x', 'b')
|
||||
.put("xy", 'c')
|
||||
;
|
||||
Map<Character, UnicodeSet> target = new HashMap<Character, UnicodeSet>();
|
||||
foo1.addInverseTo(target);
|
||||
UnicodeMap<Character> reverse = new UnicodeMap().putAllInverse(target);
|
||||
assertEquals("", foo1, reverse);
|
||||
}
|
||||
|
||||
private void checkNext(UnicodeMap<String> map1, Map<Integer,String> map2, int limit) {
|
||||
logln("Comparing nextRange");
|
||||
Map localMap = new TreeMap();
|
||||
UnicodeMapIterator<String> mi = new UnicodeMapIterator<String>(map1);
|
||||
while (mi.nextRange()) {
|
||||
logln(Utility.hex(mi.codepoint) + ".." + Utility.hex(mi.codepointEnd) + " => " + mi.value);
|
||||
for (int i = mi.codepoint; i <= mi.codepointEnd; ++i) {
|
||||
//if (i >= limit) continue;
|
||||
localMap.put(i, mi.value);
|
||||
}
|
||||
}
|
||||
checkMap(map2, localMap);
|
||||
|
||||
logln("Comparing next");
|
||||
mi.reset();
|
||||
localMap = new TreeMap();
|
||||
// String lastValue = null;
|
||||
while (mi.next()) {
|
||||
// if (!UnicodeMap.areEqual(lastValue, mi.value)) {
|
||||
// // System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value);
|
||||
// lastValue = mi.value;
|
||||
// }
|
||||
//if (mi.codepoint >= limit) continue;
|
||||
localMap.put(mi.codepoint, mi.value);
|
||||
}
|
||||
checkMap(map2, localMap);
|
||||
}
|
||||
|
||||
public void check(UnicodeMap<String> map1, Map<Integer,String> map2, int counter) {
|
||||
for (int i = 0; i < LIMIT; ++i) {
|
||||
String value1 = map1.getValue(i);
|
||||
String value2 = map2.get(i);
|
||||
if (!UnicodeMap.areEqual(value1, value2)) {
|
||||
errln(counter + " Difference at " + Utility.hex(i)
|
||||
+ "\t UnicodeMap: " + value1
|
||||
+ "\t HashMap: " + value2);
|
||||
errln("UnicodeMap: " + map1);
|
||||
errln("Log: " + TestBoilerplate.show(log));
|
||||
errln("HashMap: " + TestBoilerplate.show(map2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void checkMap(Map m1, Map m2) {
|
||||
if (m1.equals(m2)) return;
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
Set m1entries = m1.entrySet();
|
||||
Set m2entries = m2.entrySet();
|
||||
getEntries("\r\nIn First, and not Second", m1entries, m2entries, buffer, 20);
|
||||
getEntries("\r\nIn Second, and not First", m2entries, m1entries, buffer, 20);
|
||||
errln(buffer.toString());
|
||||
}
|
||||
|
||||
static Comparator<Map.Entry<Integer, String>> ENTRY_COMPARATOR = new Comparator<Map.Entry<Integer, String>>() {
|
||||
public int compare(Map.Entry<Integer, String> o1, Map.Entry<Integer, String> o2) {
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return -1;
|
||||
if (o2 == null) return 1;
|
||||
Map.Entry<Integer, String> a = o1;
|
||||
Map.Entry<Integer, String> b = o2;
|
||||
int result = compare2(a.getKey(), b.getKey());
|
||||
if (result != 0) return result;
|
||||
return compare2(a.getValue(), b.getValue());
|
||||
}
|
||||
private <T extends Comparable> int compare2(T o1, T o2) {
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return -1;
|
||||
if (o2 == null) return 1;
|
||||
return o1.compareTo(o2);
|
||||
}
|
||||
};
|
||||
|
||||
private void getEntries(String title, Set<Map.Entry<Integer,String>> m1entries, Set<Map.Entry<Integer, String>> m2entries, StringBuilder buffer, int limit) {
|
||||
Set<Map.Entry<Integer, String>> m1_m2 = new TreeSet<Map.Entry<Integer, String>>(ENTRY_COMPARATOR);
|
||||
m1_m2.addAll(m1entries);
|
||||
m1_m2.removeAll(m2entries);
|
||||
buffer.append(title + ": " + m1_m2.size() + "\r\n");
|
||||
for (Entry<Integer, String> entry : m1_m2) {
|
||||
if (limit-- < 0) return;
|
||||
buffer.append(entry.getKey()).append(" => ")
|
||||
.append(entry.getValue()).append("\r\n");
|
||||
}
|
||||
}
|
||||
|
||||
static class UnicodeMapBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 30) return false;
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
int start = random.nextInt(25);
|
||||
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
|
||||
result.put(start, value);
|
||||
}
|
||||
list.add(result);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class StringBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 31) return false;
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result.append((char)random.nextInt(0xFF));
|
||||
}
|
||||
list.add(result.toString());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class UnicodeSetBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 32) return false;
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
result.add(random.nextInt(100));
|
||||
}
|
||||
list.add(result.toString());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,248 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
// TODO integrate this into the test framework
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.text.Collator;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.util.BagFormatter;
|
||||
import com.ibm.icu.dev.util.FileUtilities;
|
||||
import com.ibm.icu.dev.util.ICUPropertyFactory;
|
||||
import com.ibm.icu.dev.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.util.UnicodeProperty;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
// TODO change to use test framework
|
||||
public class TestBagFormatter {
|
||||
|
||||
static final void generatePropertyAliases(boolean showValues) {
|
||||
generatePropertyAliases(showValues, ICUPropertyFactory.make());
|
||||
}
|
||||
|
||||
static final void generatePropertyAliases(boolean showValues, UnicodeProperty.Factory ups) {
|
||||
Collator order = Collator.getInstance(Locale.ENGLISH);
|
||||
TreeSet props = new TreeSet(order);
|
||||
TreeSet values = new TreeSet(order);
|
||||
BagFormatter bf = new BagFormatter();
|
||||
props.addAll(ups.getAvailableNames());
|
||||
for (int i = UnicodeProperty.BINARY; i < UnicodeProperty.LIMIT_TYPE; ++i) {
|
||||
System.out.println(UnicodeProperty.getTypeName(i));
|
||||
Iterator it = props.iterator();
|
||||
while (it.hasNext()) {
|
||||
String propAlias = (String)it.next();
|
||||
UnicodeProperty up = ups.getProperty(propAlias);
|
||||
int type = up.getType();
|
||||
if (type != i) continue;
|
||||
System.out.println();
|
||||
System.out.println(propAlias + "\t" + bf.join(up.getNameAliases()));
|
||||
if (!showValues) continue;
|
||||
values.clear();
|
||||
if (type == UnicodeProperty.NUMERIC || type == UnicodeProperty.EXTENDED_NUMERIC) {
|
||||
UnicodeMap um = new UnicodeMap();
|
||||
um.putAll(up.getUnicodeMap());
|
||||
System.out.println(um.toString(new NumberComparator()));
|
||||
continue;
|
||||
}
|
||||
values.clear();
|
||||
values.addAll(up.getAvailableValues());
|
||||
Iterator it2 = values.iterator();
|
||||
while (it2.hasNext()) {
|
||||
String valueAlias = (String)it2.next();
|
||||
System.out.println("\t" + bf.join(valueAlias + "\t" + up.getValueAliases(valueAlias)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class NumberComparator implements Comparator {
|
||||
public int compare(Object o1, Object o2) {
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return 1;
|
||||
if (o2 == null) return -1;
|
||||
double n1 = Double.parseDouble((String)o1);
|
||||
double n2 = Double.parseDouble((String)o2);
|
||||
return n1 < n2 ? -1 : n1 > n2 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
System.out.println("Start");
|
||||
try {
|
||||
//readCharacters();
|
||||
UnicodeProperty prop = ICUPropertyFactory.make().getProperty("Canonicalcombiningclass");
|
||||
prop.getAvailableValues();
|
||||
|
||||
generatePropertyAliases(true);
|
||||
|
||||
BagFormatter bf = new BagFormatter();
|
||||
|
||||
UnicodeSet us = new UnicodeSet("[:gc=nd:]");
|
||||
FileUtilities.CONSOLE.println("[:gc=nd:]");
|
||||
bf.showSetNames(FileUtilities.CONSOLE,us);
|
||||
|
||||
us = new UnicodeSet("[:numeric_value=2:]");
|
||||
FileUtilities.CONSOLE.println("[:numeric_value=2:]");
|
||||
bf.showSetNames(FileUtilities.CONSOLE,us);
|
||||
|
||||
us = new UnicodeSet("[:numeric_type=numeric:]");
|
||||
FileUtilities.CONSOLE.println("[:numeric_type=numeric:]");
|
||||
bf.showSetNames(FileUtilities.CONSOLE,us);
|
||||
|
||||
UnicodeProperty.Factory ups = ICUPropertyFactory.make();
|
||||
us = ups.getSet("gc=mn", null, null);
|
||||
FileUtilities.CONSOLE.println("gc=mn");
|
||||
bf.showSetNames(FileUtilities.CONSOLE, us);
|
||||
|
||||
if (true) return;
|
||||
//showNames("Name", ".*MARK.*");
|
||||
//showNames("NFD", "a.+");
|
||||
//showNames("NFD", false);
|
||||
//showNames("Lowercase_Mapping", false);
|
||||
//TestUnicodePropertySource.test(true);
|
||||
//showNames(".*\\ \\-.*");
|
||||
|
||||
|
||||
//checkHTML();
|
||||
//testIsRTL();
|
||||
|
||||
//TestTokenizer.test();
|
||||
//RandomCollator.generate("collationTest.txt", null);
|
||||
|
||||
//TestPick.test();
|
||||
//printRandoms();
|
||||
//if (true) return;
|
||||
//testLocales();
|
||||
//if (true) return;
|
||||
/*
|
||||
TestCollator tc = new TestCollator();
|
||||
tc.test(RuleBasedCollator.getInstance(),1000);
|
||||
*/
|
||||
/*
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
sb.setLength(0);
|
||||
rc.nextRule(sb);
|
||||
System.out.println(sb);
|
||||
}
|
||||
*/
|
||||
} finally {
|
||||
System.out.println("End");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void testLocales() throws IOException {
|
||||
Locale[] locales = Collator.getAvailableLocales();
|
||||
Set s = new TreeSet(Collator.getInstance());
|
||||
for (int i = 0; i < locales.length; ++i) {
|
||||
String lang = locales[i].getLanguage();
|
||||
String dlang = locales[i].getDisplayLanguage();
|
||||
String country = locales[i].getCountry();
|
||||
String dcountry = locales[i].getDisplayCountry();
|
||||
if (country.equals("")) continue;
|
||||
s.add(""
|
||||
+ "\t" + dcountry
|
||||
+ "\t" + country
|
||||
+ "\t" + dlang
|
||||
+ "\t" + lang
|
||||
);
|
||||
}
|
||||
//CollectionFormatter cf = new CollectionFormatter();
|
||||
PrintWriter pw = FileUtilities.openUTF8Writer("", "countries.txt");
|
||||
Iterator it = s.iterator();
|
||||
while (it.hasNext()) {
|
||||
pw.println(it.next());
|
||||
}
|
||||
pw.close();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Use the number of significant digits to get a rounding value.
|
||||
*/
|
||||
/* static final double LOG10 = Math.log(10);
|
||||
public static void useSignificantDigits(double value, int digits) {
|
||||
double log10 = Math.log(value)/LOG10; // log[e]
|
||||
|
||||
}*/
|
||||
|
||||
static final UnicodeSet RTL = new UnicodeSet("[[:L:]&[[:bidi class=R:][:bidi class=AL:]]]");
|
||||
|
||||
static boolean isRTL(Locale loc) {
|
||||
// in 2.8 we can use the exemplar characters, but for 2.6 we have to work around it
|
||||
int[] scripts = UScript.getCode(loc);
|
||||
return new UnicodeSet()
|
||||
.applyIntPropertyValue(UProperty.SCRIPT, scripts == null ? UScript.LATIN : scripts[0])
|
||||
.retainAll(RTL).size() != 0;
|
||||
}
|
||||
|
||||
static void testIsRTL() {
|
||||
Locale[] locales = Locale.getAvailableLocales();
|
||||
Set s = new TreeSet();
|
||||
for (int i = 0; i < locales.length; ++i) {
|
||||
s.add((isRTL(locales[i]) ? "R " : "L ") + locales[i].getDisplayName());
|
||||
}
|
||||
Iterator it = s.iterator();
|
||||
while (it.hasNext()) {
|
||||
System.out.println(it.next());
|
||||
}
|
||||
}
|
||||
|
||||
static final Transliterator toHTML = Transliterator.createFromRules(
|
||||
"any-html",
|
||||
"'<' > '<' ;" +
|
||||
"'&' > '&' ;" +
|
||||
"'>' > '>' ;" +
|
||||
"'\"' > '"' ; ",
|
||||
Transliterator.FORWARD);
|
||||
static final Transliterator fromHTML = Transliterator.createFromRules(
|
||||
"html-any",
|
||||
"'<' < '&'[lL][Tt]';' ;" +
|
||||
"'&' < '&'[aA][mM][pP]';' ;" +
|
||||
"'>' < '&'[gG][tT]';' ;" +
|
||||
"'\"' < '&'[qQ][uU][oO][tT]';' ; ",
|
||||
Transliterator.REVERSE);
|
||||
|
||||
static void checkHTML() {
|
||||
String foo = "& n < b < \"ab\"";
|
||||
String fii = toHTML.transliterate(foo);
|
||||
System.out.println("in: " + foo);
|
||||
System.out.println("out: " + fii);
|
||||
System.out.println("in*: " + fromHTML.transliterate(fii));
|
||||
System.out.println("IN*: " + fromHTML.transliterate(fii.toUpperCase()));
|
||||
}
|
||||
/*
|
||||
static void showNames(String propAlias, boolean matches) {
|
||||
BagFormatter bf = new BagFormatter();
|
||||
UnicodeSet stuff;
|
||||
stuff = new UnicodePropertySource.ICU()
|
||||
.setPropertyAlias(propAlias)
|
||||
.getPropertySet(matches, null);
|
||||
System.out.println(bf.showSetNames(propAlias + " with " + matches, stuff));
|
||||
}
|
||||
|
||||
static void showNames(String propAlias, String pattern) {
|
||||
BagFormatter bf = new BagFormatter();
|
||||
UnicodeSet stuff;
|
||||
stuff = new UnicodePropertySource.ICU()
|
||||
.setPropertyAlias(propAlias)
|
||||
.getPropertySet(Pattern.compile(pattern).matcher(""), null);
|
||||
System.out.println(bf.showSetNames(propAlias + "with " + pattern, stuff));
|
||||
}
|
||||
*/
|
||||
}
|
@ -6,176 +6,19 @@
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.TestBoilerplate;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.util.CollectionUtilities;
|
||||
import com.ibm.icu.dev.util.ICUPropertyFactory;
|
||||
import com.ibm.icu.dev.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.util.UnicodeMapIterator;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class TestUtilities extends TestFmwk {
|
||||
static final int LIMIT = 0x15; // limit to make testing more realistic in terms of collisions
|
||||
static final int ITERATIONS = 1000000;
|
||||
static final boolean SHOW_PROGRESS = false;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestUtilities().run(args);
|
||||
}
|
||||
|
||||
SortedSet<String> log = new TreeSet<String>();
|
||||
static String[] TEST_VALUES = {"A", "B", "C", "D", "E", "F"};
|
||||
static Random random = new Random(12345);
|
||||
|
||||
public void TestUnicodeMapRandom() {
|
||||
// do random change to both, then compare
|
||||
random.setSeed(12345); // reproducable results
|
||||
logln("Comparing against HashMap");
|
||||
UnicodeMap<String> map1 = new UnicodeMap();
|
||||
Map<Integer, String> map2 = new HashMap<Integer, String>();
|
||||
for (int counter = 0; counter < ITERATIONS; ++counter) {
|
||||
int start = random.nextInt(LIMIT);
|
||||
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
|
||||
String logline = Utility.hex(start) + "\t" + value;
|
||||
if (SHOW_PROGRESS) logln(counter + "\t" + logline);
|
||||
log.add(logline);
|
||||
if (DEBUG && counter == 144) {
|
||||
System.out.println(" debug");
|
||||
}
|
||||
map1.put(start, value);
|
||||
map2.put(start, value);
|
||||
check(map1, map2, counter);
|
||||
}
|
||||
checkNext(map1, map2, LIMIT);
|
||||
}
|
||||
|
||||
public void TestUnicodeMapGeneralCategory() {
|
||||
logln("Setting General Category");
|
||||
UnicodeMap<String> map1 = new UnicodeMap();
|
||||
Map<Integer, String> map2 = new HashMap<Integer, String>();
|
||||
//Map<Integer, String> map3 = new TreeMap<Integer, String>();
|
||||
map1 = new UnicodeMap<String>();
|
||||
map2 = new TreeMap<Integer,String>();
|
||||
for (int cp = 0; cp <= SET_LIMIT; ++cp) {
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
//if (enumValue <= 0) continue; // for smaller set
|
||||
String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
map1.put(cp, value);
|
||||
map2.put(cp, value);
|
||||
}
|
||||
checkNext(map1, map2, Integer.MAX_VALUE);
|
||||
|
||||
logln("Comparing General Category");
|
||||
check(map1, map2, -1);
|
||||
logln("Comparing Values");
|
||||
Set<String> values1 = map1.getAvailableValues(new TreeSet<String>());
|
||||
Set<String> values2 = new TreeSet<String>(map2.values());
|
||||
if (!TestBoilerplate.verifySetsIdentical(this, values1, values2)) {
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
logln("Comparing Sets");
|
||||
for (Iterator<String> it = values1.iterator(); it.hasNext();) {
|
||||
String value = it.next();
|
||||
logln(value == null ? "null" : value);
|
||||
UnicodeSet set1 = map1.keySet(value);
|
||||
UnicodeSet set2 = TestBoilerplate.getSet(map2, value);
|
||||
if (!TestBoilerplate.verifySetsIdentical(this, set1, set2)) {
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static final UnicodeMap<String> SCRIPTS = ICUPropertyFactory.make().getProperty("script").getUnicodeMap_internal();
|
||||
static final UnicodeMap<String> GC = ICUPropertyFactory.make().getProperty("general_category").getUnicodeMap_internal();
|
||||
|
||||
public void TestUnicodeMapCompose() {
|
||||
logln("Getting Scripts");
|
||||
|
||||
UnicodeMap.Composer<String> composer = new UnicodeMap.Composer<String>() {
|
||||
@Override
|
||||
public String compose(int codepoint, String string, String a, String b) {
|
||||
return a.toString() + "_" + b.toString();
|
||||
}
|
||||
};
|
||||
|
||||
logln("Trying Compose");
|
||||
|
||||
// Map<Integer, String> map2 = new HashMap<Integer, String>();
|
||||
// Map<Integer, String> map3 = new TreeMap<Integer, String>();
|
||||
UnicodeMap<String> composed = ((UnicodeMap)SCRIPTS.cloneAsThawed()).composeWith(GC, composer);
|
||||
String last = "";
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
// if (i == 888) {
|
||||
// int debug = 0;
|
||||
// }
|
||||
String comp = composed.getValue(i);
|
||||
String gc = GC.getValue(i);
|
||||
String sc = SCRIPTS.getValue(i);
|
||||
if (!comp.equals(composer.compose(i, null, sc, gc))) {
|
||||
errln("Failed compose at: " + i);
|
||||
break;
|
||||
}
|
||||
if (!last.equals(comp)) {
|
||||
logln(Utility.hex(i) + "\t" + comp);
|
||||
last = comp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBoilerplate() {
|
||||
// check boilerplate
|
||||
List argList = new ArrayList();
|
||||
argList.add("TestMain");
|
||||
if (params.verbose) argList.add("-verbose");
|
||||
String[] args = new String[argList.size()];
|
||||
argList.toArray(args);
|
||||
new UnicodeMapBoilerplate().run(args);
|
||||
// TODO: the following is not being reached
|
||||
new UnicodeSetBoilerplate().run(args);
|
||||
}
|
||||
|
||||
public void TestAUnicodeMap2() {
|
||||
UnicodeMap foo = new UnicodeMap();
|
||||
@SuppressWarnings("unused")
|
||||
int hash = foo.hashCode(); // make sure doesn't NPE
|
||||
@SuppressWarnings("unused")
|
||||
Set fii = foo.stringKeys(); // make sure doesn't NPE
|
||||
}
|
||||
|
||||
public void TestAUnicodeMapInverse() {
|
||||
UnicodeMap<Character> foo1 = new UnicodeMap<Character>()
|
||||
.putAll('a', 'z', 'b')
|
||||
.put("ab", 'c')
|
||||
.put('x', 'b')
|
||||
.put("xy", 'c')
|
||||
;
|
||||
Map<Character, UnicodeSet> target = new HashMap<Character, UnicodeSet>();
|
||||
foo1.addInverseTo(target);
|
||||
UnicodeMap<Character> reverse = new UnicodeMap().putAllInverse(target);
|
||||
assertEquals("", foo1, reverse);
|
||||
}
|
||||
|
||||
public void TestCollectionUtilitySpeed() {
|
||||
TreeSet ts1 = new TreeSet();
|
||||
TreeSet ts2 = new TreeSet();
|
||||
@ -201,7 +44,7 @@ public class TestUtilities extends TestFmwk {
|
||||
private void timeAndCompare(TreeSet ts1, TreeSet ts2, int iterations, boolean expected, double factorOfStandard) {
|
||||
double utilityTimeSorted = timeUtilityContainsAll(iterations, ts1, ts2, expected)/(double)iterations;
|
||||
double standardTimeSorted = timeStandardContainsAll(iterations, ts1, ts2, expected)/(double)iterations;
|
||||
|
||||
|
||||
if (utilityTimeSorted < standardTimeSorted*factorOfStandard) {
|
||||
logln("Sorted: Utility time (" + utilityTimeSorted + ") << Standard duration (" + standardTimeSorted + "); " + 100*(utilityTimeSorted/standardTimeSorted) + "%");
|
||||
} else {
|
||||
@ -245,7 +88,7 @@ public class TestUtilities extends TestFmwk {
|
||||
}
|
||||
return utilityTime;
|
||||
}
|
||||
|
||||
|
||||
public void TestCollectionUtilities() {
|
||||
String[][] test = {{"a", "c", "e", "g", "h", "z"}, {"b", "d", "f", "h", "w"}, { "a", "b" }, { "a", "d" }, {"d"}, {}}; //
|
||||
int resultMask = 0;
|
||||
@ -312,252 +155,4 @@ public class TestUtilities extends TestFmwk {
|
||||
errln("Fails relation: " + a + " \t" + RelationName[relation] + " \t" + b);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkNext(UnicodeMap<String> map1, Map<Integer,String> map2, int limit) {
|
||||
logln("Comparing nextRange");
|
||||
Map localMap = new TreeMap();
|
||||
UnicodeMapIterator<String> mi = new UnicodeMapIterator<String>(map1);
|
||||
while (mi.nextRange()) {
|
||||
logln(Utility.hex(mi.codepoint) + ".." + Utility.hex(mi.codepointEnd) + " => " + mi.value);
|
||||
for (int i = mi.codepoint; i <= mi.codepointEnd; ++i) {
|
||||
//if (i >= limit) continue;
|
||||
localMap.put(i, mi.value);
|
||||
}
|
||||
}
|
||||
checkMap(map2, localMap);
|
||||
|
||||
logln("Comparing next");
|
||||
mi.reset();
|
||||
localMap = new TreeMap();
|
||||
// String lastValue = null;
|
||||
while (mi.next()) {
|
||||
// if (!UnicodeMap.areEqual(lastValue, mi.value)) {
|
||||
// // System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value);
|
||||
// lastValue = mi.value;
|
||||
// }
|
||||
//if (mi.codepoint >= limit) continue;
|
||||
localMap.put(mi.codepoint, mi.value);
|
||||
}
|
||||
checkMap(map2, localMap);
|
||||
}
|
||||
|
||||
public void check(UnicodeMap<String> map1, Map<Integer,String> map2, int counter) {
|
||||
for (int i = 0; i < LIMIT; ++i) {
|
||||
String value1 = map1.getValue(i);
|
||||
String value2 = map2.get(i);
|
||||
if (!UnicodeMap.areEqual(value1, value2)) {
|
||||
errln(counter + " Difference at " + Utility.hex(i)
|
||||
+ "\t UnicodeMap: " + value1
|
||||
+ "\t HashMap: " + value2);
|
||||
errln("UnicodeMap: " + map1);
|
||||
errln("Log: " + TestBoilerplate.show(log));
|
||||
errln("HashMap: " + TestBoilerplate.show(map2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void checkMap(Map m1, Map m2) {
|
||||
if (m1.equals(m2)) return;
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
Set m1entries = m1.entrySet();
|
||||
Set m2entries = m2.entrySet();
|
||||
getEntries("\r\nIn First, and not Second", m1entries, m2entries, buffer, 20);
|
||||
getEntries("\r\nIn Second, and not First", m2entries, m1entries, buffer, 20);
|
||||
errln(buffer.toString());
|
||||
}
|
||||
|
||||
static Comparator<Map.Entry<Integer, String>> ENTRY_COMPARATOR = new Comparator<Map.Entry<Integer, String>>() {
|
||||
public int compare(Map.Entry<Integer, String> o1, Map.Entry<Integer, String> o2) {
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return -1;
|
||||
if (o2 == null) return 1;
|
||||
Map.Entry<Integer, String> a = o1;
|
||||
Map.Entry<Integer, String> b = o2;
|
||||
int result = compare2(a.getKey(), b.getKey());
|
||||
if (result != 0) return result;
|
||||
return compare2(a.getValue(), b.getValue());
|
||||
}
|
||||
private <T extends Comparable> int compare2(T o1, T o2) {
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return -1;
|
||||
if (o2 == null) return 1;
|
||||
return o1.compareTo(o2);
|
||||
}
|
||||
};
|
||||
|
||||
private void getEntries(String title, Set<Map.Entry<Integer,String>> m1entries, Set<Map.Entry<Integer, String>> m2entries, StringBuilder buffer, int limit) {
|
||||
Set<Map.Entry<Integer, String>> m1_m2 = new TreeSet<Map.Entry<Integer, String>>(ENTRY_COMPARATOR);
|
||||
m1_m2.addAll(m1entries);
|
||||
m1_m2.removeAll(m2entries);
|
||||
buffer.append(title + ": " + m1_m2.size() + "\r\n");
|
||||
for (Entry<Integer, String> entry : m1_m2) {
|
||||
if (limit-- < 0) return;
|
||||
buffer.append(entry.getKey()).append(" => ")
|
||||
.append(entry.getValue()).append("\r\n");
|
||||
}
|
||||
}
|
||||
|
||||
static final int SET_LIMIT = 0x10FFFF;
|
||||
static final int CHECK_LIMIT = 0xFFFF;
|
||||
static final NumberFormat pf = NumberFormat.getPercentInstance();
|
||||
static final NumberFormat nf = NumberFormat.getInstance();
|
||||
|
||||
public void TestTime() {
|
||||
boolean shortTest = getInclusion() < 10;
|
||||
double hashTime, umTime, icuTime, treeTime;
|
||||
int warmup = shortTest ? 1 : 20;
|
||||
umTime = checkSetTime(warmup, 0);
|
||||
hashTime = checkSetTime(warmup, 1);
|
||||
logln("Percentage: " + pf.format(hashTime/umTime));
|
||||
treeTime = checkSetTime(warmup, 3);
|
||||
logln("Percentage: " + pf.format(treeTime/umTime));
|
||||
//logln(map1.toString());
|
||||
|
||||
if (shortTest) {
|
||||
return;
|
||||
}
|
||||
|
||||
umTime = checkGetTime(1000, 0);
|
||||
hashTime = checkGetTime(1000, 1);
|
||||
logln("Percentage: " + pf.format(hashTime/umTime));
|
||||
icuTime = checkGetTime(1000, 2);
|
||||
logln("Percentage: " + pf.format(icuTime/umTime));
|
||||
treeTime = checkGetTime(1000, 3);
|
||||
logln("Percentage: " + pf.format(treeTime/umTime));
|
||||
}
|
||||
|
||||
int propEnum = UProperty.GENERAL_CATEGORY;
|
||||
|
||||
double checkSetTime(int iterations, int type) {
|
||||
_checkSetTime(1,type);
|
||||
double result = _checkSetTime(iterations, type);
|
||||
logln((type == 0 ? "UnicodeMap" : type == 1 ? "HashMap" : type == 2 ? "ICU" : "TreeMap") + "\t" + nf.format(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
double _checkSetTime(int iterations, int type) {
|
||||
UnicodeMap<String> map1 = SCRIPTS;
|
||||
Map<Integer,String> map2 = map1.putAllCodepointsInto(new HashMap<Integer,String>());
|
||||
Map<Integer, String> map3 = new TreeMap<Integer, String>(map2);
|
||||
System.gc();
|
||||
double start = System.currentTimeMillis();
|
||||
for (int j = 0; j < iterations; ++j)
|
||||
for (int cp = 0; cp <= SET_LIMIT; ++cp) {
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
if (enumValue <= 0) continue; // for smaller set
|
||||
String value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
switch(type) {
|
||||
case 0: map1.put(cp, value); break;
|
||||
case 1: map2.put(cp, value); break;
|
||||
case 3: map3.put(cp, value); break;
|
||||
}
|
||||
}
|
||||
double end = System.currentTimeMillis();
|
||||
return (end-start)/1000/iterations;
|
||||
}
|
||||
|
||||
double checkGetTime(int iterations, int type) {
|
||||
UnicodeMap<String> map1 = new UnicodeMap<String>();
|
||||
Map<Integer,String> map2 = map1.putAllCodepointsInto(new HashMap<Integer,String>());
|
||||
Map<Integer, String> map3 = new TreeMap<Integer, String>();
|
||||
_checkGetTime(map1, map2, map3, 1,type); // warmup
|
||||
double result = _checkGetTime(map1, map2, map3, iterations, type);
|
||||
logln((type == 0 ? "UnicodeMap" : type == 1 ? "HashMap" : type == 2 ? "ICU" : "TreeMap") + "\t" + nf.format(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
double _checkGetTime(UnicodeMap<String> map1, Map<Integer,String> map2, Map<Integer,String> map3, int iterations, int type) {
|
||||
System.gc();
|
||||
double start = System.currentTimeMillis();
|
||||
for (int j = 0; j < iterations; ++j)
|
||||
for (int cp = 0; cp < CHECK_LIMIT; ++cp) {
|
||||
switch (type) {
|
||||
case 0: map1.getValue(cp); break;
|
||||
case 1: map2.get(cp); break;
|
||||
case 2:
|
||||
int enumValue = UCharacter.getIntPropertyValue(cp, propEnum);
|
||||
//if (enumValue <= 0) continue;
|
||||
UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
break;
|
||||
case 3: map3.get(cp); break;
|
||||
}
|
||||
}
|
||||
double end = System.currentTimeMillis();
|
||||
return (end-start)/1000/iterations;
|
||||
}
|
||||
|
||||
static class UnicodeMapBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 30) return false;
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
int start = random.nextInt(25);
|
||||
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
|
||||
result.put(start, value);
|
||||
}
|
||||
list.add(result);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class StringBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 31) return false;
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result.append((char)random.nextInt(0xFF));
|
||||
}
|
||||
list.add(result.toString());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class UnicodeSetBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 32) return false;
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
result.add(random.nextInt(100));
|
||||
}
|
||||
list.add(result.toString());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,563 +0,0 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
// copied from the Transliterator demo
|
||||
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* Incrementally returns the set of all strings that case-fold to the same value.
|
||||
*/
|
||||
public class CaseIterator {
|
||||
|
||||
// testing stuff
|
||||
private static Transliterator toName = Transliterator.getInstance("[:^ascii:] Any-Name");
|
||||
private static Transliterator toHex = Transliterator.getInstance("[:^ascii:] Any-Hex");
|
||||
private static Transliterator toHex2 = Transliterator.getInstance("[[^\u0021-\u007F]-[,]] Any-Hex");
|
||||
|
||||
// global tables (could be precompiled)
|
||||
private static Map fromCaseFold = new HashMap();
|
||||
private static Map toCaseFold = new HashMap();
|
||||
private static int maxLength = 0;
|
||||
|
||||
// This exception list is generated on the console by turning on the GENERATE flag,
|
||||
// which MUST be false for normal operation.
|
||||
// Once the list is generated, it is pasted in here.
|
||||
// A bit of a kludge, but this bootstrapping is the easiest way
|
||||
// to get around certain complications in the data.
|
||||
|
||||
private static final boolean GENERATE = false;
|
||||
|
||||
private static final boolean DUMP = false;
|
||||
|
||||
private static String[][] exceptionList = {
|
||||
// a\N{MODIFIER LETTER RIGHT HALF RING}
|
||||
{"a\u02BE","A\u02BE","a\u02BE",},
|
||||
// ff
|
||||
{"ff","FF","Ff","fF","ff",},
|
||||
// ffi
|
||||
{"ffi","FFI","FFi","FfI","Ffi","F\uFB01","fFI","fFi","ffI","ffi","f\uFB01","\uFB00I","\uFB00i",},
|
||||
// ffl
|
||||
{"ffl","FFL","FFl","FfL","Ffl","F\uFB02","fFL","fFl","ffL","ffl","f\uFB02","\uFB00L","\uFB00l",},
|
||||
// fi
|
||||
{"fi","FI","Fi","fI","fi",},
|
||||
// fl
|
||||
{"fl","FL","Fl","fL","fl",},
|
||||
// h\N{COMBINING MACRON BELOW}
|
||||
{"h\u0331","H\u0331","h\u0331",},
|
||||
// i\N{COMBINING DOT ABOVE}
|
||||
{"i\u0307","I\u0307","i\u0307",},
|
||||
// j\N{COMBINING CARON}
|
||||
{"j\u030C","J\u030C","j\u030C",},
|
||||
// ss
|
||||
{"ss","SS","Ss","S\u017F","sS","ss","s\u017F","\u017FS","\u017Fs","\u017F\u017F",},
|
||||
// st
|
||||
{"st","ST","St","sT","st","\u017FT","\u017Ft",},
|
||||
// t\N{COMBINING DIAERESIS}
|
||||
{"t\u0308","T\u0308","t\u0308",},
|
||||
// w\N{COMBINING RING ABOVE}
|
||||
{"w\u030A","W\u030A","w\u030A",},
|
||||
// y\N{COMBINING RING ABOVE}
|
||||
{"y\u030A","Y\u030A","y\u030A",},
|
||||
// \N{MODIFIER LETTER APOSTROPHE}n
|
||||
{"\u02BCn","\u02BCN","\u02BCn",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03AC\u03B9","\u0386\u0345","\u0386\u0399","\u0386\u03B9","\u0386\u1FBE","\u03AC\u0345","\u03AC\u0399","\u03AC\u03B9","\u03AC\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03AE\u03B9","\u0389\u0345","\u0389\u0399","\u0389\u03B9","\u0389\u1FBE","\u03AE\u0345","\u03AE\u0399","\u03AE\u03B9","\u03AE\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03B1\u0342","\u0391\u0342","\u03B1\u0342",},
|
||||
// \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03B1\u0342\u03B9","\u0391\u0342\u0345","\u0391\u0342\u0399","\u0391\u0342\u03B9","\u0391\u0342\u1FBE",
|
||||
"\u03B1\u0342\u0345","\u03B1\u0342\u0399","\u03B1\u0342\u03B9","\u03B1\u0342\u1FBE","\u1FB6\u0345",
|
||||
"\u1FB6\u0399","\u1FB6\u03B9","\u1FB6\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03B1\u03B9","\u0391\u0345","\u0391\u0399","\u0391\u03B9","\u0391\u1FBE","\u03B1\u0345","\u03B1\u0399","\u03B1\u03B9","\u03B1\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03B7\u0342","\u0397\u0342","\u03B7\u0342",},
|
||||
// \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03B7\u0342\u03B9","\u0397\u0342\u0345","\u0397\u0342\u0399","\u0397\u0342\u03B9","\u0397\u0342\u1FBE",
|
||||
"\u03B7\u0342\u0345","\u03B7\u0342\u0399","\u03B7\u0342\u03B9","\u03B7\u0342\u1FBE","\u1FC6\u0345","\u1FC6\u0399",
|
||||
"\u1FC6\u03B9","\u1FC6\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03B7\u03B9","\u0397\u0345","\u0397\u0399","\u0397\u03B9","\u0397\u1FBE","\u03B7\u0345","\u03B7\u0399","\u03B7\u03B9","\u03B7\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
|
||||
{"\u03B9\u0308\u0300","\u0345\u0308\u0300","\u0399\u0308\u0300","\u03B9\u0308\u0300","\u1FBE\u0308\u0300",},
|
||||
// \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
|
||||
{"\u03B9\u0308\u0301","\u0345\u0308\u0301","\u0399\u0308\u0301","\u03B9\u0308\u0301","\u1FBE\u0308\u0301",},
|
||||
// \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03B9\u0308\u0342","\u0345\u0308\u0342","\u0399\u0308\u0342","\u03B9\u0308\u0342","\u1FBE\u0308\u0342",},
|
||||
// \N{GREEK SMALL LETTER IOTA}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03B9\u0342","\u0345\u0342","\u0399\u0342","\u03B9\u0342","\u1FBE\u0342",},
|
||||
// \N{GREEK SMALL LETTER RHO}\N{COMBINING COMMA ABOVE}
|
||||
{"\u03C1\u0313","\u03A1\u0313","\u03C1\u0313","\u03F1\u0313",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
|
||||
{"\u03C5\u0308\u0300","\u03A5\u0308\u0300","\u03C5\u0308\u0300",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
|
||||
{"\u03C5\u0308\u0301","\u03A5\u0308\u0301","\u03C5\u0308\u0301",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03C5\u0308\u0342","\u03A5\u0308\u0342","\u03C5\u0308\u0342",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}
|
||||
{"\u03C5\u0313","\u03A5\u0313","\u03C5\u0313",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GRAVE ACCENT}
|
||||
{"\u03C5\u0313\u0300","\u03A5\u0313\u0300","\u03C5\u0313\u0300","\u1F50\u0300",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING ACUTE ACCENT}
|
||||
{"\u03C5\u0313\u0301","\u03A5\u0313\u0301","\u03C5\u0313\u0301","\u1F50\u0301",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03C5\u0313\u0342","\u03A5\u0313\u0342","\u03C5\u0313\u0342","\u1F50\u0342",},
|
||||
// \N{GREEK SMALL LETTER UPSILON}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03C5\u0342","\u03A5\u0342","\u03C5\u0342",},
|
||||
// \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}
|
||||
{"\u03C9\u0342","\u03A9\u0342","\u03C9\u0342","\u2126\u0342",},
|
||||
// \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03C9\u0342\u03B9","\u03A9\u0342\u0345","\u03A9\u0342\u0399","\u03A9\u0342\u03B9","\u03A9\u0342\u1FBE","\u03C9\u0342\u0345","\u03C9\u0342\u0399","\u03C9\u0342\u03B9","\u03C9\u0342\u1FBE","\u1FF6\u0345",
|
||||
"\u1FF6\u0399","\u1FF6\u03B9","\u1FF6\u1FBE","\u2126\u0342\u0345","\u2126\u0342\u0399","\u2126\u0342\u03B9","\u2126\u0342\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03C9\u03B9","\u03A9\u0345","\u03A9\u0399","\u03A9\u03B9","\u03A9\u1FBE","\u03C9\u0345","\u03C9\u0399","\u03C9\u03B9","\u03C9\u1FBE","\u2126\u0345","\u2126\u0399","\u2126\u03B9","\u2126\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u03CE\u03B9","\u038F\u0345","\u038F\u0399","\u038F\u03B9","\u038F\u1FBE","\u03CE\u0345","\u03CE\u0399","\u03CE\u03B9","\u03CE\u1FBE",},
|
||||
// \N{ARMENIAN SMALL LETTER ECH}\N{ARMENIAN SMALL LETTER YIWN}
|
||||
{"\u0565\u0582","\u0535\u0552","\u0535\u0582","\u0565\u0552","\u0565\u0582",},
|
||||
// \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER ECH}
|
||||
{"\u0574\u0565","\u0544\u0535","\u0544\u0565","\u0574\u0535","\u0574\u0565",},
|
||||
// \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER INI}
|
||||
{"\u0574\u056B","\u0544\u053B","\u0544\u056B","\u0574\u053B","\u0574\u056B",},
|
||||
// \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER XEH}
|
||||
{"\u0574\u056D","\u0544\u053D","\u0544\u056D","\u0574\u053D","\u0574\u056D",},
|
||||
// \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER NOW}
|
||||
{"\u0574\u0576","\u0544\u0546","\u0544\u0576","\u0574\u0546","\u0574\u0576",},
|
||||
// \N{ARMENIAN SMALL LETTER VEW}\N{ARMENIAN SMALL LETTER NOW}
|
||||
{"\u057E\u0576","\u054E\u0546","\u054E\u0576","\u057E\u0546","\u057E\u0576",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F00\u03B9","\u1F00\u0345","\u1F00\u0399","\u1F00\u03B9","\u1F00\u1FBE","\u1F08\u0345","\u1F08\u0399","\u1F08\u03B9","\u1F08\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F01\u03B9","\u1F01\u0345","\u1F01\u0399","\u1F01\u03B9","\u1F01\u1FBE","\u1F09\u0345","\u1F09\u0399","\u1F09\u03B9","\u1F09\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F02\u03B9","\u1F02\u0345","\u1F02\u0399","\u1F02\u03B9","\u1F02\u1FBE","\u1F0A\u0345","\u1F0A\u0399","\u1F0A\u03B9","\u1F0A\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F03\u03B9","\u1F03\u0345","\u1F03\u0399","\u1F03\u03B9","\u1F03\u1FBE","\u1F0B\u0345","\u1F0B\u0399","\u1F0B\u03B9","\u1F0B\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F04\u03B9","\u1F04\u0345","\u1F04\u0399","\u1F04\u03B9","\u1F04\u1FBE","\u1F0C\u0345","\u1F0C\u0399","\u1F0C\u03B9","\u1F0C\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F05\u03B9","\u1F05\u0345","\u1F05\u0399","\u1F05\u03B9","\u1F05\u1FBE","\u1F0D\u0345","\u1F0D\u0399","\u1F0D\u03B9","\u1F0D\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F06\u03B9","\u1F06\u0345","\u1F06\u0399","\u1F06\u03B9","\u1F06\u1FBE","\u1F0E\u0345","\u1F0E\u0399","\u1F0E\u03B9","\u1F0E\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F07\u03B9","\u1F07\u0345","\u1F07\u0399","\u1F07\u03B9","\u1F07\u1FBE","\u1F0F\u0345","\u1F0F\u0399","\u1F0F\u03B9","\u1F0F\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F20\u03B9","\u1F20\u0345","\u1F20\u0399","\u1F20\u03B9","\u1F20\u1FBE","\u1F28\u0345","\u1F28\u0399","\u1F28\u03B9","\u1F28\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F21\u03B9","\u1F21\u0345","\u1F21\u0399","\u1F21\u03B9","\u1F21\u1FBE","\u1F29\u0345","\u1F29\u0399","\u1F29\u03B9","\u1F29\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F22\u03B9","\u1F22\u0345","\u1F22\u0399","\u1F22\u03B9","\u1F22\u1FBE","\u1F2A\u0345","\u1F2A\u0399","\u1F2A\u03B9","\u1F2A\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F23\u03B9","\u1F23\u0345","\u1F23\u0399","\u1F23\u03B9","\u1F23\u1FBE","\u1F2B\u0345","\u1F2B\u0399","\u1F2B\u03B9","\u1F2B\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F24\u03B9","\u1F24\u0345","\u1F24\u0399","\u1F24\u03B9","\u1F24\u1FBE","\u1F2C\u0345","\u1F2C\u0399","\u1F2C\u03B9","\u1F2C\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F25\u03B9","\u1F25\u0345","\u1F25\u0399","\u1F25\u03B9","\u1F25\u1FBE","\u1F2D\u0345","\u1F2D\u0399","\u1F2D\u03B9","\u1F2D\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F26\u03B9","\u1F26\u0345","\u1F26\u0399","\u1F26\u03B9","\u1F26\u1FBE","\u1F2E\u0345","\u1F2E\u0399","\u1F2E\u03B9","\u1F2E\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F27\u03B9","\u1F27\u0345","\u1F27\u0399","\u1F27\u03B9","\u1F27\u1FBE","\u1F2F\u0345","\u1F2F\u0399","\u1F2F\u03B9","\u1F2F\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F60\u03B9","\u1F60\u0345","\u1F60\u0399","\u1F60\u03B9","\u1F60\u1FBE","\u1F68\u0345","\u1F68\u0399","\u1F68\u03B9","\u1F68\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F61\u03B9","\u1F61\u0345","\u1F61\u0399","\u1F61\u03B9","\u1F61\u1FBE","\u1F69\u0345","\u1F69\u0399","\u1F69\u03B9","\u1F69\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F62\u03B9","\u1F62\u0345","\u1F62\u0399","\u1F62\u03B9","\u1F62\u1FBE","\u1F6A\u0345","\u1F6A\u0399","\u1F6A\u03B9","\u1F6A\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F63\u03B9","\u1F63\u0345","\u1F63\u0399","\u1F63\u03B9","\u1F63\u1FBE","\u1F6B\u0345","\u1F6B\u0399","\u1F6B\u03B9","\u1F6B\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F64\u03B9","\u1F64\u0345","\u1F64\u0399","\u1F64\u03B9","\u1F64\u1FBE","\u1F6C\u0345","\u1F6C\u0399","\u1F6C\u03B9","\u1F6C\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F65\u03B9","\u1F65\u0345","\u1F65\u0399","\u1F65\u03B9","\u1F65\u1FBE","\u1F6D\u0345","\u1F6D\u0399","\u1F6D\u03B9","\u1F6D\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F66\u03B9","\u1F66\u0345","\u1F66\u0399","\u1F66\u03B9","\u1F66\u1FBE","\u1F6E\u0345","\u1F6E\u0399","\u1F6E\u03B9","\u1F6E\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F67\u03B9","\u1F67\u0345","\u1F67\u0399","\u1F67\u03B9","\u1F67\u1FBE","\u1F6F\u0345","\u1F6F\u0399","\u1F6F\u03B9","\u1F6F\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ALPHA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F70\u03B9","\u1F70\u0345","\u1F70\u0399","\u1F70\u03B9","\u1F70\u1FBE","\u1FBA\u0345","\u1FBA\u0399","\u1FBA\u03B9","\u1FBA\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER ETA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F74\u03B9","\u1F74\u0345","\u1F74\u0399","\u1F74\u03B9","\u1F74\u1FBE","\u1FCA\u0345","\u1FCA\u0399","\u1FCA\u03B9","\u1FCA\u1FBE",},
|
||||
// \N{GREEK SMALL LETTER OMEGA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
|
||||
{"\u1F7C\u03B9","\u1F7C\u0345","\u1F7C\u0399","\u1F7C\u03B9","\u1F7C\u1FBE","\u1FFA\u0345","\u1FFA\u0399","\u1FFA\u03B9","\u1FFA\u1FBE",},
|
||||
};
|
||||
|
||||
// this initializes the data used to generate the case-equivalents
|
||||
|
||||
static {
|
||||
|
||||
// Gather up the exceptions in a form we can use
|
||||
|
||||
if (!GENERATE) {
|
||||
for (int i = 0; i < exceptionList.length; ++i) {
|
||||
String[] exception = exceptionList[i];
|
||||
Set s = new HashSet();
|
||||
// there has to be some method to do the following, but I can't find it in the collections
|
||||
for (int j = 0; j < exception.length; ++j) {
|
||||
s.add(exception[j]);
|
||||
}
|
||||
fromCaseFold.put(exception[0], s);
|
||||
}
|
||||
}
|
||||
|
||||
// walk through all the characters, and at every case fold result,
|
||||
// put a set of all the characters that map to that result
|
||||
|
||||
boolean defaultmapping = true; // false for turkish
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
int cat = UCharacter.getType(i);
|
||||
if (cat == Character.UNASSIGNED || cat == Character.PRIVATE_USE) continue;
|
||||
|
||||
String cp = UTF16.valueOf(i);
|
||||
String mapped = UCharacter.foldCase(cp, defaultmapping);
|
||||
if (mapped.equals(cp)) continue;
|
||||
|
||||
if (maxLength < mapped.length()) maxLength = mapped.length();
|
||||
|
||||
// at this point, have different case folding
|
||||
|
||||
Set s = (Set) fromCaseFold.get(mapped);
|
||||
if (s == null) {
|
||||
s = new HashSet();
|
||||
s.add(mapped); // add the case fold result itself
|
||||
fromCaseFold.put(mapped, s);
|
||||
}
|
||||
s.add(cp);
|
||||
toCaseFold.put(cp, mapped);
|
||||
toCaseFold.put(mapped, mapped); // add mapping to self
|
||||
}
|
||||
|
||||
// Emit the final data
|
||||
|
||||
if (DUMP) {
|
||||
System.out.println("maxLength = " + maxLength);
|
||||
|
||||
System.out.println("\nfromCaseFold:");
|
||||
Iterator it = fromCaseFold.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Object key = it.next();
|
||||
System.out.print(" " + toHex2.transliterate((String)key) + ": ");
|
||||
Set s = (Set) fromCaseFold.get(key);
|
||||
Iterator it2 = s.iterator();
|
||||
boolean first = true;
|
||||
while (it2.hasNext()) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
System.out.print(", ");
|
||||
}
|
||||
System.out.print(toHex2.transliterate((String)it2.next()));
|
||||
}
|
||||
System.out.println("");
|
||||
}
|
||||
|
||||
System.out.println("\ntoCaseFold:");
|
||||
it = toCaseFold.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
String value = (String) toCaseFold.get(key);
|
||||
System.out.println(" " + toHex2.transliterate(key) + ": " + toHex2.transliterate(value));
|
||||
}
|
||||
}
|
||||
|
||||
// Now convert all those sets into linear arrays
|
||||
// We can't do this in place in Java, so make a temporary target array
|
||||
|
||||
// Note: This could be transformed into a single array, with offsets into it.
|
||||
// Might be best choice in C.
|
||||
|
||||
|
||||
Map fromCaseFold2 = new HashMap();
|
||||
Iterator it = fromCaseFold.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Object key = it.next();
|
||||
Set s = (Set) fromCaseFold.get(key);
|
||||
String[] temp = new String[s.size()];
|
||||
s.toArray(temp);
|
||||
fromCaseFold2.put(key, temp);
|
||||
}
|
||||
fromCaseFold = fromCaseFold2;
|
||||
|
||||
// We have processed everything, so the iterator will now work
|
||||
// The following is normally OFF.
|
||||
// It is here to generate (under the GENERATE flag) the static exception list.
|
||||
// It must be at the very end of initialization, so that the iterator is functional.
|
||||
// (easiest to do it that way)
|
||||
|
||||
if (GENERATE) {
|
||||
|
||||
// first get small set of items that have multiple characters
|
||||
|
||||
Set multichars = new TreeSet();
|
||||
it = fromCaseFold.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
if (UTF16.countCodePoint(key) < 2) continue;
|
||||
multichars.add(key);
|
||||
}
|
||||
|
||||
// now we will go through each of them.
|
||||
|
||||
CaseIterator ci = new CaseIterator();
|
||||
it = multichars.iterator();
|
||||
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
|
||||
// here is a nasty complication. Take 'ffi' ligature. We
|
||||
// can't just close it, since we would miss the combination
|
||||
// that includes the 'fi' => "fi" ligature
|
||||
// so first do a pass through, and add substring combinations
|
||||
// we call this a 'partial closure'
|
||||
|
||||
Set partialClosure = new TreeSet();
|
||||
partialClosure.add(key);
|
||||
|
||||
if (UTF16.countCodePoint(key) > 2) {
|
||||
Iterator multiIt2 = multichars.iterator();
|
||||
while (multiIt2.hasNext()) {
|
||||
String otherKey = (String) multiIt2.next();
|
||||
if (otherKey.length() >= key.length()) continue;
|
||||
int pos = -1;
|
||||
while (true) {
|
||||
// The following is not completely general
|
||||
// but works for the actual cased stuff,
|
||||
// and should work for future characters, since we won't have
|
||||
// more ligatures & other oddities.
|
||||
pos = key.indexOf(otherKey, pos+1);
|
||||
if (pos < 0) break;
|
||||
int endPos = pos + otherKey.length();
|
||||
// we know we have a proper substring,
|
||||
// so get the combinations
|
||||
String[] choices = (String[]) fromCaseFold.get(otherKey);
|
||||
for (int ii = 0; ii < choices.length; ++ii) {
|
||||
String patchwork = key.substring(0, pos)
|
||||
+ choices[ii]
|
||||
+ key.substring(endPos);
|
||||
partialClosure.add(patchwork);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// now, for each thing in the partial closure, get its
|
||||
// case closure and add it to the final result.
|
||||
|
||||
Set closure = new TreeSet(); // this will be the real closure
|
||||
Iterator partialIt = partialClosure.iterator();
|
||||
while (partialIt.hasNext()) {
|
||||
String key2 = (String) partialIt.next();
|
||||
ci.reset(key2);
|
||||
for (String temp = ci.next(); temp != null; temp = ci.next()) {
|
||||
closure.add(temp);
|
||||
}
|
||||
// form closure
|
||||
/*String[] choices = (String[]) fromCaseFold.get(key2);
|
||||
for (int i = 0; i < choices.length; ++i) {
|
||||
ci.reset(choices[i]);
|
||||
String temp;
|
||||
while (null != (temp = ci.next())) {
|
||||
closure.add(temp);
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
// print it out, so that it can be cut and pasted back into this document.
|
||||
|
||||
Iterator it2 = closure.iterator();
|
||||
System.out.println("\t// " + toName.transliterate(key));
|
||||
System.out.print("\t{\"" + toHex.transliterate(key) + "\",");
|
||||
while (it2.hasNext()) {
|
||||
String item = (String)it2.next();
|
||||
System.out.print("\"" + toHex.transliterate(item) + "\",");
|
||||
}
|
||||
System.out.println("},");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============ PRIVATE CLASS DATA ============
|
||||
|
||||
// The pieces that are put together to form each variant.
// These are not changed during iteration.
|
||||
private int count = 0;
|
||||
private String[][] variants;
|
||||
|
||||
// state information, changes during iteration
|
||||
private boolean done = false;
|
||||
private int[] counts;
|
||||
|
||||
// internal buffer for efficiency
|
||||
private StringBuffer nextBuffer = new StringBuffer();
|
||||
|
||||
// ========================
|
||||
|
||||
/**
|
||||
* Reset to different source. Once reset, the iteration starts from the beginning.
|
||||
* @param source The string to get case variants for
|
||||
*/
|
||||
public void reset(String source) {
|
||||
|
||||
// allocate arrays to store pieces
|
||||
// using length might be slightly too long, but we don't care much
|
||||
|
||||
counts = new int[source.length()];
|
||||
variants = new String[source.length()][];
|
||||
|
||||
// walk through the source, and break up into pieces
|
||||
// each piece becomes an array of equivalent values
|
||||
// TODO: could optimized this later to coalesce all single string pieces
|
||||
|
||||
String piece = null;
|
||||
count = 0;
|
||||
for (int i = 0; i < source.length(); i += piece.length()) {
|
||||
|
||||
// find *longest* matching piece
|
||||
String caseFold = null;
|
||||
|
||||
if (GENERATE) {
|
||||
// do exactly one CP
|
||||
piece = UTF16.valueOf(source, i);
|
||||
caseFold = (String) toCaseFold.get(piece);
|
||||
} else {
|
||||
int max = i + maxLength;
|
||||
if (max > source.length()) max = source.length();
|
||||
for (int j = max; j > i; --j) {
|
||||
piece = source.substring(i, j);
|
||||
caseFold = (String) toCaseFold.get(piece);
|
||||
if (caseFold != null) break;
|
||||
}
|
||||
}
|
||||
|
||||
// if we fail, pick one code point
|
||||
if (caseFold == null) {
|
||||
piece = UTF16.valueOf(source, i);
|
||||
variants[count++] = new String[] {piece}; // single item string
|
||||
} else {
|
||||
variants[count++] = (String[])fromCaseFold.get(caseFold);
|
||||
}
|
||||
}
|
||||
reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Restart the iteration from the beginning, but with same source
|
||||
*/
|
||||
public void reset() {
|
||||
done = false;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
counts[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterates through the case variants.
|
||||
* @return next case variant. Each variant will case-fold to the same value as the source will.
|
||||
* When the iteration is done, null is returned.
|
||||
*/
|
||||
public String next() {
|
||||
|
||||
if (done) return null;
|
||||
int i;
|
||||
|
||||
// TODO Optimize so we keep the piece before and after the current position
|
||||
// so we don't have so much concatenation
|
||||
|
||||
// get the result, a concatenation
|
||||
|
||||
nextBuffer.setLength(0);
|
||||
for (i = 0; i < count; ++i) {
|
||||
nextBuffer.append(variants[i][counts[i]]);
|
||||
}
|
||||
|
||||
// find the next right set of pieces to concatenate
|
||||
|
||||
for (i = count-1; i >= 0; --i) {
|
||||
counts[i]++;
|
||||
if (counts[i] < variants[i].length) break;
|
||||
counts[i] = 0;
|
||||
}
|
||||
|
||||
// if we go too far, bail
|
||||
|
||||
if (i < 0) {
|
||||
done = true;
|
||||
}
|
||||
|
||||
return nextBuffer.toString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Temporary test, just to see how the stuff works.
|
||||
*/
|
||||
static public void main(String[] args) {
|
||||
String[] testCases = {"fiss", "h\u03a3"};
|
||||
CaseIterator ci = new CaseIterator();
|
||||
|
||||
for (int i = 0; i < testCases.length; ++i) {
|
||||
String item = testCases[i];
|
||||
System.out.println();
|
||||
System.out.println("Testing: " + toName.transliterate(item));
|
||||
System.out.println();
|
||||
ci.reset(item);
|
||||
int count = 0;
|
||||
for (String temp = ci.next(); temp != null; temp = ci.next()) {
|
||||
System.out.println(toName.transliterate(temp));
|
||||
count++;
|
||||
}
|
||||
System.out.println("Total: " + count);
|
||||
}
|
||||
|
||||
// generate a list of all caseless characters -- characters whose
|
||||
// case closure is themselves.
|
||||
|
||||
UnicodeSet caseless = new UnicodeSet();
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
String cp = UTF16.valueOf(i);
|
||||
ci.reset(cp);
|
||||
int count = 0;
|
||||
String fold = null;
|
||||
for (String temp = ci.next(); temp != null; temp = ci.next()) {
|
||||
fold = temp;
|
||||
if (++count > 1) break;
|
||||
}
|
||||
if (count==1 && fold.equals(cp)) {
|
||||
caseless.add(i);
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("caseless = " + caseless.toPattern(true));
|
||||
|
||||
UnicodeSet not_lc = new UnicodeSet("[:^lc:]");
|
||||
|
||||
UnicodeSet a = new UnicodeSet();
|
||||
a.set(not_lc);
|
||||
a.removeAll(caseless);
|
||||
System.out.println("[:^lc:] - caseless = " + a.toPattern(true));
|
||||
|
||||
a.set(caseless);
|
||||
a.removeAll(not_lc);
|
||||
System.out.println("caseless - [:^lc:] = " + a.toPattern(true));
|
||||
}
|
||||
}
|
@ -1,147 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
 * Static helpers for opening text files with an explicit encoding, copying a file
 * into a writer with optional text substitutions, and simple string replacement.
 */
public class FileUtilities {
    /** True when the {@code SHOW_FILES} system property is set; enables logging of opened/created files. */
    public static final boolean SHOW_FILES;
    static {
        boolean showFiles = false;
        try {
            showFiles = System.getProperty("SHOW_FILES") != null;
        } catch (SecurityException ignored) {
            // Not allowed to read system properties: leave file logging switched off.
        }
        SHOW_FILES = showFiles;
    }

    /** Auto-flushing writer on {@code System.out}. */
    public static final PrintWriter CONSOLE = new PrintWriter(System.out,true);

    private static PrintWriter log = CONSOLE;

    /**
     * Opens a file for reading as UTF-8.
     * @see #openReader(String, String, String)
     */
    public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
        return openReader(dir, filename, "UTF-8");
    }

    /**
     * Opens a file for buffered reading in the given encoding.
     *
     * @param dir directory containing the file; the empty string means {@code filename} is used as is
     * @param filename name of the file
     * @param encoding charset name passed to {@link InputStreamReader}
     * @return a reader on the file; the caller is responsible for closing it
     * @throws IOException if the file cannot be opened or the encoding is not supported
     */
    public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
        File file = dir.length() == 0 ? new File(filename) : new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Opening File: "
                    + file.getCanonicalPath());
        }
        FileInputStream stream = new FileInputStream(file);
        boolean wrapped = false;
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(stream, encoding),
                    4*1024);
            wrapped = true;
            return reader;
        } finally {
            // If the encoding was rejected, nobody else holds the stream: release it here.
            if (!wrapped) {
                closeQuietly(stream);
            }
        }
    }

    /**
     * Opens a file for writing as UTF-8.
     * @see #openWriter(String, String, String)
     */
    public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
        return openWriter(dir, filename, "UTF-8");
    }

    /**
     * Creates (or overwrites) a file for buffered writing in the given encoding,
     * creating missing parent directories first.
     *
     * @param dir parent directory, passed to {@link File#File(String, String)}
     * @param filename name of the file
     * @param encoding charset name passed to {@link OutputStreamWriter}
     * @return a writer on the file; the caller is responsible for closing it
     * @throws IOException if the file cannot be created or the encoding is not supported
     */
    public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
        File file = new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Creating File: "
                    + file.getCanonicalPath());
        }
        String parentName = file.getParent();
        if (parentName != null) {
            File parent = new File(parentName);
            // A failure here surfaces as an exception from FileOutputStream below.
            parent.mkdirs();
        }
        FileOutputStream stream = new FileOutputStream(file);
        boolean wrapped = false;
        try {
            PrintWriter writer = new PrintWriter(
                    new BufferedWriter(
                            new OutputStreamWriter(stream, encoding),
                            4*1024));
            wrapped = true;
            return writer;
        } finally {
            // If the encoding was rejected, nobody else holds the stream: release it here.
            if (!wrapped) {
                closeQuietly(stream);
            }
        }
    }

    /** Closes a stream that is being abandoned because of an earlier failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        try {
            resource.close();
        } catch (IOException ignored) {
            // The failure that led here is the one worth reporting.
        }
    }

    /**
     * Copies the lines of a file to {@code output} without any substitutions.
     * @see #appendFile(String, String, PrintWriter, String[])
     */
    public static void appendFile(String filename, String encoding, PrintWriter output) throws IOException {
        appendFile(filename, encoding, output, null);
    }

    /**
     * Copies the lines of a file to {@code output}, applying the given substitutions to each line.
     * The file is always closed, even when copying fails.
     *
     * @param filename path of the file to read
     * @param encoding charset name used to decode the file
     * @param output destination; it is not closed
     * @param replacementList flat list of (search, replacement) pairs, or null for none
     */
    public static void appendFile(String filename, String encoding, PrintWriter output, String[] replacementList) throws IOException {
        BufferedReader br = openReader("", filename, encoding);
        try {
            appendBufferedReader(br, output, replacementList);
        } finally {
            br.close();
        }
    }

    /**
     * Copies every line of {@code br} to {@code output} and then closes {@code br}.
     *
     * @param br source of lines; closed once all lines have been copied
     * @param output destination; it is not closed
     * @param replacementList flat list read in pairs: element {@code i} is the text to find and
     *        element {@code i+1} its replacement; pairs are applied in order. May be null.
     *        An odd-length list fails with an index error on the unpaired last element.
     */
    public static void appendBufferedReader(BufferedReader br,
            PrintWriter output, String[] replacementList) throws IOException {
        while (true) {
            String line = br.readLine();
            if (line == null) break;
            if (replacementList != null) {
                for (int i = 0; i < replacementList.length; i += 2) {
                    line = replace(line, replacementList[i], replacementList[i+1]);
                }
            }
            output.println(line);
        }
        br.close();
    }

    /**
     * Replaces all occurrences of piece with replacement, and returns new String.
     * Matches are found left to right and text inserted by a replacement is never rescanned.
     * A null source, or an empty piece (which would match everywhere), returns source unchanged.
     */
    public static String replace(String source, String piece, String replacement) {
        if (source == null || source.length() < piece.length()) return source;
        // An empty piece matches at every position, so the scan below would never finish.
        if (piece.length() == 0) return source;
        int pos = 0;
        while (true) {
            pos = source.indexOf(piece, pos);
            if (pos < 0) return source;
            source = source.substring(0,pos) + replacement + source.substring(pos + piece.length());
            // resume after the inserted text so the replacement itself is not matched again
            pos += replacement.length();
        }
    }

    /**
     * Applies every {search, replacement} row of {@code replacements} to {@code source}, in order.
     */
    public static String replace(String source, String[][] replacements) {
        return replace(source, replacements, replacements.length);
    }

    /**
     * Applies the first {@code count} {search, replacement} rows of {@code replacements}
     * to {@code source}, in order.
     */
    public static String replace(String source, String[][] replacements, int count) {
        for (int i = 0; i < count; ++i) {
            source = replace(source, replacements[i][0], replacements[i][1]);
        }
        return source;
    }

    /**
     * Applies all rows of {@code replacements}; when {@code reverse} is true each row is used
     * backwards, replacing its second element with its first.
     */
    public static String replace(String source, String[][] replacements, boolean reverse) {
        if (!reverse) return replace(source, replacements);
        for (int i = 0; i < replacements.length; ++i) {
            source = replace(source, replacements[i][1], replacements[i][0]);
        }
        return source;
    }

    /**
     * Lowercases {@code source} (English rules), collapses every run of characters that are
     * neither letters nor digits into a single underscore, and strips one leading and one
     * trailing underscore.
     */
    public static String anchorize(String source) {
        String result = source.toLowerCase(Locale.ENGLISH).replaceAll("[^\\p{L}\\p{N}]+", "_");
        if (result.endsWith("_")) result = result.substring(0,result.length()-1);
        if (result.startsWith("_")) result = result.substring(1);
        return result;
    }
}
|
@ -1,556 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
|
||||
/**
|
||||
* Provides a general interface for Unicode Properties, and
|
||||
* extracting sets based on those values.
|
||||
* @author Davis
|
||||
*/
|
||||
|
||||
public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
||||
|
||||
static class ICUProperty extends UnicodeProperty {
|
||||
/** Property selector: a UProperty constant or one of the extra binary constants of the enclosing class. */
protected int propEnum = Integer.MIN_VALUE;

/**
 * Creates the property wrapper for one property selector.
 *
 * @param propName name to report for this property
 * @param propEnum property selector used for all lookups
 */
protected ICUProperty(String propName, int propEnum) {
    setName(propName);
    this.propEnum = propEnum;
    setType(internalGetPropertyType(propEnum));

    // These three properties are the only ones not flagged as uniform over unassigned code points.
    boolean variesOnUnassigned = propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT
            || propEnum == UProperty.BIDI_CLASS
            || propEnum == UProperty.GENERAL_CATEGORY;
    setUniformUnassigned(!variesOnUnassigned);
}
|
||||
|
||||
boolean shownException = false;
|
||||
|
||||
public String _getValue(int codePoint) {
|
||||
switch (propEnum) {
|
||||
case UProperty.AGE:
|
||||
return getAge(codePoint);
|
||||
case UProperty.BIDI_MIRRORING_GLYPH:
|
||||
return UTF16.valueOf(UCharacter.getMirror(codePoint));
|
||||
case UProperty.CASE_FOLDING:
|
||||
return UCharacter.foldCase(UTF16.valueOf(codePoint), true);
|
||||
case UProperty.ISO_COMMENT:
|
||||
return UCharacter.getISOComment(codePoint);
|
||||
case UProperty.LOWERCASE_MAPPING:
|
||||
return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
|
||||
case UProperty.NAME:
|
||||
return UCharacter.getName(codePoint);
|
||||
case UProperty.SIMPLE_CASE_FOLDING:
|
||||
return UTF16.valueOf(UCharacter.foldCase(codePoint, true));
|
||||
case UProperty.SIMPLE_LOWERCASE_MAPPING:
|
||||
return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
|
||||
case UProperty.SIMPLE_TITLECASE_MAPPING:
|
||||
return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
|
||||
case UProperty.SIMPLE_UPPERCASE_MAPPING:
|
||||
return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
|
||||
case UProperty.TITLECASE_MAPPING:
|
||||
return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null);
|
||||
case UProperty.UNICODE_1_NAME:
|
||||
return UCharacter.getName1_0(codePoint);
|
||||
case UProperty.UPPERCASE_MAPPING:
|
||||
return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
|
||||
// case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
|
||||
// case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
|
||||
// case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
|
||||
// case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
|
||||
case isNFC:
|
||||
return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));
|
||||
case isNFD:
|
||||
return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));
|
||||
case isNFKC:
|
||||
return String
|
||||
.valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));
|
||||
case isNFKD:
|
||||
return String
|
||||
.valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));
|
||||
case isLowercase:
|
||||
return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
|
||||
UTF16.valueOf(codePoint)));
|
||||
case isUppercase:
|
||||
return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
|
||||
UTF16.valueOf(codePoint)));
|
||||
case isTitlecase:
|
||||
return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals(
|
||||
UTF16.valueOf(codePoint)));
|
||||
case isCasefolded:
|
||||
return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals(
|
||||
UTF16.valueOf(codePoint)));
|
||||
case isCased:
|
||||
return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
|
||||
UTF16.valueOf(codePoint)));
|
||||
case UProperty.SCRIPT_EXTENSIONS:
|
||||
return getStringScriptExtensions(codePoint);
|
||||
}
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
int enumValue = -1;
|
||||
String value = null;
|
||||
try {
|
||||
enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
|
||||
if (enumValue >= 0)
|
||||
value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (!shownException) {
|
||||
System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
|
||||
shownException = true;
|
||||
}
|
||||
}
|
||||
return value != null ? value : String.valueOf(enumValue);
|
||||
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
|
||||
double num = UCharacter.getUnicodeNumericValue(codePoint);
|
||||
if (num == UCharacter.NO_NUMERIC_VALUE)
|
||||
return null;
|
||||
return Double.toString(num);
|
||||
// TODO: Fix HACK -- API deficient
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String getAge(int codePoint) {
|
||||
String temp = UCharacter.getAge(codePoint).toString();
|
||||
if (temp.equals("0.0.0.0"))
|
||||
return "unassigned";
|
||||
if (temp.endsWith(".0.0"))
|
||||
return temp.substring(0, temp.length() - 4);
|
||||
return temp;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param valueAlias null if unused.
|
||||
* @param valueEnum -1 if unused
|
||||
* @param nameChoice
|
||||
* @return
|
||||
*/
|
||||
private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
|
||||
if (propEnum >= UProperty.STRING_START) {
|
||||
if (nameChoice > UProperty.NameChoice.LONG)
|
||||
throw new IllegalArgumentException();
|
||||
if (nameChoice != UProperty.NameChoice.LONG)
|
||||
return null;
|
||||
return "<string>";
|
||||
} else if (propEnum >= UProperty.DOUBLE_START) {
|
||||
if (nameChoice > UProperty.NameChoice.LONG)
|
||||
throw new IllegalArgumentException();
|
||||
if (nameChoice != UProperty.NameChoice.LONG)
|
||||
return null;
|
||||
return "<number>";
|
||||
}
|
||||
if (valueAlias != null && !valueAlias.equals("<integer>")) {
|
||||
valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias);
|
||||
}
|
||||
// because these are defined badly, there may be no normal (long) name.
|
||||
// if there is
|
||||
String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
|
||||
if (result != null)
|
||||
return result;
|
||||
// HACK try other namechoice
|
||||
if (nameChoice == UProperty.NameChoice.LONG) {
|
||||
result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);
|
||||
if (result != null)
|
||||
return result;
|
||||
if (isCombiningClassProperty())
|
||||
return null;
|
||||
return "<integer>";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean isCombiningClassProperty() {
|
||||
return (propEnum == UProperty.CANONICAL_COMBINING_CLASS
|
||||
|| propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS
|
||||
|| propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
|
||||
}
|
||||
|
||||
private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
|
||||
try {
|
||||
if (propEnum < BINARY_LIMIT) {
|
||||
propEnum = UProperty.ALPHABETIC;
|
||||
}
|
||||
return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
|
||||
} catch (Exception e) {
|
||||
return Integer.parseInt(valueAlias);
|
||||
}
|
||||
}
|
||||
|
||||
static Map fixSkeleton = new HashMap();
|
||||
|
||||
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
|
||||
String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
|
||||
String newValue = (String) fixSkeleton.get(value);
|
||||
if (newValue == null) {
|
||||
newValue = value;
|
||||
if (propEnum == UProperty.JOINING_GROUP) {
|
||||
newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH);
|
||||
}
|
||||
newValue = regularize(newValue, true);
|
||||
fixSkeleton.put(value, newValue);
|
||||
}
|
||||
return newValue;
|
||||
}
|
||||
|
||||
public List _getNameAliases(List result) {
|
||||
if (result == null)
|
||||
result = new ArrayList();
|
||||
// String alias = String_Extras.get(propEnum);
|
||||
// if (alias == null)
|
||||
String alias = Binary_Extras.get(propEnum);
|
||||
if (alias != null) {
|
||||
addUnique(alias, result);
|
||||
} else {
|
||||
addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
|
||||
addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public String getFixedPropertyName(int propName, int nameChoice) {
|
||||
try {
|
||||
return UCharacter.getPropertyName(propEnum, nameChoice);
|
||||
} catch (IllegalArgumentException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Maps a combining class name (or, for unnamed classes, the number itself) to its number as a string. */
private static Map cccHack = new HashMap();
/** The numbers, as strings, of combining classes 0..255 that have no long name. */
private static Set cccExtras = new HashSet();
static {
    int combiningClass = 0;
    while (combiningClass <= 255) {
        String longName = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, combiningClass,
                UProperty.NameChoice.LONG);
        String number = String.valueOf(combiningClass);
        if (longName == null) {
            // unnamed class: the number stands in as its own alias
            cccHack.put(number, number);
            cccExtras.add(number);
        } else {
            cccHack.put(longName, number);
        }
        ++combiningClass;
    }
}
|
||||
|
||||
public List _getAvailableValues(List result) {
|
||||
if (result == null)
|
||||
result = new ArrayList();
|
||||
if (propEnum == UProperty.AGE) {
|
||||
addAllUnique(getAges(), result);
|
||||
return result;
|
||||
|
||||
}
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
if (Binary_Extras.isInRange(propEnum)) {
|
||||
propEnum = UProperty.BINARY_START; // HACK
|
||||
}
|
||||
int start = UCharacter.getIntPropertyMinValue(propEnum);
|
||||
int end = UCharacter.getIntPropertyMaxValue(propEnum);
|
||||
for (int i = start; i <= end; ++i) {
|
||||
String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
|
||||
String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
|
||||
if (alias == null) {
|
||||
alias = alias2;
|
||||
if (alias == null && isCombiningClassProperty()) {
|
||||
alias = String.valueOf(i);
|
||||
}
|
||||
}
|
||||
// System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
|
||||
addUnique(alias, result);
|
||||
}
|
||||
} else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) {
|
||||
UnicodeMap map = getUnicodeMap();
|
||||
Collection values = map.values();
|
||||
addAllUnique(values, result);
|
||||
} else {
|
||||
String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);
|
||||
addUnique(alias, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static String[] AGES = null;
|
||||
|
||||
private String[] getAges() {
|
||||
if (AGES == null) {
|
||||
Set ages = new TreeSet();
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
ages.add(getAge(i));
|
||||
}
|
||||
AGES = (String[]) ages.toArray(new String[ages.size()]);
|
||||
}
|
||||
return AGES;
|
||||
}
|
||||
|
||||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (result == null)
|
||||
result = new ArrayList();
|
||||
if (propEnum == UProperty.AGE) {
|
||||
addUnique(valueAlias, result);
|
||||
return result;
|
||||
}
|
||||
if (isCombiningClassProperty()) {
|
||||
addUnique(cccHack.get(valueAlias), result); // add number
|
||||
}
|
||||
int type = getType();
|
||||
if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) {
|
||||
addUnique(valueAlias, result);
|
||||
if (valueAlias.endsWith(".0")) {
|
||||
addUnique(valueAlias.substring(0, valueAlias.length() - 2), result);
|
||||
}
|
||||
} else {
|
||||
for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) {
|
||||
try {
|
||||
addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result);
|
||||
} catch (Exception e) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
|
||||
*/
|
||||
private int internalGetPropertyType(int prop) {
|
||||
switch (prop) {
|
||||
case UProperty.AGE:
|
||||
case UProperty.BLOCK:
|
||||
case UProperty.SCRIPT:
|
||||
return UnicodeProperty.CATALOG;
|
||||
case UProperty.ISO_COMMENT:
|
||||
case UProperty.NAME:
|
||||
case UProperty.UNICODE_1_NAME:
|
||||
case UProperty.SCRIPT_EXTENSIONS:
|
||||
return UnicodeProperty.MISC;
|
||||
case UProperty.BIDI_MIRRORING_GLYPH:
|
||||
case UProperty.CASE_FOLDING:
|
||||
case UProperty.LOWERCASE_MAPPING:
|
||||
case UProperty.SIMPLE_CASE_FOLDING:
|
||||
case UProperty.SIMPLE_LOWERCASE_MAPPING:
|
||||
case UProperty.SIMPLE_TITLECASE_MAPPING:
|
||||
case UProperty.SIMPLE_UPPERCASE_MAPPING:
|
||||
case UProperty.TITLECASE_MAPPING:
|
||||
case UProperty.UPPERCASE_MAPPING:
|
||||
return UnicodeProperty.EXTENDED_STRING;
|
||||
}
|
||||
if (prop < UProperty.BINARY_START)
|
||||
return UnicodeProperty.UNKNOWN;
|
||||
if (prop < UProperty.BINARY_LIMIT)
|
||||
return UnicodeProperty.BINARY;
|
||||
if (prop < UProperty.INT_START)
|
||||
return UnicodeProperty.EXTENDED_BINARY;
|
||||
if (prop < UProperty.INT_LIMIT)
|
||||
return UnicodeProperty.ENUMERATED;
|
||||
if (prop < UProperty.DOUBLE_START)
|
||||
return UnicodeProperty.EXTENDED_ENUMERATED;
|
||||
if (prop < UProperty.DOUBLE_LIMIT)
|
||||
return UnicodeProperty.NUMERIC;
|
||||
if (prop < UProperty.STRING_START)
|
||||
return UnicodeProperty.EXTENDED_NUMERIC;
|
||||
if (prop < UProperty.STRING_LIMIT)
|
||||
return UnicodeProperty.STRING;
|
||||
return UnicodeProperty.EXTENDED_STRING;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()
|
||||
*/
|
||||
public String _getVersion() {
|
||||
return VersionInfo.ICU_VERSION.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/*{
|
||||
matchIterator = new UnicodeSetIterator(
|
||||
new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Other Missing Functions:
|
||||
Expands_On_NFC
|
||||
Expands_On_NFD
|
||||
Expands_On_NFKC
|
||||
Expands_On_NFKD
|
||||
Composition_Exclusion
|
||||
Decomposition_Mapping
|
||||
FC_NFKC_Closure
|
||||
ISO_Comment
|
||||
NFC_Quick_Check
|
||||
NFD_Quick_Check
|
||||
NFKC_Quick_Check
|
||||
NFKD_Quick_Check
|
||||
Special_Case_Condition
|
||||
Unicode_Radical_Stroke
|
||||
*/
|
||||
|
||||
static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,
|
||||
new String[] {
|
||||
"isNFC", "isNFD", "isNFKC", "isNFKD",
|
||||
"isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",
|
||||
});
|
||||
|
||||
// static final Names String_Extras = new Names(UProperty.STRING_LIMIT,
|
||||
// new String[] {
|
||||
// "toNFC", "toNFD", "toNFKC", "toNKFD",
|
||||
// });
|
||||
|
||||
static final int
|
||||
isNFC = UProperty.BINARY_LIMIT,
|
||||
isNFD = UProperty.BINARY_LIMIT+1,
|
||||
isNFKC = UProperty.BINARY_LIMIT+2,
|
||||
isNFKD = UProperty.BINARY_LIMIT+3,
|
||||
isLowercase = UProperty.BINARY_LIMIT+4,
|
||||
isUppercase = UProperty.BINARY_LIMIT+5,
|
||||
isTitlecase = UProperty.BINARY_LIMIT+6,
|
||||
isCasefolded = UProperty.BINARY_LIMIT+7,
|
||||
isCased = UProperty.BINARY_LIMIT+8,
|
||||
BINARY_LIMIT = UProperty.BINARY_LIMIT+9
|
||||
|
||||
// NFC = UProperty.STRING_LIMIT,
|
||||
// NFD = UProperty.STRING_LIMIT+1,
|
||||
// NFKC = UProperty.STRING_LIMIT+2,
|
||||
// NFKD = UProperty.STRING_LIMIT+3
|
||||
;
|
||||
|
||||
/**
 * Builds the factory by creating and registering one property object for every
 * alias returned by getInternalAvailablePropertyAliases.
 */
protected ICUPropertyFactory() {
    for (Object alias : getInternalAvailablePropertyAliases(new ArrayList())) {
        add(getInternalProperty((String) alias));
    }
}
|
||||
|
||||
static BitSet BITSET = new BitSet();
|
||||
public static synchronized String getStringScriptExtensions(int codePoint) {
|
||||
int result = UScript.getScriptExtensions(codePoint, BITSET);
|
||||
if (result >= 0) {
|
||||
return UScript.getName(result);
|
||||
}
|
||||
TreeMap<String,String> sorted = new TreeMap<String,String>();
|
||||
for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) {
|
||||
// sort by short form
|
||||
sorted.put(UScript.getShortName(scriptCode), UScript.getName(scriptCode));
|
||||
}
|
||||
return CollectionUtilities.join(sorted.values(), " ");
|
||||
}
|
||||
|
||||
private static ICUPropertyFactory singleton = null;
|
||||
|
||||
public static synchronized ICUPropertyFactory make() {
|
||||
if (singleton != null)
|
||||
return singleton;
|
||||
singleton = new ICUPropertyFactory();
|
||||
return singleton;
|
||||
}
|
||||
|
||||
public List getInternalAvailablePropertyAliases(List result) {
|
||||
int[][] ranges = {
|
||||
{UProperty.BINARY_START, UProperty.BINARY_LIMIT},
|
||||
{UProperty.INT_START, UProperty.INT_LIMIT},
|
||||
{UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
|
||||
{UProperty.STRING_START, UProperty.STRING_LIMIT},
|
||||
{UProperty.OTHER_PROPERTY_START, UProperty.OTHER_PROPERTY_LIMIT},
|
||||
|
||||
};
|
||||
for (int i = 0; i < ranges.length; ++i) {
|
||||
for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
|
||||
String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
|
||||
UnicodeProperty.addUnique(alias, result);
|
||||
if (!result.contains(alias))
|
||||
result.add(alias);
|
||||
}
|
||||
}
|
||||
// result.addAll(String_Extras.getNames());
|
||||
result.addAll(Binary_Extras.getNames());
|
||||
return result;
|
||||
}
|
||||
|
||||
public UnicodeProperty getInternalProperty(String propertyAlias) {
|
||||
int propEnum;
|
||||
main: {
|
||||
int possibleItem = Binary_Extras.get(propertyAlias);
|
||||
if (possibleItem >= 0) {
|
||||
propEnum = possibleItem;
|
||||
break main;
|
||||
}
|
||||
// possibleItem = String_Extras.get(propertyAlias);
|
||||
// if (possibleItem >= 0) {
|
||||
// propEnum = possibleItem;
|
||||
// break main;
|
||||
// }
|
||||
propEnum = UCharacter.getPropertyEnum(propertyAlias);
|
||||
}
|
||||
return new ICUProperty(propertyAlias, propEnum);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
|
||||
*/
|
||||
// TODO file bug on getPropertyValueName for Canonical_Combining_Class
|
||||
public static class Names {
|
||||
private String[] names;
|
||||
private int base;
|
||||
|
||||
public Names(int base, String[] names) {
|
||||
this.base = base;
|
||||
this.names = names;
|
||||
}
|
||||
|
||||
public int get(String name) {
|
||||
for (int i = 0; i < names.length; ++i) {
|
||||
if (name.equalsIgnoreCase(names[i]))
|
||||
return base + i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public String get(int number) {
|
||||
number -= base;
|
||||
if (number < 0 || names.length <= number)
|
||||
return null;
|
||||
return names[number];
|
||||
}
|
||||
|
||||
public boolean isInRange(int number) {
|
||||
number -= base;
|
||||
return (0 <= number && number < names.length);
|
||||
}
|
||||
|
||||
public List getNames() {
|
||||
return Arrays.asList(names);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,149 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
||||
public class TransliteratorUtilities {
|
||||
public static boolean DEBUG = false;
|
||||
|
||||
public static void registerTransliteratorFromFile(String dir, String id) {
|
||||
try {
|
||||
String filename = id.replace('-', '_') + ".txt";
|
||||
String rules = getFileContents(dir, filename);
|
||||
Transliterator t;
|
||||
int pos = id.indexOf('-');
|
||||
String rid;
|
||||
if (pos < 0) {
|
||||
rid = id + "-Any";
|
||||
id = "Any-" + id;
|
||||
} else {
|
||||
rid = id.substring(pos+1) + "-" + id.substring(0, pos);
|
||||
}
|
||||
t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
|
||||
Transliterator.unregister(id);
|
||||
Transliterator.registerInstance(t);
|
||||
|
||||
/*String test = "\u049A\u0430\u0437\u0430\u049B";
|
||||
System.out.println(t.transliterate(test));
|
||||
t = Transliterator.getInstance(id);
|
||||
System.out.println(t.transliterate(test));
|
||||
*/
|
||||
|
||||
t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
|
||||
Transliterator.unregister(rid);
|
||||
Transliterator.registerInstance(t);
|
||||
if (DEBUG) System.out.println("Registered new Transliterator: " + id + ", " + rid);
|
||||
} catch (IOException e) {
|
||||
//#if defined(FOUNDATION10) || defined(J2SE13)
|
||||
//## throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id+" "+ e.getMessage());
|
||||
//#else
|
||||
throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + id).initCause(e);
|
||||
//#endif
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public static String getFileContents(String dir, String filename) throws IOException {
|
||||
//#if defined(FOUNDATION10) || defined(J2SE13)
|
||||
//## BufferedReader br = TestUtil.openUTF8Reader(dir, filename);
|
||||
//#else
|
||||
BufferedReader br = FileUtilities.openUTF8Reader(dir, filename);
|
||||
//#endif
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
|
||||
buffer.append(line).append("\r\n");
|
||||
}
|
||||
br.close();
|
||||
return buffer.toString();
|
||||
|
||||
}
|
||||
|
||||
private static final String BASE_RULES =
|
||||
":: (hex-any/xml);" +
|
||||
":: (hex-any/xml10);" +
|
||||
"'<' > '<' ;" +
|
||||
"'<' < '&'[lL][Tt]';' ;" +
|
||||
"'&' > '&' ;" +
|
||||
"'&' < '&'[aA][mM][pP]';' ;" +
|
||||
"'>' < '&'[gG][tT]';' ;" +
|
||||
"'\"' < '&'[qQ][uU][oO][tT]';' ; " +
|
||||
"'' < '&'[aA][pP][oO][sS]';' ; ";
|
||||
|
||||
private static final String CONTENT_RULES =
|
||||
"'>' > '>' ;";
|
||||
|
||||
private static final String HTML_RULES = BASE_RULES + CONTENT_RULES +
|
||||
"'\"' > '"' ; ";
|
||||
|
||||
private static final String HTML_RULES_CONTROLS = HTML_RULES +
|
||||
":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; ";
|
||||
|
||||
private static final String HTML_RULES_ASCII = HTML_RULES +
|
||||
":: [[:C:][:^ASCII:]] any-hex/xml ; ";
|
||||
|
||||
private static final String XML_RULES = HTML_RULES +
|
||||
"'' > ''' ; "
|
||||
;
|
||||
|
||||
/*
|
||||
The ampersand character (&) and the left angle bracket (<) MUST NOT appear
|
||||
|
||||
in their literal form, except when used as markup delimiters, or within a
|
||||
|
||||
comment, a processing instruction, or a CDATA section. If they are needed
|
||||
|
||||
elsewhere, they MUST be escaped using either numeric character references or
|
||||
|
||||
the strings "&" and "<" respectively. The right angle bracket (>) MAY
|
||||
|
||||
be represented using the string ">", and MUST, for compatibility, be
|
||||
|
||||
escaped using either ">" or a character reference when it appears in the string
|
||||
|
||||
"]]>" in content, when that string is not marking the end of a CDATA section.
|
||||
|
||||
In the content of elements, character data is any string of characters which does
|
||||
|
||||
not contain the start-delimiter of any markup and does not include the
|
||||
|
||||
CDATA-section-close delimiter, "]]>". In a CDATA section, character data is
|
||||
|
||||
any string of characters not including the CDATA-section-close delimiter,
|
||||
|
||||
"]]>".
|
||||
|
||||
To allow attribute values to contain both single and double quotes, the
|
||||
|
||||
apostrophe or single-quote character (') MAY be represented as "'", and
|
||||
|
||||
the double-quote character (") as """.
|
||||
|
||||
|
||||
*/
|
||||
|
||||
public static final Transliterator toXML = Transliterator.createFromRules(
|
||||
"any-xml", XML_RULES, Transliterator.FORWARD);
|
||||
public static final Transliterator fromXML = Transliterator.createFromRules(
|
||||
"xml-any", XML_RULES, Transliterator.REVERSE);
|
||||
public static final Transliterator toHTML = Transliterator.createFromRules(
|
||||
"any-html", HTML_RULES, Transliterator.FORWARD);
|
||||
public static final Transliterator toHTMLControl = Transliterator.createFromRules(
|
||||
"any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);
|
||||
public static final Transliterator toHTMLAscii = Transliterator.createFromRules(
|
||||
"any-html", HTML_RULES_ASCII, Transliterator.FORWARD);
|
||||
public static final Transliterator fromHTML = Transliterator.createFromRules(
|
||||
"html-any", HTML_RULES, Transliterator.REVERSE);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,248 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, Google, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.dev.util.UnicodeProperty.PatternMatcher;
|
||||
import com.ibm.icu.impl.UnicodeRegex;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* Allows for overriding the parsing of UnicodeSet property patterns.
|
||||
* <p>
|
||||
* WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
|
||||
* Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call
|
||||
* {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
|
||||
* with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
|
||||
UnicodeRegex unicodeRegex;
|
||||
final UnicodeProperty.Factory factory;
|
||||
|
||||
public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
|
||||
unicodeRegex = new UnicodeRegex().setSymbolTable(this);
|
||||
this.factory = factory;
|
||||
}
|
||||
|
||||
|
||||
// public boolean applyPropertyAlias0(String propertyName,
|
||||
// String propertyValue, UnicodeSet result) {
|
||||
// if (!propertyName.contains("*")) {
|
||||
// return applyPropertyAlias(propertyName, propertyValue, result);
|
||||
// }
|
||||
// String[] propertyNames = propertyName.split("[*]");
|
||||
// for (int i = propertyNames.length - 1; i >= 0; ++i) {
|
||||
// String pname = propertyNames[i];
|
||||
//
|
||||
// }
|
||||
// return null;
|
||||
// }
|
||||
|
||||
public boolean applyPropertyAlias(String propertyName,
|
||||
String propertyValue, UnicodeSet result) {
|
||||
boolean status = false;
|
||||
boolean invert = false;
|
||||
int posNotEqual = propertyName.indexOf('\u2260');
|
||||
int posColon = propertyName.indexOf(':');
|
||||
if (posNotEqual >= 0 || posColon >= 0) {
|
||||
if (posNotEqual < 0) posNotEqual = propertyName.length();
|
||||
if (posColon < 0) posColon = propertyName.length();
|
||||
int opPos = posNotEqual < posColon ? posNotEqual : posColon;
|
||||
propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1)
|
||||
: propertyName.substring(opPos+1) + "=" + propertyValue;
|
||||
propertyName = propertyName.substring(0,opPos);
|
||||
if (posNotEqual < posColon) {
|
||||
invert = true;
|
||||
}
|
||||
}
|
||||
if (propertyName.endsWith("!")) {
|
||||
propertyName = propertyName.substring(0, propertyName.length() - 1);
|
||||
invert = !invert;
|
||||
}
|
||||
propertyValue = propertyValue.trim();
|
||||
if (propertyValue.length() != 0) {
|
||||
status = applyPropertyAlias0(propertyName, propertyValue, result);
|
||||
} else {
|
||||
try {
|
||||
status = applyPropertyAlias0("gc", propertyName, result);
|
||||
} catch (Exception e) {};
|
||||
if (!status) {
|
||||
try {
|
||||
status = applyPropertyAlias0("sc", propertyName, result);
|
||||
} catch (Exception e) {};
|
||||
if (!status) {
|
||||
try {
|
||||
status = applyPropertyAlias0(propertyName, "Yes", result);
|
||||
} catch (Exception e) {};
|
||||
if (!status) {
|
||||
status = applyPropertyAlias0(propertyName, "", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (status && invert) {
|
||||
result.complement();
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
static final HashMap<String,String[]> GC_REMAP = new HashMap();
|
||||
{
|
||||
GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));
|
||||
GC_REMAP.put("other", GC_REMAP.get("c"));
|
||||
|
||||
GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));
|
||||
GC_REMAP.put("letter", GC_REMAP.get("l"));
|
||||
|
||||
GC_REMAP.put("lc", "Ll Lt Lu".split(" "));
|
||||
GC_REMAP.put("casedletter", GC_REMAP.get("lc"));
|
||||
|
||||
GC_REMAP.put("m", "Mc Me Mn".split(" "));
|
||||
GC_REMAP.put("mark", GC_REMAP.get("m"));
|
||||
|
||||
GC_REMAP.put("n", "Nd Nl No".split(" "));
|
||||
GC_REMAP.put("number", GC_REMAP.get("n"));
|
||||
|
||||
GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));
|
||||
GC_REMAP.put("punctuation", GC_REMAP.get("p"));
|
||||
GC_REMAP.put("punct", GC_REMAP.get("p"));
|
||||
|
||||
GC_REMAP.put("s", "Sc Sk Sm So".split(" "));
|
||||
GC_REMAP.put("symbol", GC_REMAP.get("s"));
|
||||
|
||||
GC_REMAP.put("z", "Zl Zp Zs".split(" "));
|
||||
GC_REMAP.put("separator", GC_REMAP.get("z"));
|
||||
}
|
||||
|
||||
public boolean applyPropertyAlias0(String propertyName,
|
||||
String propertyValue, UnicodeSet result) {
|
||||
result.clear();
|
||||
UnicodeProperty prop = factory.getProperty(propertyName);
|
||||
String canonicalName = prop.getName();
|
||||
boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);
|
||||
|
||||
// Hack for special GC values
|
||||
if (canonicalName.equals("General_Category")) {
|
||||
String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
|
||||
if (parts != null) {
|
||||
for (String part : parts) {
|
||||
prop.getSet(part, result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
PatternMatcher patternMatcher = null;
|
||||
if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
|
||||
String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
|
||||
patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
|
||||
}
|
||||
UnicodeProperty otherProperty = null;
|
||||
boolean testCp = false;
|
||||
if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
|
||||
String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
|
||||
if ("cp".equalsIgnoreCase(otherPropName)) {
|
||||
testCp = true;
|
||||
} else {
|
||||
otherProperty = factory.getProperty(otherPropName);
|
||||
}
|
||||
}
|
||||
if (prop != null) {
|
||||
UnicodeSet set;
|
||||
if (testCp) {
|
||||
set = new UnicodeSet();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (UnicodeProperty.equals(i, prop.getValue(i))) {
|
||||
set.add(i);
|
||||
}
|
||||
}
|
||||
} else if (otherProperty != null) {
|
||||
set = new UnicodeSet();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
String v1 = prop.getValue(i);
|
||||
String v2 = otherProperty.getValue(i);
|
||||
if (UnicodeProperty.equals(v1, v2)) {
|
||||
set.add(i);
|
||||
}
|
||||
}
|
||||
} else if (patternMatcher == null) {
|
||||
if (!isValid(prop, propertyValue)) {
|
||||
throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName
|
||||
+ " must be in "
|
||||
+ prop.getAvailableValues() + " or in " + prop.getValueAliases());
|
||||
}
|
||||
if (isAge) {
|
||||
set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
|
||||
} else {
|
||||
set = prop.getSet(propertyValue);
|
||||
}
|
||||
} else if (isAge) {
|
||||
set = new UnicodeSet();
|
||||
List<String> values = prop.getAvailableValues();
|
||||
for (String value : values) {
|
||||
if (patternMatcher.matches(value)) {
|
||||
for (String other : values) {
|
||||
if (other.compareTo(value) <= 0) {
|
||||
set.addAll(prop.getSet(other));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
set = prop.getSet(patternMatcher);
|
||||
}
|
||||
result.addAll(set);
|
||||
return true;
|
||||
}
|
||||
throw new IllegalArgumentException("Illegal property: " + propertyName);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private boolean isValid(UnicodeProperty prop, String propertyValue) {
|
||||
// if (prop.getName().equals("General_Category")) {
|
||||
// if (propertyValue)
|
||||
// }
|
||||
return prop.isValidValue(propertyValue);
|
||||
}
|
||||
|
||||
public enum Relation {less, leq, equal, geq, greater}
|
||||
|
||||
public static class ComparisonMatcher implements PatternMatcher {
|
||||
Relation relation;
|
||||
static Comparator comparator = new UTF16.StringComparator(true, false,0);
|
||||
|
||||
String pattern;
|
||||
|
||||
public ComparisonMatcher(String pattern, Relation comparator) {
|
||||
this.relation = comparator;
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
public boolean matches(Object value) {
|
||||
int comp = comparator.compare(pattern, value.toString());
|
||||
switch (relation) {
|
||||
case less: return comp < 0;
|
||||
case leq: return comp <= 0;
|
||||
default: return comp == 0;
|
||||
case geq: return comp >= 0;
|
||||
case greater: return comp > 0;
|
||||
}
|
||||
}
|
||||
|
||||
public PatternMatcher set(String pattern) {
|
||||
this.pattern = pattern;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user