ICU-0 fixes for generation of Unicode 4.1.0 properties
X-SVN-Rev: 16858
This commit is contained in:
parent
4c8340b33f
commit
bd1094eaca
27
icu4j/src/com/ibm/icu/dev/test/util/Equator.java
Normal file
27
icu4j/src/com/ibm/icu/dev/test/util/Equator.java
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
public interface Equator {
    /**
     * Equality function. Implementations must handle the case of null
     * and must be able to compare any two objects that could be compared.
     * The relation must obey the normal rules of symmetry (a=b => b=a)
     * and transitivity (a=b & b=c => a=c).
     *
     * @param a first object to compare
     * @param b second object to compare
     * @return true if a and b are equal
     */
    boolean isEqual(Object a, Object b);

    /**
     * Hash function matching {@link #isEqual(Object, Object)}.
     * Must obey the normal rule: a=b => getHashCode(a)=getHashCode(b)
     *
     * @param object the object to hash
     * @return a hash code for the object
     */
    int getHashCode(Object object);
}
|
247
icu4j/src/com/ibm/icu/dev/test/util/ListSet.java
Normal file
247
icu4j/src/com/ibm/icu/dev/test/util/ListSet.java
Normal file
@ -0,0 +1,247 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Set;
|
||||
import java.util.List;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
 * A list with unique items. Elements are kept in insertion order in a backing
 * {@link ArrayList}, while a {@link TreeSet} built on the supplied comparator decides
 * whether an element is already present; adding an element the comparator considers
 * equal to an existing one leaves the collection unchanged.
 * <p>
 * Positional insertion and replacement ({@code add(int, Object)},
 * {@code addAll(int, Collection)}, {@code set(int, Object)}) and {@code remove(int)}
 * are not supported (at least for now) and throw {@link UnsupportedOperationException}.
 * Support for Equator may be added later.
 *
 * @author davis
 */
public class ListSet implements Set, List {
    /** The elements in insertion order. */
    List list = new ArrayList();
    /** The same elements ordered by the comparator; used for membership tests. */
    Set set;
    /** Defines element identity; null means the elements' natural ordering. */
    Comparator comparator;

    /**
     * Creates an empty ListSet.
     *
     * @param comparator decides when two elements are the same; may be null, in which
     *        case elements must be mutually {@link Comparable}
     */
    ListSet(Comparator comparator) {
        this.comparator = comparator;
        set = new TreeSet(comparator);
    }

    /**
     * Not supported.
     *
     * @param index ignored
     * @param element ignored
     * @throws UnsupportedOperationException always
     */
    public void add(int index, Object element) {
        throw new UnsupportedOperationException();
    }

    /**
     * Appends the object unless an equal one (per the comparator) is already present.
     *
     * @param o element to add
     * @return true if the element was added
     */
    public boolean add(Object o) {
        boolean result = set.add(o);
        if (result) list.add(o);
        return result;
    }

    /**
     * Not supported.
     *
     * @param index ignored
     * @param c ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public boolean addAll(int index, Collection c) {
        throw new UnsupportedOperationException();
    }

    /**
     * Appends every element of c that is not already present, in c's iteration order.
     *
     * @param c elements to add
     * @return true if at least one element was added
     */
    public boolean addAll(Collection c) {
        // TODO optimize
        boolean result = false;
        for (Iterator it = c.iterator(); it.hasNext();) {
            // add() must run for every element; do not short-circuit on result,
            // otherwise the iterator would never advance once one add succeeded.
            if (add(it.next())) result = true;
        }
        return result;
    }

    /**
     * Removes all elements from both the list and the set.
     */
    public void clear() {
        list.clear();
        set.clear();
    }

    /**
     * @param o element to look for
     * @return true if an equal element (per the comparator) is present
     */
    public boolean contains(Object o) {
        return set.contains(o);
    }

    /**
     * @param c elements to look for
     * @return true if every element of c is present
     */
    public boolean containsAll(Collection c) {
        return set.containsAll(c);
    }

    /**
     * Two ListSets are equal when both their ordered lists and their sets are equal.
     * Any other object, including null, is never equal to a ListSet.
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    public boolean equals(Object obj) {
        if (obj == this) return true;
        if (!(obj instanceof ListSet)) return false;
        ListSet other = (ListSet) obj;
        return list.equals(other.list) && set.equals(other.set);
    }

    /**
     * @param index position in insertion order
     * @return the element at that position
     */
    public Object get(int index) {
        return list.get(index);
    }

    /**
     * @see java.lang.Object#hashCode()
     */
    public int hashCode() {
        return list.hashCode();
    }

    /**
     * @param o element to look for
     * @return the position of the element equal to o per the comparator, or -1
     */
    public int indexOf(Object o) {
        for (int i = 0; i < list.size(); ++i) {
            if (sameElement(list.get(i), o)) return i;
        }
        return -1;
    }

    /**
     * @return true if there are no elements
     */
    public boolean isEmpty() {
        return list.isEmpty();
    }

    /**
     * @return an iterator in insertion order; its remove() updates list and set together
     */
    public Iterator iterator() {
        return new Cursor(0);
    }

    /**
     * @param o element to look for
     * @return the position of the element equal to o per the comparator, or -1
     */
    public int lastIndexOf(Object o) {
        for (int i = list.size() - 1; i >= 0; --i) {
            if (sameElement(list.get(i), o)) return i;
        }
        return -1;
    }

    /**
     * @return a list iterator in insertion order; remove() is supported, set() and
     *         add() are not
     */
    public ListIterator listIterator() {
        return new Cursor(0);
    }

    /**
     * @param index position of the first element to be returned by next()
     * @return a list iterator starting at index; remove() is supported, set() and
     *         add() are not
     */
    public ListIterator listIterator(int index) {
        return new Cursor(index);
    }

    /**
     * Not supported.
     *
     * @param index ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public Object remove(int index) {
        throw new UnsupportedOperationException();
    }

    /**
     * Removes the element equal to o (per the comparator), if present.
     *
     * @param o element to remove
     * @return true if an element was removed
     */
    public boolean remove(Object o) {
        if (!set.remove(o)) return false;
        // Elements are unique, so at most one list entry matches.
        int index = indexOf(o);
        if (index >= 0) list.remove(index);
        return true;
    }

    /**
     * @param c elements to remove
     * @return true if anything was removed
     */
    public boolean removeAll(Collection c) {
        boolean result = set.removeAll(c);
        if (!result) return false;
        return matchListToSet();
    }

    /**
     * @param c elements to keep
     * @return true if anything was removed
     */
    public boolean retainAll(Collection c) {
        boolean result = set.retainAll(c);
        if (!result) return false;
        return matchListToSet();
    }

    /**
     * Drops from the list every element that is no longer in the set.
     *
     * @return always true
     */
    private boolean matchListToSet() {
        for (Iterator it = list.iterator(); it.hasNext();) {
            Object o = it.next();
            if (!set.contains(o)) it.remove();
        }
        return true;
    }

    /**
     * Tests two elements with the same rule the backing set uses.
     */
    private boolean sameElement(Object a, Object b) {
        if (comparator != null) return comparator.compare(a, b) == 0;
        // Null comparator: the TreeSet uses natural ordering, so do the same here.
        return ((Comparable) a).compareTo(b) == 0;
    }

    /**
     * Not supported.
     *
     * @param index ignored
     * @param element ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public Object set(int index, Object element) {
        throw new UnsupportedOperationException();
    }

    /**
     * @return the number of elements
     */
    public int size() {
        return list.size();
    }

    /**
     * Returns a new, independent ListSet holding the elements in the given range.
     * Unlike the usual List contract this is a copy, not a view.
     *
     * @param fromIndex low endpoint (inclusive)
     * @param toIndex high endpoint (exclusive)
     * @return a ListSet with the same comparator containing the range's elements
     */
    public List subList(int fromIndex, int toIndex) {
        ListSet result = new ListSet(comparator);
        result.addAll(list.subList(fromIndex, toIndex));
        return result;
    }

    /**
     * @return the elements in insertion order
     */
    public Object[] toArray() {
        return list.toArray();
    }

    /**
     * @param a array to fill if it is large enough
     * @return the elements in insertion order
     */
    public Object[] toArray(Object[] a) {
        return list.toArray(a);
    }

    /**
     * Resolves the conflict between the default spliterator() methods that List and
     * Set both declare on Java 8 and later; without this override the class does not
     * compile there. The fully qualified name avoids a new import.
     *
     * @return a spliterator over the elements in insertion order
     */
    public java.util.Spliterator spliterator() {
        return list.spliterator();
    }

    /**
     * @see java.lang.Object#toString()
     */
    public String toString() {
        return list.toString();
    }

    /**
     * Iterator over the backing list that keeps the set in step on remove() and
     * refuses the positional mutations this class does not support.
     */
    private final class Cursor implements ListIterator {
        private final ListIterator delegate;
        private Object last;

        Cursor(int index) {
            delegate = list.listIterator(index);
        }

        public boolean hasNext() {
            return delegate.hasNext();
        }

        public Object next() {
            last = delegate.next();
            return last;
        }

        public boolean hasPrevious() {
            return delegate.hasPrevious();
        }

        public Object previous() {
            last = delegate.previous();
            return last;
        }

        public int nextIndex() {
            return delegate.nextIndex();
        }

        public int previousIndex() {
            return delegate.previousIndex();
        }

        public void remove() {
            // delegate.remove() throws IllegalStateException if there is nothing to
            // remove, so the set is only touched after a successful list removal.
            delegate.remove();
            ListSet.this.set.remove(last);
        }

        public void set(Object o) {
            throw new UnsupportedOperationException();
        }

        public void add(Object o) {
            throw new UnsupportedOperationException();
        }
    }
}
|
@ -39,7 +39,7 @@ public class TestUtilities extends TestFmwk {
|
||||
UnicodeMap map1 = new UnicodeMap();
|
||||
Map map2 = new HashMap();
|
||||
Map map3 = new TreeMap();
|
||||
UnicodeMap.Equator equator = UnicodeMap.SIMPLE_EQUATOR;
|
||||
Comparator equator = UnicodeMap.SIMPLE_EQUATOR;
|
||||
SortedSet log = new TreeSet();
|
||||
static String[] TEST_VALUES = {null, "A", "B", "C", "D", "E", "F"};
|
||||
static Random random = new Random(12345);
|
||||
@ -126,7 +126,7 @@ public class TestUtilities extends TestFmwk {
|
||||
map3 = new TreeMap();
|
||||
Object lastValue = new Object();
|
||||
while (mi.next()) {
|
||||
if (!UnicodeMap.SIMPLE_EQUATOR.isEqual(lastValue, mi.value)) {
|
||||
if (UnicodeMap.SIMPLE_EQUATOR.compare(lastValue, mi.value) != 0) {
|
||||
// System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value);
|
||||
lastValue = mi.value;
|
||||
}
|
||||
@ -140,7 +140,7 @@ public class TestUtilities extends TestFmwk {
|
||||
for (int i = 0; i < LIMIT; ++i) {
|
||||
Object value1 = map1.getValue(i);
|
||||
Object value2 = map2.get(new Integer(i));
|
||||
if (!equator.isEqual(value1, value2)) {
|
||||
if (equator.compare(value1, value2) != 0) {
|
||||
errln(counter + " Difference at " + Utility.hex(i)
|
||||
+ "\t UnicodeMap: " + value1
|
||||
+ "\t HashMap: " + value2);
|
||||
|
@ -10,6 +10,7 @@ import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
@ -30,11 +31,16 @@ public final class UnicodeMap implements Cloneable {
|
||||
private int length = 2;
|
||||
private int[] transitions = {0,0x110000,0,0,0,0,0,0,0,0};
|
||||
private Object[] values = new Object[10];
|
||||
private boolean errorOnReset = false;
|
||||
|
||||
private ListSet availableValues;
|
||||
boolean staleAvailableValues = false;
|
||||
|
||||
private int lastIndex = 0;
|
||||
|
||||
public UnicodeMap(Equator equator) {
|
||||
public UnicodeMap(Comparator equator) {
|
||||
this.equator = equator;
|
||||
availableValues = new ListSet(equator);
|
||||
}
|
||||
|
||||
public UnicodeMap() {
|
||||
@ -49,7 +55,7 @@ public final class UnicodeMap implements Cloneable {
|
||||
if (length != that.length || !equator.equals(that.equator)) return false;
|
||||
for (int i = 0; i < length-1; ++i) {
|
||||
if (transitions[i] != that.transitions[i]) return false;
|
||||
if (!equator.isEqual(values[i], that.values[i])) return false;
|
||||
if (!areEqual(values[i], that.values[i])) return false;
|
||||
}
|
||||
return true;
|
||||
} catch (ClassCastException e) {
|
||||
@ -57,12 +63,22 @@ public final class UnicodeMap implements Cloneable {
|
||||
}
|
||||
}
|
||||
|
||||
public int getHashCode(Object o) {
|
||||
return o.hashCode();
|
||||
//equator.getHashCode
|
||||
}
|
||||
|
||||
public boolean areEqual(Object a, Object b) {
|
||||
return equator.compare(a, b) == 0;
|
||||
//equator.getHashCode
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
int result = length;
|
||||
// TODO might want to abbreviate this for speed.
|
||||
for (int i = 0; i < length-1; ++i) {
|
||||
result = 37*result + transitions[i];
|
||||
result = 37*result + equator.getHashCode(values[i]);
|
||||
result = 37*result + getHashCode(values[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -75,6 +91,8 @@ public final class UnicodeMap implements Cloneable {
|
||||
that.length = length;
|
||||
that.transitions = (int[]) transitions.clone();
|
||||
that.values = (Object[]) values.clone();
|
||||
that.equator = equator;
|
||||
that.availableValues = new ListSet(equator);
|
||||
return that;
|
||||
}
|
||||
|
||||
@ -87,7 +105,7 @@ public final class UnicodeMap implements Cloneable {
|
||||
throw new IllegalArgumentException("Invariant failed: Lengths bad");
|
||||
}
|
||||
for (int i = 1; i < length-1; ++i) {
|
||||
if (equator.isEqual(values[i-1], values[i])) {
|
||||
if (areEqual(values[i-1], values[i])) {
|
||||
throw new IllegalArgumentException("Invariant failed: values shared at "
|
||||
+ "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
|
||||
+ "\t" + Utility.hex(i) + ": <" + values[i] + ">"
|
||||
@ -107,39 +125,20 @@ public final class UnicodeMap implements Cloneable {
|
||||
}
|
||||
}
|
||||
|
||||
public interface Equator {
|
||||
/**
|
||||
* Comparator function. If overridden, must handle case of null,
|
||||
* and compare any two objects that could be compared.
|
||||
* Must obey normal rules of symmetry: a=b => b=a
|
||||
* and transitivity: a=b & b=c => a=b)
|
||||
* @param a
|
||||
* @param b
|
||||
* @return true if a and b are equal
|
||||
*/
|
||||
public boolean isEqual(Object a, Object b);
|
||||
|
||||
/**
|
||||
* Must obey normal rules: a=b => getHashCode(a)=getHashCode(b)
|
||||
* @param object
|
||||
* @return a hash code for the object
|
||||
*/
|
||||
public int getHashCode(Object object);
|
||||
}
|
||||
|
||||
private static final class SimpleEquator implements Equator {
|
||||
public boolean isEqual(Object a, Object b) {
|
||||
if (a == b) return true;
|
||||
if (a == null || b == null) return false;
|
||||
return a.equals(b);
|
||||
private static final class SimpleEquator implements Comparator {
|
||||
public int compare(Object a, Object b) {
|
||||
if (a == b) return 0;
|
||||
if (a == null) return -1;
|
||||
if (b == null) return 1;
|
||||
return ((Comparable)a).compareTo((Comparable)b);
|
||||
}
|
||||
public int getHashCode(Object a) {
|
||||
if (a == null) return 0;
|
||||
return a.hashCode();
|
||||
}
|
||||
}
|
||||
public static Equator SIMPLE_EQUATOR = new SimpleEquator();
|
||||
private Equator equator = SIMPLE_EQUATOR;
|
||||
public static Comparator SIMPLE_EQUATOR = new SimpleEquator();
|
||||
private Comparator equator = SIMPLE_EQUATOR;
|
||||
|
||||
/**
|
||||
* Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
|
||||
@ -261,7 +260,16 @@ public final class UnicodeMap implements Cloneable {
|
||||
}
|
||||
int limitIndex = baseIndex + 1;
|
||||
// cases are (a) value is already set
|
||||
if (equator.isEqual(values[baseIndex], value)) return this;
|
||||
if (areEqual(values[baseIndex], value)) return this;
|
||||
if (errorOnReset && values[baseIndex] != null) {
|
||||
throw new IllegalArgumentException("Attempt to reset value for " + Utility.hex(codepoint)
|
||||
+ " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
|
||||
}
|
||||
|
||||
// adjust the available values
|
||||
staleAvailableValues = true;
|
||||
availableValues.add(value); // add if not there already
|
||||
|
||||
int baseCP = transitions[baseIndex];
|
||||
int limitCP = transitions[limitIndex];
|
||||
// we now start walking through the difference case,
|
||||
@ -271,12 +279,12 @@ public final class UnicodeMap implements Cloneable {
|
||||
if (baseCP == codepoint) {
|
||||
// CASE: At very start of range
|
||||
boolean connectsWithPrevious =
|
||||
baseIndex != 0 && equator.isEqual(value, values[baseIndex-1]);
|
||||
baseIndex != 0 && areEqual(value, values[baseIndex-1]);
|
||||
|
||||
if (limitCP == codepoint + 1) {
|
||||
// CASE: Single codepoint range
|
||||
boolean connectsWithFollowing =
|
||||
baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
|
||||
baseIndex < length - 1 && areEqual(value, values[limitIndex]);
|
||||
|
||||
if (connectsWithPrevious) {
|
||||
// A1a connects with previous & following, so remove index
|
||||
@ -308,7 +316,7 @@ public final class UnicodeMap implements Cloneable {
|
||||
// CASE: at end of range
|
||||
// if connects, just back up range
|
||||
boolean connectsWithFollowing =
|
||||
baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
|
||||
baseIndex < length - 1 && areEqual(value, values[limitIndex]);
|
||||
|
||||
if (connectsWithFollowing) {
|
||||
--transitions[limitIndex];
|
||||
@ -396,6 +404,8 @@ public final class UnicodeMap implements Cloneable {
|
||||
* @return this (for chaining)
|
||||
*/
|
||||
public UnicodeMap setMissing(Object value) {
|
||||
staleAvailableValues = true;
|
||||
availableValues.add(value);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if (values[i] == null) values[i] = value;
|
||||
}
|
||||
@ -412,7 +422,7 @@ public final class UnicodeMap implements Cloneable {
|
||||
public UnicodeSet getSet(Object value, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
if (equator.isEqual(value, values[i])) {
|
||||
if (areEqual(value, values[i])) {
|
||||
result.add(transitions[i], transitions[i+1]-1);
|
||||
}
|
||||
}
|
||||
@ -429,13 +439,18 @@ public final class UnicodeMap implements Cloneable {
|
||||
* @return result
|
||||
*/
|
||||
public Collection getAvailableValues(Collection result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
Object value = values[i];
|
||||
if (value == null) continue;
|
||||
if (result.contains(value)) continue;
|
||||
result.add(value);
|
||||
}
|
||||
if (staleAvailableValues) {
|
||||
// collect all the current values
|
||||
// retain them in the availableValues
|
||||
Set temp = new TreeSet(equator);
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
temp.add(values[i]);
|
||||
}
|
||||
availableValues.retainAll(temp);
|
||||
staleAvailableValues = false;
|
||||
}
|
||||
if (result == null) result = new ArrayList(1);
|
||||
result.addAll(availableValues);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -539,4 +554,16 @@ public final class UnicodeMap implements Cloneable {
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
/**
|
||||
* @return Returns the errorOnReset.
|
||||
*/
|
||||
public boolean isErrorOnReset() {
|
||||
return errorOnReset;
|
||||
}
|
||||
/**
|
||||
* @param errorOnReset The errorOnReset to set.
|
||||
*/
|
||||
public void setErrorOnReset(boolean errorOnReset) {
|
||||
this.errorOnReset = errorOnReset;
|
||||
}
|
||||
}
|
||||
|
@ -110,6 +110,10 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
return _getValue(codepoint);
|
||||
}
|
||||
|
||||
//public String getValue(int codepoint, boolean isShort) {
|
||||
// return getValue(codepoint);
|
||||
//}
|
||||
|
||||
public List getNameAliases(List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
return _getNameAliases(result);
|
||||
@ -118,6 +122,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
result = _getValueAliases(valueAlias, result);
|
||||
if (!result.contains(valueAlias) && type < NUMERIC) {
|
||||
result = _getValueAliases(valueAlias, result); // for debugging
|
||||
throw new IllegalArgumentException(
|
||||
"Internal error: " + getName() + " doesn't contain " + valueAlias
|
||||
+ ": " + new BagFormatter().join(result));
|
||||
@ -146,6 +151,372 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
return getAvailableValues(null);
|
||||
}
|
||||
|
||||
public final String getValue(int codepoint, boolean getShortest) {
|
||||
String result = getValue(codepoint);
|
||||
if (type >= MISC || result == null || !getShortest) return result;
|
||||
return getFirstValueAlias(result);
|
||||
}
|
||||
|
||||
public final String getFirstNameAlias() {
|
||||
if (firstNameAlias == null) {
|
||||
firstNameAlias = (String) getNameAliases().get(0);
|
||||
}
|
||||
return firstNameAlias;
|
||||
}
|
||||
|
||||
public final String getFirstValueAlias(String value) {
|
||||
if (valueToFirstValueAlias == null) _getFirstValueAliasCache();
|
||||
return (String)valueToFirstValueAlias.get(value);
|
||||
}
|
||||
|
||||
private void _getFirstValueAliasCache() {
|
||||
maxValueWidth = 0;
|
||||
maxFirstValueAliasWidth = 0;
|
||||
valueToFirstValueAlias = new HashMap(1);
|
||||
Iterator it = getAvailableValues().iterator();
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
String first = (String) getValueAliases(value).get(0);
|
||||
if (first == null) { // internal error
|
||||
throw new IllegalArgumentException("Value not in value aliases: " + value);
|
||||
}
|
||||
if (DEBUG && CHECK_NAME.equals(getName())) {
|
||||
System.out.println("First Alias: " + getName() + ": " + value + " => "
|
||||
+ first + new BagFormatter().join(getValueAliases(value)));
|
||||
}
|
||||
valueToFirstValueAlias.put(value,first);
|
||||
if (value.length() > maxValueWidth) {
|
||||
maxValueWidth = value.length();
|
||||
}
|
||||
if (first.length() > maxFirstValueAliasWidth) {
|
||||
maxFirstValueAliasWidth = first.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int maxValueWidth = -1;
|
||||
private int maxFirstValueAliasWidth = -1;
|
||||
|
||||
public int getMaxWidth(boolean getShortest) {
|
||||
if (maxValueWidth < 0) _getFirstValueAliasCache();
|
||||
if (getShortest) return maxFirstValueAliasWidth;
|
||||
return maxValueWidth;
|
||||
}
|
||||
|
||||
public final UnicodeSet getSet(String propertyValue) {
|
||||
return getSet(propertyValue,null);
|
||||
}
|
||||
public final UnicodeSet getSet(Matcher matcher) {
|
||||
return getSet(matcher,null);
|
||||
}
|
||||
|
||||
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
|
||||
return getSet(new SimpleMatcher(propertyValue,
|
||||
isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
|
||||
result);
|
||||
}
|
||||
|
||||
private UnicodeMap unicodeMap = null;
|
||||
|
||||
public static final String UNUSED = "??";
|
||||
|
||||
public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
if (isType(STRING_OR_MISC_MASK)) {
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
String value = getValue(i);
|
||||
if (value != null && matcher.matches(value)) {
|
||||
result.add(i);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
List temp = new ArrayList(1); // to avoid reallocating...
|
||||
UnicodeMap um = getUnicodeMap();
|
||||
Iterator it = um.getAvailableValues(null).iterator();
|
||||
main:
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
temp.clear();
|
||||
Iterator it2 = getValueAliases(value,temp).iterator();
|
||||
while (it2.hasNext()) {
|
||||
String value2 = (String)it2.next();
|
||||
//System.out.println("Values:" + value2);
|
||||
if (matcher.matches(value2)
|
||||
|| matcher.matches(toSkeleton(value2))) {
|
||||
um.getSet(value, result);
|
||||
continue main;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
public UnicodeSet getMatchSet(UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
addAll(matchIterator, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setMatchSet(UnicodeSet set) {
|
||||
matchIterator = new UnicodeSetIterator(set);
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Utility for debugging
|
||||
*/
|
||||
public static String getStack() {
|
||||
Exception e = new Exception();
|
||||
StringWriter sw = new StringWriter();
|
||||
PrintWriter pw = new PrintWriter(sw);
|
||||
e.printStackTrace(pw);
|
||||
pw.flush();
|
||||
return "Showing Stack with fake " + sw.getBuffer().toString();
|
||||
}
|
||||
|
||||
// TODO use this instead of plain strings
|
||||
public static class Name implements Comparable {
|
||||
private static Map skeletonCache;
|
||||
private String skeleton;
|
||||
private String pretty;
|
||||
public final int RAW = 0, TITLE = 1, NORMAL = 2;
|
||||
public Name(String name, int style) {
|
||||
if (name == null) name = "";
|
||||
if (style == RAW) {
|
||||
skeleton = pretty = name;
|
||||
} else {
|
||||
pretty = regularize(name, style == TITLE);
|
||||
skeleton = toSkeleton(pretty);
|
||||
}
|
||||
}
|
||||
public int compareTo(Object o) {
|
||||
return skeleton.compareTo(((Name)o).skeleton);
|
||||
}
|
||||
public boolean equals(Object o) {
|
||||
return skeleton.equals(((Name)o).skeleton);
|
||||
}
|
||||
public int hashCode() {
|
||||
return skeleton.hashCode();
|
||||
}
|
||||
public String toString() {
|
||||
return pretty;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the unicode map
|
||||
*/
|
||||
protected UnicodeMap getUnicodeMap() {
|
||||
if (unicodeMap == null) unicodeMap = _getUnicodeMap();
|
||||
return unicodeMap;
|
||||
}
|
||||
|
||||
protected UnicodeMap _getUnicodeMap() {
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
//if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
|
||||
result.put(i, getValue(i));
|
||||
}
|
||||
if (DEBUG && CHECK_NAME.equals(getName())) {
|
||||
System.out.println(getName() + ":\t" + getClass().getName()
|
||||
+ "\t" + getVersion());
|
||||
System.out.println(getStack());
|
||||
System.out.println(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addUnique(Object obj, Collection result) {
|
||||
if (obj != null && !result.contains(obj)) result.add(obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
|
||||
public int compare(Object o1, Object o2) {
|
||||
return compareNames((String)o1, (String)o2);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*
|
||||
*/
|
||||
// TODO optimize
|
||||
public static boolean equalNames(String a, String b) {
|
||||
if (a == b) return true;
|
||||
if (a == null) return false;
|
||||
return toSkeleton(a).equals(toSkeleton(b));
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
// TODO optimize
|
||||
public static int compareNames(String a, String b) {
|
||||
if (a == b) return 0;
|
||||
if (a == null) return -1;
|
||||
if (b == null) return 1;
|
||||
return toSkeleton(a).compareTo(toSkeleton(b));
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
// TODO account for special names, tibetan, hangul
|
||||
public static String toSkeleton(String source) {
|
||||
if (source == null) return null;
|
||||
StringBuffer skeletonBuffer = new StringBuffer();
|
||||
boolean gotOne = false;
|
||||
// remove spaces, '_', '-'
|
||||
// we can do this with char, since no surrogates are involved
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char ch = source.charAt(i);
|
||||
if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
|
||||
gotOne = true;
|
||||
} else {
|
||||
char ch2 = Character.toLowerCase(ch);
|
||||
if (ch2 != ch) {
|
||||
gotOne = true;
|
||||
skeletonBuffer.append(ch2);
|
||||
} else {
|
||||
skeletonBuffer.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!gotOne) return source; // avoid string creation
|
||||
return skeletonBuffer.toString();
|
||||
}
|
||||
|
||||
// get the name skeleton
|
||||
public static String toNameSkeleton(String source) {
|
||||
if (source == null) return null;
|
||||
StringBuffer result = new StringBuffer();
|
||||
// remove spaces, medial '-'
|
||||
// we can do this with char, since no surrogates are involved
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char ch = source.charAt(i);
|
||||
if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ch == '<' || ch == '>') {
|
||||
result.append(ch);
|
||||
} else if (ch == ' ') {
|
||||
// don't copy ever
|
||||
} else if (ch == '-') {
|
||||
// only copy non-medials AND trailing O-E
|
||||
if (0 == i
|
||||
|| i == source.length() - 1
|
||||
|| source.charAt(i-1) == ' '
|
||||
|| source.charAt(i+1) == ' '
|
||||
|| (i == source.length() - 2
|
||||
&& source.charAt(i-1) == 'O'
|
||||
&& source.charAt(i+1) == 'E')) {
|
||||
System.out.println("****** EXCEPTION " + source);
|
||||
result.append(ch);
|
||||
}
|
||||
// otherwise don't copy
|
||||
} else {
|
||||
throw new IllegalArgumentException("Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* These routines use the Java functions, because they only need to act on ASCII
|
||||
* Changes space, - into _, inserts _ between lower and UPPER.
|
||||
*/
|
||||
public static String regularize(String source, boolean titlecaseStart) {
|
||||
if (source == null) return source;
|
||||
/*if (source.equals("noBreak")) { // HACK
|
||||
if (titlecaseStart) return "NoBreak";
|
||||
return source;
|
||||
}
|
||||
*/
|
||||
StringBuffer result = new StringBuffer();
|
||||
int lastCat = -1;
|
||||
boolean haveFirstCased = true;
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char c = source.charAt(i);
|
||||
if (c == ' ' || c == '-' || c == '_') {
|
||||
c = '_';
|
||||
haveFirstCased = true;
|
||||
}
|
||||
if (c == '=') haveFirstCased = true;
|
||||
int cat = Character.getType(c);
|
||||
if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
|
||||
result.append('_');
|
||||
}
|
||||
if (haveFirstCased && (cat == Character.LOWERCASE_LETTER
|
||||
|| cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
|
||||
if (titlecaseStart) {
|
||||
c = Character.toUpperCase(c);
|
||||
}
|
||||
haveFirstCased = false;
|
||||
}
|
||||
result.append(c);
|
||||
lastCat = cat;
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function for comparing codepoint to string without
|
||||
* generating new string.
|
||||
* @param codepoint
|
||||
* @param other
|
||||
* @return true if the codepoint equals the string
|
||||
*/
|
||||
public static final boolean equals(int codepoint, String other) {
|
||||
if (other.length() == 1) {
|
||||
return codepoint == other.charAt(0);
|
||||
}
|
||||
if (other.length() == 2) {
|
||||
return other.equals(UTF16.valueOf(codepoint));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility that should be on UnicodeSet
|
||||
* @param source
|
||||
* @param result
|
||||
*/
|
||||
static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
|
||||
while (source.nextRange()) {
|
||||
if (source.codepoint == UnicodeSetIterator.IS_STRING) {
|
||||
result.add(source.string);
|
||||
} else {
|
||||
result.add(source.codepoint, source.codepointEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addAllUnique(Collection source, Collection result) {
|
||||
for (Iterator it = source.iterator(); it.hasNext();) {
|
||||
addUnique(it.next(), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addAllUnique(Object[] source, Collection result) {
|
||||
for (int i = 0; i < source.length; ++i) {
|
||||
addUnique(source[i], result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static public class Factory {
|
||||
static boolean DEBUG = false;
|
||||
|
||||
@ -503,21 +874,31 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
}
|
||||
}
|
||||
|
||||
public static abstract class SimpleProperty extends UnicodeProperty {
|
||||
private List propertyAliases = new ArrayList(1);
|
||||
public static abstract class BaseProperty extends UnicodeProperty {
|
||||
protected List propertyAliases = new ArrayList(1);
|
||||
String version;
|
||||
public BaseProperty setMain(String alias, String shortAlias, int propertyType,
|
||||
String version) {
|
||||
setName(alias);
|
||||
setType(propertyType);
|
||||
propertyAliases.add(shortAlias);
|
||||
propertyAliases.add(alias);
|
||||
this.version = version;
|
||||
return this;
|
||||
}
|
||||
public String _getVersion() {
|
||||
return version;
|
||||
}
|
||||
public List _getNameAliases(List result) {
|
||||
addAllUnique(propertyAliases, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static abstract class SimpleProperty extends BaseProperty {
|
||||
List values;
|
||||
Map toValueAliases = new HashMap(1);
|
||||
String version;
|
||||
|
||||
public SimpleProperty setMain(String alias, String shortAlias, int propertyType,
|
||||
String version) {
|
||||
setName(alias);
|
||||
setType(propertyType);
|
||||
propertyAliases.add(shortAlias);
|
||||
propertyAliases.add(alias);
|
||||
this.version = version;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SimpleProperty addName(String alias) {
|
||||
propertyAliases.add(alias);
|
||||
@ -546,11 +927,6 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
return this;
|
||||
}
|
||||
|
||||
public List _getNameAliases(List result) {
|
||||
addAllUnique(propertyAliases, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (toValueAliases == null) _fillValues();
|
||||
List a = (List) toValueAliases.get(valueAlias);
|
||||
@ -582,384 +958,27 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
addUnique(alias, aliases);
|
||||
addUnique(item, aliases);
|
||||
}
|
||||
|
||||
public String _getVersion() {
|
||||
return version;
|
||||
}
|
||||
}
|
||||
|
||||
public static class UnicodeMapProperty extends SimpleProperty {
|
||||
private UnicodeMap unicodeMap;
|
||||
public static class UnicodeMapProperty extends BaseProperty {
|
||||
protected UnicodeMap unicodeMap;
|
||||
protected String _getValue(int codepoint) {
|
||||
return (String) unicodeMap.getValue(codepoint);
|
||||
}
|
||||
protected List _getValueAliases(String valueAlias, List result) {
|
||||
if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result;
|
||||
result.add(valueAlias);
|
||||
return result; // no other aliases
|
||||
}
|
||||
protected List _getAvailableValues(List result) {
|
||||
return (List) unicodeMap.getAvailableValues(result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public final String getValue(int codepoint, boolean getShortest) {
|
||||
String result = getValue(codepoint);
|
||||
if (type >= MISC || result == null || !getShortest) return result;
|
||||
return getFirstValueAlias(result);
|
||||
}
|
||||
|
||||
public final String getFirstNameAlias() {
|
||||
if (firstNameAlias == null) {
|
||||
firstNameAlias = (String) getNameAliases().get(0);
|
||||
}
|
||||
return firstNameAlias;
|
||||
}
|
||||
|
||||
public final String getFirstValueAlias(String value) {
|
||||
if (valueToFirstValueAlias == null) _getFirstValueAliasCache();
|
||||
return (String)valueToFirstValueAlias.get(value);
|
||||
}
|
||||
|
||||
private void _getFirstValueAliasCache() {
|
||||
maxValueWidth = 0;
|
||||
maxFirstValueAliasWidth = 0;
|
||||
valueToFirstValueAlias = new HashMap(1);
|
||||
Iterator it = getAvailableValues().iterator();
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
String first = (String) getValueAliases(value).get(0);
|
||||
if (first == null) { // internal error
|
||||
throw new IllegalArgumentException("Value not in value aliases: " + value);
|
||||
}
|
||||
if (DEBUG && CHECK_NAME.equals(getName())) {
|
||||
System.out.println("First Alias: " + getName() + ": " + value + " => "
|
||||
+ first + new BagFormatter().join(getValueAliases(value)));
|
||||
}
|
||||
valueToFirstValueAlias.put(value,first);
|
||||
if (value.length() > maxValueWidth) {
|
||||
maxValueWidth = value.length();
|
||||
}
|
||||
if (first.length() > maxFirstValueAliasWidth) {
|
||||
maxFirstValueAliasWidth = first.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int maxValueWidth = -1;
|
||||
private int maxFirstValueAliasWidth = -1;
|
||||
|
||||
public int getMaxWidth(boolean getShortest) {
|
||||
if (maxValueWidth < 0) _getFirstValueAliasCache();
|
||||
if (getShortest) return maxFirstValueAliasWidth;
|
||||
return maxValueWidth;
|
||||
}
|
||||
|
||||
public final UnicodeSet getSet(String propertyValue) {
|
||||
return getSet(propertyValue,null);
|
||||
}
|
||||
public final UnicodeSet getSet(Matcher matcher) {
|
||||
return getSet(matcher,null);
|
||||
}
|
||||
|
||||
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
|
||||
return getSet(new SimpleMatcher(propertyValue,
|
||||
isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
|
||||
result);
|
||||
}
|
||||
|
||||
private UnicodeMap unicodeMap = null;
|
||||
|
||||
public static final String UNUSED = "??";
|
||||
|
||||
public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
if (isType(STRING_OR_MISC_MASK)) {
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
String value = getValue(i);
|
||||
if (value != null && matcher.matches(value)) {
|
||||
result.add(i);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
List temp = new ArrayList(1); // to avoid reallocating...
|
||||
UnicodeMap um = getUnicodeMap();
|
||||
Iterator it = um.getAvailableValues(null).iterator();
|
||||
main:
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
temp.clear();
|
||||
Iterator it2 = getValueAliases(value,temp).iterator();
|
||||
while (it2.hasNext()) {
|
||||
String value2 = (String)it2.next();
|
||||
//System.out.println("Values:" + value2);
|
||||
if (matcher.matches(value2)
|
||||
|| matcher.matches(toSkeleton(value2))) {
|
||||
um.getSet(value, result);
|
||||
continue main;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
public UnicodeSet getMatchSet(UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
addAll(matchIterator, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setMatchSet(UnicodeSet set) {
|
||||
matchIterator = new UnicodeSetIterator(set);
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Utility for debugging
|
||||
*/
|
||||
public static String getStack() {
|
||||
Exception e = new Exception();
|
||||
StringWriter sw = new StringWriter();
|
||||
PrintWriter pw = new PrintWriter(sw);
|
||||
e.printStackTrace(pw);
|
||||
pw.flush();
|
||||
return "Showing Stack with fake " + sw.getBuffer().toString();
|
||||
}
|
||||
|
||||
// TODO use this instead of plain strings
|
||||
public static class Name implements Comparable {
|
||||
private static Map skeletonCache;
|
||||
private String skeleton;
|
||||
private String pretty;
|
||||
public final int RAW = 0, TITLE = 1, NORMAL = 2;
|
||||
public Name(String name, int style) {
|
||||
if (name == null) name = "";
|
||||
if (style == RAW) {
|
||||
skeleton = pretty = name;
|
||||
} else {
|
||||
pretty = regularize(name, style == TITLE);
|
||||
skeleton = toSkeleton(pretty);
|
||||
}
|
||||
}
|
||||
public int compareTo(Object o) {
|
||||
return skeleton.compareTo(((Name)o).skeleton);
|
||||
}
|
||||
public boolean equals(Object o) {
|
||||
return skeleton.equals(((Name)o).skeleton);
|
||||
}
|
||||
public int hashCode() {
|
||||
return skeleton.hashCode();
|
||||
}
|
||||
public String toString() {
|
||||
return pretty;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
|
||||
public int compare(Object o1, Object o2) {
|
||||
return compareNames((String)o1, (String)o2);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*
|
||||
*/
|
||||
// TODO optimize
|
||||
public static boolean equalNames(String a, String b) {
|
||||
if (a == b) return true;
|
||||
if (a == null) return false;
|
||||
return toSkeleton(a).equals(toSkeleton(b));
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
// TODO optimize
|
||||
public static int compareNames(String a, String b) {
|
||||
if (a == b) return 0;
|
||||
if (a == null) return -1;
|
||||
if (b == null) return 1;
|
||||
return toSkeleton(a).compareTo(toSkeleton(b));
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
// TODO account for special names, tibetan, hangul
|
||||
public static String toSkeleton(String source) {
|
||||
if (source == null) return null;
|
||||
StringBuffer skeletonBuffer = new StringBuffer();
|
||||
boolean gotOne = false;
|
||||
// remove spaces, '_', '-'
|
||||
// we can do this with char, since no surrogates are involved
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char ch = source.charAt(i);
|
||||
if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
|
||||
gotOne = true;
|
||||
} else {
|
||||
char ch2 = Character.toLowerCase(ch);
|
||||
if (ch2 != ch) {
|
||||
gotOne = true;
|
||||
skeletonBuffer.append(ch2);
|
||||
} else {
|
||||
skeletonBuffer.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!gotOne) return source; // avoid string creation
|
||||
return skeletonBuffer.toString();
|
||||
}
|
||||
|
||||
// get the name skeleton
|
||||
public static String toNameSkeleton(String source) {
|
||||
if (source == null) return null;
|
||||
StringBuffer result = new StringBuffer();
|
||||
// remove spaces, medial '-'
|
||||
// we can do this with char, since no surrogates are involved
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char ch = source.charAt(i);
|
||||
if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ch == '<' || ch == '>') {
|
||||
result.append(ch);
|
||||
} else if (ch == ' ') {
|
||||
// don't copy ever
|
||||
} else if (ch == '-') {
|
||||
// only copy non-medials AND trailing O-E
|
||||
if (0 == i
|
||||
|| i == source.length() - 1
|
||||
|| source.charAt(i-1) == ' '
|
||||
|| source.charAt(i+1) == ' '
|
||||
|| (i == source.length() - 2
|
||||
&& source.charAt(i-1) == 'O'
|
||||
&& source.charAt(i+1) == 'E')) {
|
||||
System.out.println("****** EXCEPTION " + source);
|
||||
result.append(ch);
|
||||
}
|
||||
// otherwise don't copy
|
||||
} else {
|
||||
throw new IllegalArgumentException("Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* These routines use the Java functions, because they only need to act on ASCII
|
||||
* Changes space, - into _, inserts _ between lower and UPPER.
|
||||
*/
|
||||
public static String regularize(String source, boolean titlecaseStart) {
|
||||
if (source == null) return source;
|
||||
/*if (source.equals("noBreak")) { // HACK
|
||||
if (titlecaseStart) return "NoBreak";
|
||||
return source;
|
||||
}
|
||||
*/
|
||||
StringBuffer result = new StringBuffer();
|
||||
int lastCat = -1;
|
||||
boolean haveFirstCased = true;
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char c = source.charAt(i);
|
||||
if (c == ' ' || c == '-' || c == '_') {
|
||||
c = '_';
|
||||
haveFirstCased = true;
|
||||
}
|
||||
if (c == '=') haveFirstCased = true;
|
||||
int cat = Character.getType(c);
|
||||
if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
|
||||
result.append('_');
|
||||
}
|
||||
if (haveFirstCased && (cat == Character.LOWERCASE_LETTER
|
||||
|| cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
|
||||
if (titlecaseStart) {
|
||||
c = Character.toUpperCase(c);
|
||||
}
|
||||
haveFirstCased = false;
|
||||
}
|
||||
result.append(c);
|
||||
lastCat = cat;
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function for comparing codepoint to string without
|
||||
* generating new string.
|
||||
* @param codepoint
|
||||
* @param other
|
||||
* @return true if the codepoint equals the string
|
||||
*/
|
||||
public static final boolean equals(int codepoint, String other) {
|
||||
if (other.length() == 1) {
|
||||
return codepoint == other.charAt(0);
|
||||
}
|
||||
if (other.length() == 2) {
|
||||
return other.equals(UTF16.valueOf(codepoint));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility that should be on UnicodeSet
|
||||
* @param source
|
||||
* @param result
|
||||
*/
|
||||
static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
|
||||
while (source.nextRange()) {
|
||||
if (source.codepoint == UnicodeSetIterator.IS_STRING) {
|
||||
result.add(source.string);
|
||||
} else {
|
||||
result.add(source.codepoint, source.codepointEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addUnique(Object obj, Collection result) {
|
||||
if (obj != null && !result.contains(obj)) result.add(obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addAllUnique(Collection source, Collection result) {
|
||||
for (Iterator it = source.iterator(); it.hasNext();) {
|
||||
addUnique(it.next(), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addAllUnique(Object[] source, Collection result) {
|
||||
for (int i = 0; i < source.length; ++i) {
|
||||
addUnique(source[i], result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the unicode map
|
||||
*/
|
||||
protected UnicodeMap getUnicodeMap() {
|
||||
if (unicodeMap == null) unicodeMap = _getUnicodeMap();
|
||||
return unicodeMap;
|
||||
}
|
||||
|
||||
protected UnicodeMap _getUnicodeMap() {
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
//if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
|
||||
result.put(i, getValue(i));
|
||||
}
|
||||
if (DEBUG && CHECK_NAME.equals(getName())) {
|
||||
System.out.println(getName() + ":\t" + getClass().getName()
|
||||
+ "\t" + getVersion());
|
||||
System.out.println(getStack());
|
||||
System.out.println(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,9 @@
|
||||
*/
|
||||
package com.ibm.icu.dev.tool.cldr;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
@ -78,7 +80,9 @@ public class GenerateCldrTests {
|
||||
LOGDIR = 3,
|
||||
SOURCEDIR =4,
|
||||
MATCH = 5,
|
||||
FULLY_RESOLVED = 6;
|
||||
FULLY_RESOLVED = 6,
|
||||
LANGUAGES = 7,
|
||||
TZADIR = 8;
|
||||
|
||||
private static final UOption[] options = {
|
||||
UOption.HELP_H(),
|
||||
@ -88,17 +92,38 @@ public class GenerateCldrTests {
|
||||
UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\"),
|
||||
UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
|
||||
UOption.create("fullyresolved", 'f', UOption.NO_ARG),
|
||||
UOption.create("languages", 'g', UOption.NO_ARG),
|
||||
UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
|
||||
};
|
||||
|
||||
CldrCollations cldrCollations;
|
||||
static String logDir = null, destDir = null;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static boolean hasLocalizedLanguageFor(ULocale locale, ULocale otherLocale) {
|
||||
String lang = otherLocale.getLanguage();
|
||||
String localizedVersion = otherLocale.getDisplayLanguage(locale);
|
||||
return !lang.equals(localizedVersion);
|
||||
}
|
||||
|
||||
public static boolean hasLocalizedCountryFor(ULocale locale, ULocale otherLocale) {
|
||||
String country = otherLocale.getCountry();
|
||||
if (country.equals("")) return true;
|
||||
String localizedVersion = otherLocale.getDisplayCountry(locale);
|
||||
return !country.equals(localizedVersion);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
UOption.parseArgs(args, options);
|
||||
log = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.txt");
|
||||
try {
|
||||
if (options[LANGUAGES].doesOccur) {
|
||||
generateSize(true);
|
||||
return;
|
||||
}
|
||||
//generateSize();
|
||||
//if (true) return;
|
||||
//compareAvailable();
|
||||
|
||||
//compareAvailable();
|
||||
//if (true) return;
|
||||
//System.out.println(createCaseClosure(new UnicodeSet("[a{bc}{def}{oss}]")));
|
||||
//System.out.println(createCaseClosure(new UnicodeSet("[a-z\u00c3\u0178{aa}]")));
|
||||
@ -118,9 +143,271 @@ public class GenerateCldrTests {
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* @throws IOException
|
||||
*
|
||||
*/
|
||||
private static void generateSize(boolean transliterate) throws IOException {
|
||||
PrintWriter logHtml = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.html");
|
||||
String dir = options[SOURCEDIR].value + "main" + File.separator;
|
||||
DraftChecker dc = new DraftChecker(dir);
|
||||
Set filenames = getMatchingXMLFiles(dir, ".*");
|
||||
Collator col = Collator.getInstance(ULocale.ENGLISH);
|
||||
Set languages = new TreeSet(col), countries = new TreeSet(col),
|
||||
draftLanguages = new TreeSet(col), draftCountries = new TreeSet(col);
|
||||
Map nativeLanguages = new TreeMap(col), nativeCountries = new TreeMap(col),
|
||||
draftNativeLanguages = new TreeMap(col), draftNativeCountries = new TreeMap(col);
|
||||
int localeCount = 0;
|
||||
int draftLocaleCount = 0;
|
||||
for (Iterator it = filenames.iterator(); it.hasNext();) {
|
||||
String localeName = (String) it.next();
|
||||
if (localeName.equals("root")) continue; // skip root
|
||||
boolean draft = dc.isDraft(localeName);
|
||||
if (draft) {
|
||||
draftLocaleCount++;
|
||||
addCounts(localeName, true, draftLanguages, draftCountries, draftNativeLanguages, draftNativeCountries, col);
|
||||
} else {
|
||||
localeCount++;
|
||||
addCounts(localeName, false, languages, countries, nativeLanguages, nativeCountries, col);
|
||||
}
|
||||
if (false) log.println(draft + ", " + localeCount + ", " + languages.size() + ", " + countries.size() + ", "
|
||||
+ draftLocaleCount + ", " + draftLanguages.size() + ", " + draftCountries.size());
|
||||
}
|
||||
draftLanguages.removeAll(languages);
|
||||
for (Iterator it = nativeLanguages.keySet().iterator(); it.hasNext();) {
|
||||
draftNativeLanguages.remove(it.next());
|
||||
}
|
||||
logHtml.println("<html><head>");
|
||||
logHtml.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
logHtml.println("</head><body>");
|
||||
logHtml.println("<p><b>Locales:</b> " + localeCount);
|
||||
logHtml.println("<p><b>Languages:</b> " + languages.size());
|
||||
logHtml.println(showSet(nativeLanguages, transliterate, true));
|
||||
logHtml.println("<p><b>Countries:</b> " + countries.size());
|
||||
logHtml.println(showSet(nativeCountries, transliterate, false));
|
||||
logHtml.println("<p><b>Draft locales:</b> " + draftLocaleCount);
|
||||
logHtml.println("<p><b>Draft languages:</b> " + draftLanguages.size());
|
||||
logHtml.println(showSet(draftNativeLanguages, transliterate, true));
|
||||
logHtml.println("<p><b>Draft countries:</b> " + draftCountries.size());
|
||||
logHtml.println(showSet(draftNativeCountries, transliterate, false));
|
||||
logHtml.println("</body></html>");
|
||||
logHtml.close();
|
||||
}
|
||||
|
||||
static final UnicodeSet NON_LATIN = new UnicodeSet("[^[:latin:][:common:][:inherited:]]");
|
||||
|
||||
/**
|
||||
* @param uloc
|
||||
* @param isDraft TODO
|
||||
* @param draftLanguages
|
||||
* @param draftCountries
|
||||
* @param draftNativeLanguages
|
||||
* @param draftNativeCountries
|
||||
* @param lang
|
||||
* @param country
|
||||
*/
|
||||
private static void addCounts(String localeName, boolean isDraft, Set draftLanguages, Set draftCountries,
|
||||
Map draftNativeLanguages, Map draftNativeCountries, Comparator col) {
|
||||
ULocale uloc = new ULocale(localeName);
|
||||
String lang = localeName, country = "";
|
||||
if (localeName.length() > 3 && localeName.charAt(localeName.length() - 3) == '_') {
|
||||
lang = localeName.substring(0, localeName.length() - 3);
|
||||
country = localeName.substring(localeName.length() - 2);
|
||||
}
|
||||
|
||||
String nativeName, englishName;
|
||||
draftLanguages.add(lang);
|
||||
nativeName = uloc.getDisplayLanguage(uloc);
|
||||
englishName = uloc.getDisplayLanguage(ULocale.ENGLISH);
|
||||
if (!lang.equals("en") && nativeName.equals(englishName)) {
|
||||
log.println((isDraft ? "D" : "") +"\tWarning: in " + localeName + ", display name for " + lang + " equals English: " + nativeName);
|
||||
}
|
||||
draftNativeLanguages.put(fixedTitleCase(uloc, nativeName), localeName);
|
||||
if (!country.equals("")) {
|
||||
draftCountries.add(country);
|
||||
nativeName = getFixedDisplayCountry(uloc, uloc);
|
||||
englishName = getFixedDisplayCountry(uloc, ULocale.ENGLISH);
|
||||
if (!lang.equals("en") && nativeName.equals(englishName)) {
|
||||
log.println((isDraft ? "D" : "") + "\tWarning: in " + localeName + ", display name for " + country + " equals English: " + nativeName);
|
||||
}
|
||||
draftNativeCountries.put(fixedTitleCase(uloc, nativeName), localeName);
|
||||
}
|
||||
}
|
||||
|
||||
static String fixedTitleCase(ULocale uloc, String in) {
|
||||
String result = UCharacter.toTitleCase(uloc, in, null);
|
||||
result = replace(result, "U.s.", "U.S.");
|
||||
result = replace(result, "S.a.r.", "S.A.R.");
|
||||
return result;
|
||||
}
|
||||
/*
|
||||
static void addMapSet(Map m, Object key, Object value, Comparator com) {
|
||||
Set valueSet = (Set) m.get(key);
|
||||
if (valueSet == null) {
|
||||
valueSet = new TreeSet(com);
|
||||
m.put(key, valueSet);
|
||||
}
|
||||
valueSet.add(value);
|
||||
}
|
||||
*/
|
||||
/**
|
||||
* @param uloc
|
||||
* @return
|
||||
*/
|
||||
private static String getFixedDisplayCountry(ULocale uloc, ULocale forLanguage) {
|
||||
String name = uloc.getDisplayCountry(forLanguage);
|
||||
Object trial = fixCountryNames.get(name);
|
||||
if (trial != null) {
|
||||
return (String)trial;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
static Map fixCountryNames = new HashMap();
|
||||
static {
|
||||
fixCountryNames.put("\u0408\u0443\u0433\u043E\u0441\u043B\u0430\u0432\u0438\u0458\u0430", "\u0421\u0440\u0431\u0438\u0458\u0430 \u0438 \u0426\u0440\u043D\u0430 \u0413\u043E\u0440\u0430");
|
||||
fixCountryNames.put("Jugoslavija", "Srbija i Crna Gora");
|
||||
fixCountryNames.put("Yugoslavia", "Serbia and Montenegro");
|
||||
}
|
||||
static {
|
||||
// HACK around lack of Armenian, Ethiopic
|
||||
registerTransliteratorFromFile(options[TZADIR].value, "Latin-Armenian");
|
||||
registerTransliteratorFromFile(options[TZADIR].value, "Latin-Ethiopic");
|
||||
registerTransliteratorFromFile(options[TZADIR].value, "Cyrillic-Latin");
|
||||
registerTransliteratorFromFile(options[TZADIR].value, "Arabic-Latin");
|
||||
}
|
||||
public static final Transliterator toLatin = Transliterator.getInstance("any-latin");
|
||||
|
||||
static void registerTransliteratorFromFile(String dir, String id) {
|
||||
try {
|
||||
String filename = id.replace('-', '_');
|
||||
BufferedReader br = BagFormatter.openUTF8Reader(dir, filename + ".txt");
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
|
||||
buffer.append(line).append("\r\n");
|
||||
}
|
||||
br.close();
|
||||
String rules = buffer.toString();
|
||||
Transliterator t;
|
||||
int pos = id.indexOf('-');
|
||||
String rid;
|
||||
if (pos < 0) {
|
||||
rid = id + "-Any";
|
||||
id = "Any-" + id;
|
||||
} else {
|
||||
rid = id.substring(pos+1) + "-" + id.substring(0, pos);
|
||||
}
|
||||
Transliterator.unregister(id);
|
||||
t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
|
||||
Transliterator.registerInstance(t);
|
||||
|
||||
/*String test = "\u049A\u0430\u0437\u0430\u049B";
|
||||
System.out.println(t.transliterate(test));
|
||||
t = Transliterator.getInstance(id);
|
||||
System.out.println(t.transliterate(test));
|
||||
*/
|
||||
|
||||
Transliterator.unregister(rid);
|
||||
t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
|
||||
Transliterator.registerInstance(t);
|
||||
System.out.println("Registered new Transliterator: " + id + ", " + rid);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw new IllegalArgumentException("Can't open " + dir + ", " + id);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param nativeCountries
|
||||
* @param transliterate TODO
|
||||
* @param isLanguage TODO
|
||||
*/
|
||||
private static String showSet(Map nativeCountries, boolean transliterate, boolean isLanguage) {
|
||||
UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (Iterator it = nativeCountries.keySet().iterator(); it.hasNext();) {
|
||||
String name = (String) it.next();
|
||||
String locale = (String) nativeCountries.get(name);
|
||||
String lang = locale, country = "";
|
||||
if (locale.length() > 3 && locale.charAt(locale.length() - 3) == '_') {
|
||||
lang = locale.substring(0, locale.length() - 3);
|
||||
country = locale.substring(locale.length() - 2);
|
||||
}
|
||||
|
||||
if (result.length() != 0) {
|
||||
result.append(", ");
|
||||
}
|
||||
String title = "";
|
||||
if (isLanguage) {
|
||||
title = lang + ", " + new ULocale(locale).getDisplayLanguage(ULocale.ENGLISH);
|
||||
} else {
|
||||
title = country + ", " + getFixedDisplayCountry(new ULocale(locale), ULocale.ENGLISH);
|
||||
}
|
||||
if (transliterate && NON_LATIN.containsSome(name) && !lang.equals("ja")) {
|
||||
String transName = fixedTitleCase(ULocale.ENGLISH, toLatin.transliterate(name));
|
||||
if (NON_LATIN.containsSome(transName)) {
|
||||
log.println("Can't transliterate " + name + ": " + transName);
|
||||
} else {
|
||||
title += ", " + transName;
|
||||
}
|
||||
}
|
||||
String before = "", after = "";
|
||||
if (title.length() != 0) {
|
||||
before = "<span title=\'" + BagFormatter.toHTML.transliterate(title) + "'>";
|
||||
after = "</span>";
|
||||
}
|
||||
boolean isBIDI = BIDI_R.containsSome(name);
|
||||
if (isBIDI) result.append('\u200E');
|
||||
result.append(before).append(BagFormatter.toHTML.transliterate(name)).append(after);
|
||||
if (isBIDI) result.append('\u200E');
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static class DraftChecker {
|
||||
String dir;
|
||||
Map cache = new HashMap();
|
||||
Object TRUE = new Object();
|
||||
Object FALSE = new Object();
|
||||
public DraftChecker(String dir) {
|
||||
this.dir = dir;
|
||||
}
|
||||
|
||||
public boolean isDraft(String localeName) {
|
||||
Object check = cache.get(localeName);
|
||||
if (check != null) {
|
||||
return check == TRUE;
|
||||
}
|
||||
BufferedReader pw = null;
|
||||
boolean result = true;
|
||||
try {
|
||||
pw = BagFormatter.openUTF8Reader(dir, localeName + ".xml");
|
||||
while (true) {
|
||||
String line = pw.readLine();
|
||||
assert (line != null); // should never get here
|
||||
if (line.indexOf("<ldml") >= 0) {
|
||||
if (line.indexOf("draft") >= 0) {
|
||||
check = TRUE;
|
||||
} else {
|
||||
check = FALSE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
pw.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw new IllegalArgumentException("Failure on " + localeName + ": " + dir + localeName + ".xml");
|
||||
}
|
||||
cache.put(localeName, check);
|
||||
return check == TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
private static void compareAvailable() {
|
||||
ULocale[] cols = Collator.getAvailableULocales();
|
||||
Locale[] alocs = NumberFormat.getAvailableLocales();
|
||||
@ -137,11 +424,29 @@ public class GenerateCldrTests {
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param sLocs
|
||||
*/
|
||||
private static void showLocales(Set sLocs) {
|
||||
for (Iterator it = sLocs.iterator(); it.hasNext();) {
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static void checkLocaleNames() {
|
||||
ULocale[] locales = ULocale.getAvailableLocales();
|
||||
for (int i = 0; i < locales.length; ++i) {
|
||||
if (!hasLocalizedCountryFor(ULocale.ENGLISH, locales[i])
|
||||
|| !hasLocalizedLanguageFor(ULocale.ENGLISH, locales[i])
|
||||
|| !hasLocalizedCountryFor(locales[i], locales[i])
|
||||
|| !hasLocalizedLanguageFor(locales[i], locales[i])) {
|
||||
log.print("FAILURE\t");
|
||||
} else {
|
||||
log.print(" \t");
|
||||
}
|
||||
log.println(locales[i] + "\t" + locales[i].getDisplayName(ULocale.ENGLISH) + "\t" + locales[i].getDisplayName(locales[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sLocs
|
||||
*/
|
||||
private static void showLocales(Set sLocs) {
|
||||
for (Iterator it = sLocs.iterator(); it.hasNext();) {
|
||||
String s = (String) it.next();
|
||||
log.println(s + "\t" + ULocale.getDisplayLanguage(s,"en"));
|
||||
}
|
||||
@ -239,9 +544,14 @@ public class GenerateCldrTests {
|
||||
void generate(String pat) throws Exception {
|
||||
cldrOthers = new CldrOthers(options[SOURCEDIR].value + "main" + File.separator, pat);
|
||||
cldrOthers.show();
|
||||
|
||||
//if (true) return;
|
||||
cldrCollations = new CldrCollations(options[SOURCEDIR].value + "collation" + File.separator, pat);
|
||||
cldrCollations.show();
|
||||
|
||||
cldrOthers = new CldrOthers(options[SOURCEDIR].value + "main" + File.separator, pat);
|
||||
cldrOthers.show();
|
||||
|
||||
getLocaleList();
|
||||
|
||||
Matcher m = Pattern.compile(pat).matcher("");
|
||||
@ -600,8 +910,57 @@ public class GenerateCldrTests {
|
||||
return cldrCollations.getInstance(loc1).equals(cldrCollations.getInstance(loc2)); // Collator.getInstance(loc1).equals(Collator.getInstance(loc2));
|
||||
}
|
||||
};
|
||||
static ULocale zhHack = new ULocale("zh"); // FIXME hack for zh
|
||||
|
||||
DataShower CollationShower = new DataShower() {
|
||||
public void show(ULocale locale, Collection others) {
|
||||
if (locale.equals(zhHack)) return;
|
||||
|
||||
showLocales("collation", others);
|
||||
|
||||
Collator col = cldrCollations.getInstance(locale); // Collator.getInstance(locale);
|
||||
|
||||
UnicodeSet tailored = col.getTailoredSet();
|
||||
if (locale.getLanguage().equals("zh")) {
|
||||
tailored.addAll(new UnicodeSet("[[a-z]-[v]]"));
|
||||
log.println("HACK for Pinyin");
|
||||
}
|
||||
tailored = createCaseClosure(tailored);
|
||||
tailored = nfc(tailored);
|
||||
//System.out.println(tailored.toPattern(true));
|
||||
|
||||
UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE);
|
||||
// add all the exemplars
|
||||
if (false)
|
||||
for (Iterator it = others.iterator(); it.hasNext();) {
|
||||
exemplars.addAll(getExemplarSet((ULocale) it.next(),
|
||||
UnicodeSet.CASE));
|
||||
}
|
||||
|
||||
exemplars = createCaseClosure(exemplars);
|
||||
exemplars = nfc(exemplars);
|
||||
//System.out.println(exemplars.toPattern(true));
|
||||
tailored.addAll(exemplars);
|
||||
//UnicodeSet tailoredMinusHan = new
|
||||
// UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET);
|
||||
if (!exemplars.containsAll(tailored)) {
|
||||
//BagFormatter bf = new BagFormatter();
|
||||
log.println("In Tailored, but not Exemplar; Locale: " + locale
|
||||
+ "\t" + locale.getDisplayName());
|
||||
log.println(new UnicodeSet(tailored).removeAll(exemplars)
|
||||
.toPattern(false));
|
||||
//bf.(log,"tailored", tailored, "exemplars", exemplars);
|
||||
log.flush();
|
||||
}
|
||||
tailored.addAll(new UnicodeSet("[\\ .02{12}]"));
|
||||
tailored.removeAll(SKIP_COLLATION_SET);
|
||||
|
||||
SortedBag bag = new SortedBag(col);
|
||||
doCollationResult(col, tailored, bag);
|
||||
out.println(" </collation>");
|
||||
}
|
||||
};
|
||||
/*
|
||||
public void show(ULocale locale, Collection others) {
|
||||
showLocales("collation", others);
|
||||
|
||||
@ -641,6 +1000,7 @@ public class GenerateCldrTests {
|
||||
doCollationResult(col, tailored, bag);
|
||||
out.println(" </collation>");
|
||||
}};
|
||||
*/
|
||||
static final UnicodeSet SKIP_COLLATION_SET = new UnicodeSet(
|
||||
"[[:script=han:][:script=hangul:]-[\u4e00-\u4eff \u9f00-\u9fff \uac00-\uacff \ud700-\ud7ff]]");
|
||||
|
||||
@ -804,6 +1164,13 @@ public class GenerateCldrTests {
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String replace(String source, String pattern, String replacement) {
|
||||
// dumb code for now
|
||||
for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf(pattern, pos + 1)) {
|
||||
source = source.substring(0, pos) + replacement + source.substring(pos+pattern.length());
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
static class CldrCollations {
|
||||
Set validLocales = new TreeSet();
|
||||
@ -867,13 +1234,6 @@ public class GenerateCldrTests {
|
||||
}
|
||||
}
|
||||
|
||||
public static String replace(String source, String pattern, String replacement) {
|
||||
// dumb code for now
|
||||
for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf(pattern, pos + 1)) {
|
||||
source = source.substring(0, pos) + replacement + source.substring(pos+pattern.length());
|
||||
}
|
||||
return source;
|
||||
}
|
||||
static Transliterator fromHex = Transliterator.getInstance("hex-any");
|
||||
|
||||
private void getCollationRules(String locale) throws Exception {
|
||||
@ -886,7 +1246,10 @@ public class GenerateCldrTests {
|
||||
Map types_rules = new TreeMap();
|
||||
locale_types_rules.put(locale, types_rules);
|
||||
for (Resource current = resource.first; current != null; current = current.next) {
|
||||
//System.out.println(current.name);
|
||||
if (current.name == null) {
|
||||
log.println("Collation: null name found in " + locale);
|
||||
continue;
|
||||
}
|
||||
if (current instanceof ICUResourceWriter.ResourceTable) {
|
||||
ICUResourceWriter.ResourceTable table = (ICUResourceWriter.ResourceTable) current;
|
||||
for (Resource current2 = table.first; current2 != null; current2 = current2.next) {
|
||||
@ -905,7 +1268,7 @@ public class GenerateCldrTests {
|
||||
String rules = fromHex.transliterate(foo.val);
|
||||
RuleBasedCollator fixed = generateCollator(locale, current.name, foo.name, rules);
|
||||
if (fixed != null) {
|
||||
log.println("Rules for: " + locale + "," + current.name);
|
||||
log.println("Rules for: " + locale + ", " + current.name);
|
||||
log.println(rules);
|
||||
if (!rules.equals(foo.val)) {
|
||||
log.println("Original Rules from Ram: ");
|
||||
|
@ -90,7 +90,8 @@ public class GenerateSidewaysView {
|
||||
SKIP = 5,
|
||||
TZADIR = 6,
|
||||
NONVALIDATING = 7,
|
||||
SHOW_DTD = 8;
|
||||
SHOW_DTD = 8,
|
||||
TRANSLIT = 9;
|
||||
|
||||
private static final String NEWLINE = "\n";
|
||||
|
||||
@ -104,11 +105,11 @@ public class GenerateSidewaysView {
|
||||
UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
|
||||
UOption.create("nonvalidating", 'n', UOption.NO_ARG),
|
||||
UOption.create("dtd", 'w', UOption.NO_ARG),
|
||||
UOption.create("transliterate", 'y', UOption.NO_ARG),
|
||||
};
|
||||
private static String timeZoneAliasDir = null;
|
||||
|
||||
public static void main(String[] args) throws SAXException, IOException {
|
||||
|
||||
UOption.parseArgs(args, options);
|
||||
|
||||
Matcher skipper = Pattern.compile(options[SKIP].value).matcher("");
|
||||
@ -1553,6 +1554,10 @@ public class GenerateSidewaysView {
|
||||
}
|
||||
*/
|
||||
void showCacheData() throws IOException {
|
||||
UnicodeSet untransliteratedCharacters = new UnicodeSet();
|
||||
Set translitErrors = new TreeSet();
|
||||
GenerateCldrTests.DraftChecker dc = new GenerateCldrTests.DraftChecker(options[SOURCEDIR].value);
|
||||
dc.isDraft("en");
|
||||
writeStyleSheet();
|
||||
PrintWriter out = null;
|
||||
String lastChainName = "";
|
||||
@ -1597,18 +1602,35 @@ public class GenerateSidewaysView {
|
||||
files.addAll(remainingFiles);
|
||||
dataStyle = " class='nodata'";
|
||||
}
|
||||
|
||||
String extra = "";
|
||||
if (data.string != null && options[TRANSLIT].doesOccur
|
||||
&& GenerateCldrTests.NON_LATIN.containsSome(data.string)) {
|
||||
try {
|
||||
extra = GenerateCldrTests.toLatin.transliterate(data.string);
|
||||
untransliteratedCharacters.addAll(extra);
|
||||
if (extra.equals(data.string)) extra = "";
|
||||
else extra = "<br>(\"" + BagFormatter.toHTML.transliterate(extra) + "\")";
|
||||
} catch (RuntimeException e) {
|
||||
translitErrors.add(e.getMessage());
|
||||
}
|
||||
}
|
||||
out.print("<tr><th" + dataStyle +
|
||||
(lineCounter == 1 ? " width='20%'" : "")
|
||||
+ ">\"" + data + "\"</th><td>");
|
||||
+ ">\"" + data + "\""
|
||||
+ extra
|
||||
+ "</th><td>");
|
||||
boolean first = true;
|
||||
for (Iterator it3 = files.iterator(); it3.hasNext();) {
|
||||
if (first) first = false;
|
||||
else out.print(" ");
|
||||
String localeID = (String)it3.next();
|
||||
boolean emphasize = localeID.equals("root") || localeID.indexOf('_') >= 0;
|
||||
if (dc.isDraft(localeID)) out.print("<i>");
|
||||
if (emphasize) out.print("<b>");
|
||||
out.print("\u00B7" + localeID + "\u00B7");
|
||||
if (emphasize) out.print("</b>");
|
||||
if (dc.isDraft(localeID)) out.print("</i>");
|
||||
}
|
||||
out.println("</td></tr>");
|
||||
}
|
||||
@ -1622,6 +1644,15 @@ public class GenerateSidewaysView {
|
||||
}
|
||||
writeIndex();
|
||||
tripleData.writeData();
|
||||
untransliteratedCharacters.retainAll(GenerateCldrTests.NON_LATIN);
|
||||
log.println("Untranslated Characters*: " + untransliteratedCharacters.toPattern(false));
|
||||
log.println("Untranslated Characters* (hex): " + untransliteratedCharacters.toPattern(true));
|
||||
untransliteratedCharacters.closeOver(UnicodeSet.CASE);
|
||||
log.println("Untranslated Characters: " + untransliteratedCharacters.toPattern(false));
|
||||
log.println("Untranslated Characters (hex): " + untransliteratedCharacters.toPattern(true));
|
||||
for (Iterator it = translitErrors.iterator(); it.hasNext();) {
|
||||
log.println(it.next());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1651,8 +1682,9 @@ public class GenerateSidewaysView {
|
||||
"Each value is listed under the field designator (in XML XPath format), " +
|
||||
"followed by all the locales that use it. " +
|
||||
"Locales are omitted if the value would be the same as the parent's. " +
|
||||
"The locales are listed in the format: \u00B7aa\u00B7 for searching. " +
|
||||
"The value appears in red if it is the same as the root. </p>");
|
||||
"The locales are listed in the format: \u00B7aa\u00B7 for searching. " +
|
||||
"The value appears in red if it is the same as the root. " +
|
||||
"Draft locales are italic-gray; territory locales are bold.</p>");
|
||||
out.println("<table>");
|
||||
return out;
|
||||
}
|
||||
@ -1661,6 +1693,7 @@ public class GenerateSidewaysView {
|
||||
out.println(".head { font-weight:bold; background-color:#DDDDFF }");
|
||||
out.println("td, th { border: 1px solid #0000FF; text-align }");
|
||||
out.println("th { width:10% }");
|
||||
out.println("i { color: gray }");
|
||||
out.println(".nodata { background-color:#FF0000 }");
|
||||
out.println("table {margin-top: 1em}");
|
||||
out.close();
|
||||
|
@ -38,6 +38,8 @@ import com.ibm.icu.text.DateFormat;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.SimpleDateFormat;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
|
||||
@ -201,37 +203,53 @@ public class TestCldr {
|
||||
|
||||
static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific"};
|
||||
|
||||
|
||||
// ============ Handler for Collation ============
|
||||
static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
|
||||
|
||||
static String remove(String in, UnicodeSet toRemove) {
|
||||
int cp;
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(in, i);
|
||||
if (!toRemove.contains(cp)) UTF16.append(result, cp);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
{
|
||||
addHandler("collation", new Handler() {
|
||||
public void handleResult(ULocale currentLocale, String value) {
|
||||
Collator col = Collator.getInstance(currentLocale);
|
||||
String lastLine = "";
|
||||
int count = 0;
|
||||
for (int pos = 0; pos < value.length();) {
|
||||
int nextPos = value.indexOf('\n', pos);
|
||||
if (nextPos < 0)
|
||||
nextPos = value.length();
|
||||
String line = value.substring(pos, nextPos).trim(); // HACK for SAX
|
||||
if (line.length() != 0) { // HACK for SAX
|
||||
int comp = col.compare(lastLine, line);
|
||||
if (comp > 0) {
|
||||
failures++;
|
||||
logln("\tLine " + (count + 1) + "\tFailure: " + showString(lastLine) + " should be leq " + showString(line));
|
||||
} else if (DEBUG) {
|
||||
System.out.println("OK: " + line);
|
||||
}
|
||||
}
|
||||
pos = nextPos + 1;
|
||||
lastLine = line;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
});
|
||||
// Registers the handler for "collation" results. The value is treated as a list of
// lines separated by '\n'; every non-empty line must compare greater than or equal to
// the previous non-empty line under the locale's collator, otherwise a failure is
// counted and logged.
addHandler("collation", new Handler() {
|
||||
public void handleResult(ULocale currentLocale, String value) {
|
||||
Collator col = Collator.getInstance(currentLocale);
|
||||
// Previous non-empty line; starts as "", so the first real line is compared
// against the empty string.
String lastLine = "";
|
||||
// Number of lines consumed so far (skipped lines included); used only for the
// "Line N" label in the failure message.
int count = 0;
|
||||
// pos is advanced at the bottom of the body, past the '\n' that ended the line.
for (int pos = 0; pos < value.length();) {
|
||||
int nextPos = value.indexOf('\n', pos);
|
||||
// no further newline: the last line runs to the end of the value
if (nextPos < 0)
|
||||
nextPos = value.length();
|
||||
String line = value.substring(pos, nextPos);
|
||||
// Strip every code point in controlsAndSpace from the line before comparing.
line = remove(line, controlsAndSpace); // HACK for SAX
|
||||
// Lines that are empty after trimming are skipped and do not become lastLine.
if (line.trim().length() != 0) { // HACK for SAX
|
||||
int comp = col.compare(lastLine, line);
|
||||
// comp > 0 means the previous line sorts after this one: out of order
if (comp > 0) {
|
||||
failures++;
|
||||
logln("\tLine " + (count + 1) + "\tFailure: "
|
||||
+ showString(lastLine) + " should be leq "
|
||||
+ showString(line));
|
||||
} else if (DEBUG) {
|
||||
System.out.println("OK: " + line);
|
||||
}
|
||||
lastLine = line;
|
||||
}
|
||||
pos = nextPos + 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// ============ Handler for Numbers ============
|
||||
addHandler("number", new Handler() {
|
||||
public void handleResult(ULocale locale, String result) {
|
||||
addHandler("number", new Handler() {
|
||||
public void handleResult(ULocale locale, String result) {
|
||||
NumberFormat nf = null;
|
||||
double v = Double.NaN;
|
||||
for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
|
||||
|
@ -314,9 +314,10 @@ public class MakeUnicodeFiles {
|
||||
addValueComments(property, value, comments);
|
||||
comments = "";
|
||||
if (line.startsWith("Generate:")) {
|
||||
filesToDo = Utility.split(lineValue, ' ');
|
||||
if (filesToDo.length == 0) {
|
||||
filesToDo = new String[] {""};
|
||||
filesToDo = Utility.split(lineValue.trim(), ' ');
|
||||
if (filesToDo.length == 0
|
||||
|| (filesToDo.length == 1 && filesToDo[0].length() == 0)) {
|
||||
filesToDo = new String[] {".*"};
|
||||
}
|
||||
} else if (line.startsWith("DeltaVersion:")) {
|
||||
dVersion = Integer.parseInt(lineValue);
|
||||
@ -476,24 +477,22 @@ public class MakeUnicodeFiles {
|
||||
}
|
||||
|
||||
public static void generateFile() throws IOException {
|
||||
String[] lines = new String[2];
|
||||
Utility.filesAreIdentical("C:\\DATA\\UCD\\4.0.1-Update\\CaseFolding-4.0.1.txt",
|
||||
"C:\\DATA\\GEN\\DerivedData\\CaseFolding-4.1.0d13.txt", lines);
|
||||
for (int i = 0; i < Format.theFormat.filesToDo.length; ++i) {
|
||||
String fileName =
|
||||
Format.theFormat.filesToDo[i].trim().toLowerCase(
|
||||
Locale.ENGLISH);
|
||||
String fileNamePattern =
|
||||
Format.theFormat.filesToDo[i].trim(); // .toLowerCase(Locale.ENGLISH);
|
||||
Matcher matcher = Pattern.compile(fileNamePattern, Pattern.CASE_INSENSITIVE).matcher("");
|
||||
Iterator it = Format.theFormat.getFiles().iterator();
|
||||
boolean gotOne = false;
|
||||
while (it.hasNext()) {
|
||||
String propname = (String) it.next();
|
||||
if (!propname.toLowerCase(Locale.ENGLISH).startsWith(fileName)) continue;
|
||||
if (!matcher.reset(propname).matches()) continue;
|
||||
//if (!propname.toLowerCase(Locale.ENGLISH).startsWith(fileName)) continue;
|
||||
generateFile(propname);
|
||||
gotOne = true;
|
||||
}
|
||||
if (!gotOne) {
|
||||
throw new IllegalArgumentException(
|
||||
"Non-matching file name: " + fileName);
|
||||
"Non-matching file name: " + fileNamePattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -715,7 +714,8 @@ public class MakeUnicodeFiles {
|
||||
List propList = Format.theFormat.getPropertiesFromFile(filename);
|
||||
for (Iterator propIt = propList.iterator(); propIt.hasNext();) {
|
||||
BagFormatter bf = new BagFormatter(toolFactory);
|
||||
UnicodeProperty prop = toolFactory.getProperty((String) propIt.next());
|
||||
String nextPropName = (String) propIt.next();
|
||||
UnicodeProperty prop = toolFactory.getProperty(nextPropName);
|
||||
String name = prop.getName();
|
||||
System.out.println("Property: " + name + "; " + prop.getTypeName(prop.getType()));
|
||||
pw.println();
|
||||
|
@ -1,6 +1,18 @@
|
||||
Generate:
|
||||
DeltaVersion: 7
|
||||
|
||||
File: GraphemeClusterBreakProperty
|
||||
Property: Grapheme_Cluster_Break
|
||||
Format: skipValue=Other
|
||||
|
||||
File: WordBreakProperty
|
||||
Property: Word_Break
|
||||
Format: skipValue=Other
|
||||
|
||||
File: SentenceBreakProperty
|
||||
Property: Sentence_Break
|
||||
Format: skipValue=Other
|
||||
|
||||
File: Blocks
|
||||
Property: Block
|
||||
# Note: When comparing block names, casing, whitespace, hyphens,
|
||||
@ -41,6 +53,9 @@ Value: 3.2
|
||||
Value: 4.0
|
||||
# Newly assigned in Unicode 4.0.0 (April, 2003)
|
||||
|
||||
Value: 4.1
|
||||
# Newly assigned in Unicode 4.1.0 (March, 2005)
|
||||
|
||||
File: extracted/DerivedBidiClass
|
||||
Property: Bidi_Class
|
||||
# Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
|
||||
@ -86,7 +101,7 @@ Property: ID_Start
|
||||
Property: ID_Continue
|
||||
# Derived Property: ID_Continue
|
||||
# Characters that can continue an identifier.
|
||||
# Generated from: ID_Start + Mn+Mc+Nd+Pc
|
||||
# Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
|
||||
# NOTE: Cf characters should be filtered out.
|
||||
|
||||
|
||||
@ -109,7 +124,8 @@ Property: XID_Continue
|
||||
|
||||
Property: Default_Ignorable_Code_Point
|
||||
# Derived Property: Default_Ignorable_Code_Point
|
||||
# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters - White_Space - Annotation_characters
|
||||
# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters
|
||||
# - White_Space - FFF9..FFFB (Annotation Characters)
|
||||
|
||||
Property: Grapheme_Extend
|
||||
# Derived Property: Grapheme_Extend
|
||||
@ -307,6 +323,9 @@ Property: Other_ID_Continue
|
||||
Property: STerm
|
||||
|
||||
Property: Variation_Selector
|
||||
Property: Pattern_White_Space
|
||||
Property: Pattern_Syntax
|
||||
|
||||
|
||||
File: PropertyAliases
|
||||
Property: SPECIAL
|
||||
@ -315,7 +334,6 @@ File: PropertyValueAliases
|
||||
Property: SPECIAL
|
||||
|
||||
File: Scripts
|
||||
|
||||
Property: Script
|
||||
Format: nameStyle=none skipUnassigned=Common
|
||||
|
||||
|
@ -12,6 +12,7 @@ import java.util.TreeSet;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
||||
public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
@ -49,8 +50,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getName(codepoint);
|
||||
}
|
||||
}.setMain("Name", "na", UnicodeProperty.MISC, version)
|
||||
.setValues("<string>"));
|
||||
}.setValues("<string>")
|
||||
.setMain("Name", "na", UnicodeProperty.MISC, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
@ -63,24 +64,24 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
protected UnicodeMap _getUnicodeMap() {
|
||||
return ucd.blockData;
|
||||
}
|
||||
}.setMain("Block", "blk", UnicodeProperty.CATALOG, version)
|
||||
.setValues(ucd.getBlockNames(null)));
|
||||
}.setValues(ucd.getBlockNames(null))
|
||||
.setMain("Block", "blk", UnicodeProperty.CATALOG, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getBidiMirror(codepoint);
|
||||
}
|
||||
}.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version)
|
||||
.setValues("<string>"));
|
||||
}.setValues("<string>")
|
||||
.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD);
|
||||
}
|
||||
}.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version)
|
||||
.setValues("<string>"));
|
||||
}.setValues("<string>")
|
||||
.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
NumberFormat nf = NumberFormat.getInstance();
|
||||
@ -121,8 +122,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO, YES_NO));
|
||||
}.setValues(LONG_YES_NO, YES_NO)
|
||||
.setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
@ -133,8 +134,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
|
||||
}.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE)
|
||||
.setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
@ -145,8 +146,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO, YES_NO));
|
||||
}.setValues(LONG_YES_NO, YES_NO)
|
||||
.setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
@ -157,8 +158,11 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
|
||||
}.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE)
|
||||
.setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version));
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
@ -207,6 +211,102 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
add(new ToolUnicodeProperty(name));
|
||||
}
|
||||
|
||||
// Registers the derived Grapheme_Cluster_Break ("GCB") enumerated property. Its values
// are computed in the instance initializer below from General_Category,
// Grapheme_Extend and Hangul_Syllable_Type; code points not assigned below get "Other".
add(new UnicodeProperty.UnicodeMapProperty() {
|
||||
{
|
||||
unicodeMap = new UnicodeMap();
|
||||
// NOTE(review): presumably makes the map raise an error when a code point that
// already has a value is assigned again, i.e. the value sets below are expected
// to be disjoint -- confirm against UnicodeMap.
unicodeMap.setErrorOnReset(true);
|
||||
// U+000D and U+000A get their own values and are excluded from "Control" below.
unicodeMap.put(0xD, "CR");
|
||||
unicodeMap.put(0xA, "LF");
|
||||
UnicodeProperty cat = getProperty("General_Category");
|
||||
// Control = Line_Separator + Paragraph_Separator + Control + Format,
// minus CR, LF, U+200C (ZWNJ) and U+200D (ZWJ).
// NOTE(review): the chained addAll/remove calls modify the set returned by
// cat.getSet("Line_Separator") in place; this assumes getSet returns a fresh,
// mutable set rather than a shared one -- confirm.
UnicodeSet temp = cat.getSet("Line_Separator")
|
||||
.addAll(cat.getSet("Paragraph_Separator"))
|
||||
.addAll(cat.getSet("Control"))
|
||||
.addAll(cat.getSet("Format"))
|
||||
.remove(0xD).remove(0xA).remove(0x200C).remove(0x200D);
|
||||
unicodeMap.putAll(temp, "Control");
|
||||
// Extend = every code point with Grapheme_Extend=true.
UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
|
||||
unicodeMap.putAll(graphemeExtend,"Extend");
|
||||
// The Hangul values are copied one-to-one from Hangul_Syllable_Type.
UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
|
||||
unicodeMap.putAll(hangul.getSet("L"),"L");
|
||||
unicodeMap.putAll(hangul.getSet("V"),"V");
|
||||
unicodeMap.putAll(hangul.getSet("T"),"T");
|
||||
unicodeMap.putAll(hangul.getSet("LV"),"LV");
|
||||
unicodeMap.putAll(hangul.getSet("LVT"),"LVT");
|
||||
// Everything not assigned above reports "Other".
unicodeMap.setMissing("Other");
|
||||
}
|
||||
}.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version));
|
||||
|
||||
add(new UnicodeProperty.UnicodeMapProperty() {
|
||||
{
|
||||
unicodeMap = new UnicodeMap();
|
||||
unicodeMap.setErrorOnReset(true);
|
||||
UnicodeProperty cat = getProperty("General_Category");
|
||||
unicodeMap.putAll(cat.getSet("Format").remove(0x200C).remove(0x200D), "Format");
|
||||
UnicodeProperty script = getProperty("Script");
|
||||
unicodeMap.putAll(script.getSet("Katakana")
|
||||
.addAll(new UnicodeSet("[\u3031\u3032\u3033\u3034\u3035\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]"))
|
||||
, "Katakana");
|
||||
Object foo = unicodeMap.getSet("Katakana");
|
||||
UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
|
||||
unicodeMap.putAll(getProperty("Alphabetic").getSet("true")
|
||||
.add(0xA0).add(0x05F3)
|
||||
.removeAll(getProperty("Ideographic").getSet("true"))
|
||||
.removeAll(unicodeMap.getSet("Katakana"))
|
||||
.removeAll(script.getSet("Thai"))
|
||||
.removeAll(script.getSet("Lao"))
|
||||
.removeAll(script.getSet("Hiragana"))
|
||||
.removeAll(graphemeExtend),
|
||||
"ALetter");
|
||||
unicodeMap.putAll(new UnicodeSet("[\\u0027\\u00B7\\u05F4\\u2019\\u2027\\u003A]")
|
||||
,"MidLetter");
|
||||
UnicodeProperty lineBreak = getProperty("Line_Break");
|
||||
unicodeMap.putAll(lineBreak.getSet("Infix_Numeric")
|
||||
.remove(0x003A), "MidNum");
|
||||
unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
|
||||
unicodeMap.putAll(cat.getSet("Connector_Punctuation").remove(0x30FB).remove(0xFF65), "Numeric");
|
||||
unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
|
||||
unicodeMap.setMissing("Other");
|
||||
}
|
||||
}.setMain("Word_Break", "WB", UnicodeProperty.ENUMERATED, version));
|
||||
|
||||
add(new UnicodeProperty.UnicodeMapProperty() {
|
||||
{
|
||||
unicodeMap = new UnicodeMap();
|
||||
unicodeMap.setErrorOnReset(true);
|
||||
unicodeMap.putAll(new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]"), "Sep");
|
||||
UnicodeProperty cat = getProperty("General_Category");
|
||||
unicodeMap.putAll(cat.getSet("Format").remove(0x200C).remove(0x200D), "Format");
|
||||
unicodeMap.putAll(getProperty("Whitespace").getSet("true")
|
||||
.removeAll(unicodeMap.getSet("Sep"))
|
||||
.remove(0xA0), "Sp");
|
||||
UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
|
||||
unicodeMap.putAll(getProperty("Lowercase").getSet("true")
|
||||
.removeAll(graphemeExtend), "Lower");
|
||||
unicodeMap.putAll(getProperty("Uppercase").getSet("true")
|
||||
.addAll(cat.getSet("Titlecase_Letter"))
|
||||
, "Upper");
|
||||
UnicodeSet temp = getProperty("Alphabetic").getSet("true")
|
||||
.add(0xA0).add(0x5F3)
|
||||
.removeAll(unicodeMap.getSet("Lower"))
|
||||
.removeAll(unicodeMap.getSet("Upper"))
|
||||
.removeAll(graphemeExtend);
|
||||
unicodeMap.putAll(temp, "OLetter");
|
||||
UnicodeProperty lineBreak = getProperty("Line_Break");
|
||||
unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
|
||||
unicodeMap.put(0x002E, "ATerm");
|
||||
unicodeMap.putAll(getProperty("STerm").getSet("true")
|
||||
.removeAll(unicodeMap.getSet("ATerm")), "STerm");
|
||||
unicodeMap.putAll(cat.getSet("Open_Punctuation")
|
||||
.addAll(cat.getSet("Close_Punctuation"))
|
||||
.addAll(lineBreak.getSet("Quotation"))
|
||||
.remove(0x05F3)
|
||||
.removeAll(unicodeMap.getSet("ATerm"))
|
||||
.removeAll(unicodeMap.getSet("STerm"))
|
||||
, "Close");
|
||||
unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
|
||||
unicodeMap.setMissing("Other");
|
||||
}
|
||||
}.setMain("Sentence_Break", "SB", UnicodeProperty.ENUMERATED, version));
|
||||
}
|
||||
|
||||
static String[] YES_NO_MAYBE = {"N", "M", "Y"};
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2004/11/12 23:17:15 $
|
||||
* $Revision: 1.35 $
|
||||
* $Date: 2004/11/13 23:10:32 $
|
||||
* $Revision: 1.36 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -1579,8 +1579,13 @@ to guarantee identifier closure.
|
||||
|
||||
//T = Mc + (Cf - ZWNJ - ZWJ)
|
||||
int cp = uData.codePoint;
|
||||
byte old = uData.joiningType;
|
||||
byte cat = uData.generalCategory;
|
||||
byte old = uData.joiningType;
|
||||
byte cat = uData.generalCategory;
|
||||
if (cat == Me) {
|
||||
if (compositeVersion >= 0x40100) {
|
||||
uData.joiningType = JT_T;
|
||||
}
|
||||
}
|
||||
//if (cp == 0x200D) {
|
||||
// uData.joiningType = JT_C;
|
||||
//} else
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2004/11/12 23:17:15 $
|
||||
* $Revision: 1.28 $
|
||||
* $Date: 2004/11/13 23:10:32 $
|
||||
* $Revision: 1.29 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -154,6 +154,8 @@ final class UCD_Names implements UCD_Types {
|
||||
"STerm",
|
||||
"Variation_Selector",
|
||||
"Other_ID_Continue",
|
||||
"Pattern_White_Space",
|
||||
"Pattern_Syntax"
|
||||
};
|
||||
|
||||
static final String[] SHORT_BP = {
|
||||
@ -191,7 +193,9 @@ final class UCD_Names implements UCD_Types {
|
||||
"OIDS",
|
||||
"STerm",
|
||||
"VS",
|
||||
"OIDC"
|
||||
"OIDC",
|
||||
"PatWS",
|
||||
"PatSyn"
|
||||
};
|
||||
|
||||
/*
|
||||
@ -253,7 +257,7 @@ final class UCD_Names implements UCD_Types {
|
||||
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
|
||||
"SA", "AI", "B2", "SG", "ZW",
|
||||
"NL",
|
||||
"WJ",
|
||||
"WJ", "JL", "JV", "JT", "H2", "H3"
|
||||
//"JL",
|
||||
//"JV",
|
||||
//"JT",
|
||||
@ -269,7 +273,7 @@ final class UCD_Names implements UCD_Types {
|
||||
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
|
||||
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
|
||||
"Next_Line",
|
||||
"Word_Joiner"
|
||||
"Word_Joiner", "JL", "JV", "JT", "H2", "H3"
|
||||
//"Leading_Jamo",
|
||||
//"Vowel_Jamo",
|
||||
//"Trailing_Jamo",
|
||||
@ -412,7 +416,8 @@ final class UCD_Names implements UCD_Types {
|
||||
"3.0",
|
||||
"3.1",
|
||||
"3.2",
|
||||
"4.0"
|
||||
"4.0",
|
||||
"4.1",
|
||||
};
|
||||
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2004/03/11 19:03:16 $
|
||||
* $Revision: 1.28 $
|
||||
* $Date: 2004/11/13 23:10:32 $
|
||||
* $Revision: 1.29 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
|
||||
|
||||
public interface UCD_Types {
|
||||
|
||||
static final byte BINARY_FORMAT = 15; // bumped if binary format of UCD changes. Forces rebuild
|
||||
static final byte BINARY_FORMAT = 16; // bumped if binary format of UCD changes. Forces rebuild
|
||||
|
||||
public static final String BASE_DIR = "C:\\DATA\\";
|
||||
public static final String UCD_DIR = BASE_DIR + "UCD\\";
|
||||
@ -214,7 +214,9 @@ public interface UCD_Types {
|
||||
Sentence_Terminal = 32,
|
||||
Variation_Selector = 33,
|
||||
Other_ID_Continue = 34,
|
||||
LIMIT_BINARY_PROPERTIES = 35;
|
||||
Pattern_White_Space = 35,
|
||||
Pattern_Syntax = 36,
|
||||
LIMIT_BINARY_PROPERTIES = 37;
|
||||
|
||||
/*
|
||||
static final int
|
||||
@ -247,10 +249,15 @@ public interface UCD_Types {
|
||||
LB_SA = 24, LB_AI = 25, LB_B2 = 26, LB_SG = 27, LB_ZW = 28,
|
||||
LB_NL = 29,
|
||||
LB_WJ = 30,
|
||||
LB_JL = 31,
|
||||
LB_JV = 32,
|
||||
LB_JT = 33,
|
||||
LB_H2 = 34,
|
||||
LB_H3 = 35,
|
||||
//LB_JL = 29,
|
||||
//LB_JV = 30,
|
||||
//LB_JT = 31,
|
||||
LIMIT_LINE_BREAK = 31,
|
||||
LIMIT_LINE_BREAK = 36,
|
||||
LB_LIMIT = LIMIT_LINE_BREAK;
|
||||
|
||||
// east asian width
|
||||
@ -394,7 +401,8 @@ public interface UCD_Types {
|
||||
AGE31 = 5,
|
||||
AGE32 = 6,
|
||||
AGE40 = 7,
|
||||
LIMIT_AGE = 8;
|
||||
AGE41 = 8,
|
||||
LIMIT_AGE = 9;
|
||||
|
||||
static final String[] AGE_VERSIONS = {
|
||||
"?",
|
||||
@ -404,7 +412,8 @@ public interface UCD_Types {
|
||||
"3.0.0",
|
||||
"3.1.0",
|
||||
"3.2.0",
|
||||
"4.0.0"
|
||||
"4.0.0",
|
||||
"4.1.0"
|
||||
};
|
||||
|
||||
public static byte
|
||||
|
@ -71,7 +71,7 @@ $Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
|
||||
$Lowercase = [$GC:Ll $Other_Lowercase]
|
||||
$Uppercase = [$GC:Lu $Other_Uppercase]
|
||||
$ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
|
||||
$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc]
|
||||
$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc] $Other_ID_Continue
|
||||
$Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
|
||||
$Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
|
||||
$Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2004/11/12 23:17:15 $
|
||||
* $Revision: 1.45 $
|
||||
* $Date: 2004/11/13 23:10:32 $
|
||||
* $Revision: 1.46 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -673,6 +673,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
|
||||
private static final String[] searchPath = {
|
||||
"EXTRAS",
|
||||
"4.1.0",
|
||||
"4.0.1",
|
||||
"4.0.0",
|
||||
"3.2.0",
|
||||
|
Loading…
Reference in New Issue
Block a user