Fixed Fractional UCA for surrogates
X-SVN-Rev: 6504
This commit is contained in:
parent
02f44eee5c
commit
73ed7bfac5
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
|
||||
* $Date: 2001/10/26 23:32:03 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2001/10/31 00:01:28 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -236,7 +236,7 @@ final public class UCA implements Comparator {
|
||||
|
||||
// add weights
|
||||
char w = getPrimary(ce);
|
||||
if (DEBUG) System.out.println("\tCE: " + hex(ce));
|
||||
if (DEBUG) System.out.println("\tCE: " + Utility.hex(ce));
|
||||
if (w != 0) primaries.append(w);
|
||||
|
||||
w = getSecondary(ce);
|
||||
@ -490,7 +490,10 @@ final public class UCA implements Comparator {
|
||||
/**
|
||||
* Return the type of the CE
|
||||
*/
|
||||
public byte getCEType(char ch) {
|
||||
public byte getCEType(int ch) {
|
||||
|
||||
if (ch > 0xFFFF) ch = UTF16.getLeadSurrogate(ch); // first if expands
|
||||
|
||||
int ce = collationElements[ch];
|
||||
if ((ce & EXCEPTION_CE_MASK) != EXCEPTION_CE_MASK) return NORMAL_CE;
|
||||
if (ce == UNSUPPORTED) {
|
||||
@ -586,7 +589,7 @@ final public class UCA implements Comparator {
|
||||
result.append("|");
|
||||
needSep = true;
|
||||
} else {
|
||||
result.append(hex(ch));
|
||||
result.append(Utility.hex(ch));
|
||||
needSep = true;
|
||||
}
|
||||
}
|
||||
@ -598,9 +601,9 @@ final public class UCA implements Comparator {
|
||||
* Produces a human-readable string for a collation element
|
||||
*/
|
||||
static public String ceToString(int ce) {
|
||||
return "[" + hex(getPrimary(ce)) + "."
|
||||
+ hex(getSecondary(ce)) + "."
|
||||
+ hex(getTertiary(ce)) + "]";
|
||||
return "[" + Utility.hex(getPrimary(ce)) + "."
|
||||
+ Utility.hex(getSecondary(ce)) + "."
|
||||
+ Utility.hex(getTertiary(ce)) + "]";
|
||||
}
|
||||
|
||||
/**
|
||||
@ -631,32 +634,36 @@ final public class UCA implements Comparator {
|
||||
/**
|
||||
* Supplies a zero-padded hex representation of an integer (without 0x)
|
||||
*/
|
||||
/*
|
||||
static public String hex(int i) {
|
||||
String result = Long.toString(i & 0xFFFFFFFFL, 16).toUpperCase();
|
||||
return "00000000".substring(result.length(),8) + result;
|
||||
}
|
||||
|
||||
*/
|
||||
/**
|
||||
* Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u)
|
||||
*/
|
||||
/*
|
||||
static public String hex(char i) {
|
||||
String result = Integer.toString(i, 16).toUpperCase();
|
||||
return "0000".substring(result.length(),4) + result;
|
||||
}
|
||||
|
||||
*/
|
||||
/**
|
||||
* Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u)
|
||||
*/
|
||||
/*
|
||||
static public String hex(byte b) {
|
||||
int i = b & 0xFF;
|
||||
String result = Integer.toString(i, 16).toUpperCase();
|
||||
return "00".substring(result.length(),2) + result;
|
||||
}
|
||||
|
||||
*/
|
||||
/**
|
||||
* Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u)
|
||||
*@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062"
|
||||
*/
|
||||
/*
|
||||
static public String hex(String s, String sep) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
@ -665,11 +672,12 @@ final public class UCA implements Comparator {
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
*/
|
||||
/**
|
||||
* Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u)
|
||||
*@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062"
|
||||
*/
|
||||
/*
|
||||
static public String hex(StringBuffer s, String sep) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
@ -678,6 +686,7 @@ final public class UCA implements Comparator {
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
*/
|
||||
|
||||
// =============================================================
|
||||
// Privates
|
||||
@ -1161,6 +1170,7 @@ final public class UCA implements Comparator {
|
||||
public class UCAContents {
|
||||
int current = -1;
|
||||
Normalizer skipDecomps = new Normalizer(Normalizer.NFD);
|
||||
Normalizer nfd = skipDecomps;
|
||||
Iterator enum = null;
|
||||
byte ceLimit;
|
||||
int currentRange = Integer.MAX_VALUE; // set to ZERO to enable
|
||||
@ -1191,11 +1201,15 @@ final public class UCA implements Comparator {
|
||||
String result = null; // null if done
|
||||
|
||||
// normal case
|
||||
while (current++ <= 0xFFFF) {
|
||||
char ch = (char)current;
|
||||
if (getCEType(ch) >= ceLimit) continue;
|
||||
if (skipDecomps != null && skipDecomps.hasDecomposition(ch)) continue;
|
||||
result = String.valueOf(ch);
|
||||
while (current++ < 0x10FFFF) {
|
||||
//char ch = (char)current;
|
||||
byte type = getCEType(current);
|
||||
|
||||
if (!nfd.normalizationDiffers(current) || type == HANGUL_CE) {
|
||||
if (type >= ceLimit) continue;
|
||||
if (skipDecomps != null && skipDecomps.hasDecomposition(current)) continue;
|
||||
}
|
||||
result = UTF16.valueOf(current);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1502,19 +1516,19 @@ final public class UCA implements Comparator {
|
||||
|
||||
hangulHackBottom = collationElements[0x1100] & 0xFFFF0000; // remove secondaries & tertiaries
|
||||
hangulHackTop = collationElements[0x11F9] | 0xFFFF; // bump up secondaries and tertiaries
|
||||
if (SHOW_STATS) System.out.println("\tHangul Hack: " + hex(hangulHackBottom) + ", " + hex(hangulHackTop));
|
||||
if (SHOW_STATS) System.out.println("\tHangul Hack: " + Utility.hex(hangulHackBottom) + ", " + Utility.hex(hangulHackTop));
|
||||
|
||||
// show some statistics
|
||||
if (SHOW_STATS) System.out.println("\tcount1: " + count1);
|
||||
if (SHOW_STATS) System.out.println("\tcount2: " + max2);
|
||||
if (SHOW_STATS) System.out.println("\tcount3: " + max3);
|
||||
|
||||
if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + hex(MIN1) + "/" + hex(MAX1));
|
||||
if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + hex(MIN2) + "/" + hex(MAX2));
|
||||
if (SHOW_STATS) System.out.println("\tMIN3/MAX3: " + hex(MIN3) + "/" + hex(MAX3));
|
||||
if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + Utility.hex(MIN1) + "/" + Utility.hex(MAX1));
|
||||
if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + Utility.hex(MIN2) + "/" + Utility.hex(MAX2));
|
||||
if (SHOW_STATS) System.out.println("\tMIN3/MAX3: " + Utility.hex(MIN3) + "/" + Utility.hex(MAX3));
|
||||
|
||||
if (SHOW_STATS) System.out.println("\tVar Min/Max: " + hex(variableLow) + "/" + hex(variableHigh));
|
||||
if (SHOW_STATS) System.out.println("\tNon-Var Min: " + hex(nonVariableLow));
|
||||
if (SHOW_STATS) System.out.println("\tVar Min/Max: " + Utility.hex(variableLow) + "/" + Utility.hex(variableHigh));
|
||||
if (SHOW_STATS) System.out.println("\tNon-Var Min: " + Utility.hex(nonVariableLow));
|
||||
|
||||
if (SHOW_STATS) System.out.println("\trenumberedVariable: " + renumberedVariable);
|
||||
}
|
||||
@ -1565,7 +1579,7 @@ final public class UCA implements Comparator {
|
||||
if (strength > 1) {
|
||||
if (weights.get(i)) {
|
||||
count++;
|
||||
p.println(mf.format(new Object[] {hex((char)i), new Integer(stCounts[strength][i])}));
|
||||
p.println(mf.format(new Object[] {Utility.hex((char)i), new Integer(stCounts[strength][i])}));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -1575,8 +1589,8 @@ final public class UCA implements Comparator {
|
||||
int last = i-1;
|
||||
int diff = last - first + 1;
|
||||
count += diff;
|
||||
String lastStr = last == first ? "" : hex((char)last);
|
||||
p.println(mf.format(new Object[] {hex((char)first),lastStr,new Integer(diff), new Integer(count)}));
|
||||
String lastStr = last == first ? "" : Utility.hex((char)last);
|
||||
p.println(mf.format(new Object[] {Utility.hex((char)first),lastStr,new Integer(diff), new Integer(count)}));
|
||||
first = -1;
|
||||
}
|
||||
}
|
||||
@ -1623,17 +1637,17 @@ final public class UCA implements Comparator {
|
||||
variable = false; // FIX DATA FILE
|
||||
}
|
||||
if (key2 > 0x1FF) {
|
||||
throw new IllegalArgumentException("Weight2 doesn't fit: " + hex(key2) + "," + line);
|
||||
throw new IllegalArgumentException("Weight2 doesn't fit: " + Utility.hex(key2) + "," + line);
|
||||
}
|
||||
if (key3 > 0x7F) {
|
||||
throw new IllegalArgumentException("Weight3 doesn't fit: " + hex(key3) + "," + line);
|
||||
throw new IllegalArgumentException("Weight3 doesn't fit: " + Utility.hex(key3) + "," + line);
|
||||
}
|
||||
// adjust variable bounds, if needed
|
||||
if (variable) {
|
||||
if (key1 > nonVariableLow) {
|
||||
if (!haveVariableWarning) {
|
||||
System.out.println("\tBAD DATA: Variable overlap, nonvariable low: "
|
||||
+ hex(nonVariableLow) + ", line: \"" + line + "\"");
|
||||
+ Utility.hex(nonVariableLow) + ", line: \"" + line + "\"");
|
||||
haveVariableWarning = true;
|
||||
}
|
||||
} else {
|
||||
@ -1644,7 +1658,7 @@ final public class UCA implements Comparator {
|
||||
if (key1 < variableHigh) {
|
||||
if (!haveVariableWarning) {
|
||||
System.out.println("\tBAD DATA: Variable overlap, variable high: "
|
||||
+ hex(variableHigh) + ", line: \"" + line + "\"");
|
||||
+ Utility.hex(variableHigh) + ", line: \"" + line + "\"");
|
||||
haveVariableWarning = true;
|
||||
}
|
||||
} else {
|
||||
@ -1717,8 +1731,8 @@ final public class UCA implements Comparator {
|
||||
Object ceObj = new Long(((long)result << 16) | fourth);
|
||||
Object probe = uniqueTable.get(ceObj);
|
||||
if (probe != null) {
|
||||
System.out.println("\tCE(" + hex(value)
|
||||
+ ")=CE(" + hex(((Character)probe).charValue()) + "); " + line);
|
||||
System.out.println("\tCE(" + Utility.hex(value)
|
||||
+ ")=CE(" + Utility.hex(((Character)probe).charValue()) + "); " + line);
|
||||
|
||||
} else {
|
||||
uniqueTable.put(ceObj, new Character(value));
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2001/10/26 23:32:03 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2001/10/31 00:01:28 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -14,6 +14,7 @@
|
||||
package com.ibm.text.UCA;
|
||||
|
||||
import java.util.*;
|
||||
import com.ibm.text.UTF16;
|
||||
|
||||
import java.io.*;
|
||||
//import java.text.*;
|
||||
@ -106,13 +107,13 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
Normalizer foo = new Normalizer(Normalizer.NFKD);
|
||||
char x = '\u1EE2';
|
||||
System.out.println(UCA.hex(x) + " " + ucd.getName(x));
|
||||
System.out.println(Utility.hex(x) + " " + ucd.getName(x));
|
||||
String nx = foo.normalize(x);
|
||||
for (int i = 0; i < nx.length(); ++i) {
|
||||
char c = nx.charAt(i);
|
||||
System.out.println(ucd.getCanonicalClass(c));
|
||||
}
|
||||
System.out.println(UCA.hex(nx, " ") + " " + ucd.getName(nx));
|
||||
System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx));
|
||||
*/
|
||||
|
||||
}
|
||||
@ -251,7 +252,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
CompactShortArray csa = new CompactShortArray((short)0);
|
||||
|
||||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c));
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
if (0xAC00 <= c && c <= 0xD7A3) continue;
|
||||
if (normKD.hasDecomposition(c)) {
|
||||
++count;
|
||||
@ -260,7 +261,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
if (max < decomp.length()) max = decomp.length();
|
||||
if (decomp.length() > 7) ++over7;
|
||||
csa.setElementAt(c, (short)count);
|
||||
log.println("\t KD[0x" + UCA.hex(c) + "]='\\u" + UCA.hex(decomp,"\\u") + "';");
|
||||
log.println("\t KD[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
|
||||
}
|
||||
}
|
||||
csa.compact();
|
||||
@ -279,7 +280,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
csa = new CompactShortArray((short)0);
|
||||
|
||||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c));
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
if (0xAC00 <= c && c <= 0xD7A3) continue;
|
||||
if (normD.hasDecomposition(c)) {
|
||||
++count;
|
||||
@ -287,7 +288,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
datasize += decomp.length();
|
||||
if (max < decomp.length()) max = decomp.length();
|
||||
csa.setElementAt(c, (short)count);
|
||||
log.println("\t D[0x" + UCA.hex(c) + "]='\\u" + UCA.hex(decomp,"\\u") + "';");
|
||||
log.println("\t D[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
|
||||
}
|
||||
}
|
||||
csa.compact();
|
||||
@ -304,12 +305,12 @@ public class WriteCollationData implements UCD_Types {
|
||||
CompactByteArray cba = new CompactByteArray();
|
||||
|
||||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c));
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
int canClass = normKD.getCanonicalClass(c);
|
||||
if (canClass != 0) {
|
||||
++count;
|
||||
|
||||
log.println("\t CC[0x" + UCA.hex(c) + "]=" + canClass + ";");
|
||||
log.println("\t CC[0x" + Utility.hex(c) + "]=" + canClass + ";");
|
||||
}
|
||||
}
|
||||
cba.compact();
|
||||
@ -332,7 +333,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
char val = (char) enum.value();
|
||||
if (0xAC00 <= val && val <= 0xD7A3) continue;
|
||||
++count;
|
||||
log.println("\tC[0x" + UCA.hex(key) + "]=0x" + UCA.hex(val) + ";");
|
||||
log.println("\tC[0x" + Utility.hex(key) + "]=0x" + Utility.hex(val) + ";");
|
||||
}
|
||||
log.println("// " + count + " composition mappings total");
|
||||
log.println();
|
||||
@ -480,7 +481,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
decompSortKey = remove(decompSortKey, '\u0020');
|
||||
}
|
||||
if (!sortKey.equals(decompSortKey)) {
|
||||
log.println("<tr><td>" + UCA.hex(ch)
|
||||
log.println("<tr><td>" + Utility.hex(ch)
|
||||
+ "</td><td>" + UCA.toString(sortKey)
|
||||
+ "</td><td>" + UCA.toString(decompSortKey)
|
||||
+ "</td><td>" + ucd.getName(ch)
|
||||
@ -649,9 +650,11 @@ public class WriteCollationData implements UCD_Types {
|
||||
static final byte getDecompType(int cp) {
|
||||
byte result = ucd.getDecompositionType(cp);
|
||||
if (result == ucd.CANONICAL) {
|
||||
String d = NFD.normalize((char)cp); // TODO
|
||||
for (int i = 0; i < d.length(); ++i) {
|
||||
byte t = ucd.getDecompositionType(d.charAt(i));
|
||||
String d = NFD.normalize(cp); // TODO
|
||||
int cp1;
|
||||
for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) {
|
||||
cp1 = UTF16.charAt(d, i);
|
||||
byte t = ucd.getDecompositionType(cp1);
|
||||
if (t > ucd.CANONICAL) return t;
|
||||
}
|
||||
}
|
||||
@ -707,7 +710,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
static int[] markCes = new int[50];
|
||||
|
||||
static int fixCompatibilityCE(String s, boolean decompose, int[] output, boolean compress) {
|
||||
byte type = getDecompType(s.charAt(0));
|
||||
byte type = getDecompType(UTF16.charAt(s, 0));
|
||||
char ch = s.charAt(0);
|
||||
|
||||
String decomp = NFKD.normalize(s);
|
||||
@ -1654,6 +1657,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
static final int COMMON = 5;
|
||||
|
||||
static int gapForA = 0;
|
||||
static int[] primaryDelta;
|
||||
|
||||
static void writeFractionalUCA(String filename) throws IOException {
|
||||
|
||||
@ -1672,9 +1676,9 @@ public class WriteCollationData implements UCD_Types {
|
||||
for (int secondary = 0; secondary < compactSecondary.length; ++secondary) {
|
||||
if (secondarySet.get(secondary)) {
|
||||
compactSecondary[secondary] = subtotal++;
|
||||
/*System.out.println("compact[" + UCA.hex(secondary)
|
||||
+ "]=" + UCA.hex(compactSecondary[secondary])
|
||||
+ ", " + UCA.hex(fixSecondary(secondary)));*/
|
||||
/*System.out.println("compact[" + Utility.hex(secondary)
|
||||
+ "]=" + Utility.hex(compactSecondary[secondary])
|
||||
+ ", " + Utility.hex(fixSecondary(secondary)));*/
|
||||
}
|
||||
}
|
||||
System.out.println();
|
||||
@ -1687,7 +1691,9 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
System.out.println("Fixing Primaries");
|
||||
BitSet primarySet = collator.getWeightUsage(1);
|
||||
int[] primaryDelta = new int[65536];
|
||||
|
||||
primaryDelta = new int[65536];
|
||||
|
||||
// start at 1 so zero stays zero.
|
||||
for (int primary = 1; primary < 0xFFFF; ++primary) {
|
||||
if (primarySet.get(primary)) primaryDelta[primary] = 2;
|
||||
@ -1749,7 +1755,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
lastValue = primaryDelta[primary] = CE >>> 8;
|
||||
}
|
||||
//if ((primary & 0xFF) == 0) System.out.println(UCA.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
|
||||
//if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
|
||||
}
|
||||
|
||||
|
||||
@ -1757,19 +1763,37 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
System.out.println("Sorting");
|
||||
Map ordered = new TreeMap();
|
||||
|
||||
for (char ch = 0; ch < 0xFFFF; ++ch) {
|
||||
byte type = collator.getCEType(ch);
|
||||
if (type >= UCA.FIXED_CE) continue;
|
||||
String s = String.valueOf(ch);
|
||||
UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null);
|
||||
int ccounter = 0;
|
||||
while (true) {
|
||||
Utility.dot(ccounter++);
|
||||
String s = ucac.next();
|
||||
if (s == null) break;
|
||||
ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
||||
Utility.dot(ch);
|
||||
byte type = collator.getCEType(ch);
|
||||
if (type >= UCA.FIXED_CE && !nfd.hasDecomposition(ch))
|
||||
continue;
|
||||
}
|
||||
String s = com.ibm.text.UTF16.valueOf(ch);
|
||||
ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
|
||||
}
|
||||
|
||||
Hashtable multiTable = collator.getContracting();
|
||||
Enumeration enum = multiTable.keys();
|
||||
int ecount = 0;
|
||||
while (enum.hasMoreElements()) {
|
||||
Utility.dot(ecount++);
|
||||
String s = (String)enum.nextElement();
|
||||
ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
|
||||
}
|
||||
*/
|
||||
// JUST FOR TESTING
|
||||
if (false) {
|
||||
String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
|
||||
@ -1779,6 +1803,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
}
|
||||
}
|
||||
|
||||
Utility.fixDot();
|
||||
System.out.println("Writing");
|
||||
PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + ".txt"), 32*1024));
|
||||
PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_long.txt"), 32*1024));
|
||||
@ -1821,6 +1846,8 @@ public class WriteCollationData implements UCD_Types {
|
||||
String lastChr = "";
|
||||
int lastNp = 0;
|
||||
boolean doVariable = false;
|
||||
char[] codeUnits = new char[100];
|
||||
|
||||
|
||||
while (it.hasNext()) {
|
||||
Object sortKey = it.next();
|
||||
@ -1846,8 +1873,12 @@ public class WriteCollationData implements UCD_Types {
|
||||
wasVariable = isVariable;
|
||||
}
|
||||
oldStr.setLength(0);
|
||||
log.print(UCA.hex(chr, " ") + "; ");
|
||||
chr.getChars(0, chr.length(), codeUnits, 0);
|
||||
|
||||
log.print(Utility.hex(codeUnits, 0, chr.length(), " ") + "; ");
|
||||
boolean nonePrinted = true;
|
||||
boolean isFirst = true;
|
||||
|
||||
for (int q = 0; q < len; ++q) {
|
||||
nonePrinted = false;
|
||||
newPrimary.setLength(0);
|
||||
@ -1856,7 +1887,32 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
int pri = UCA.getPrimary(ces[q]);
|
||||
int sec = UCA.getSecondary(ces[q]);
|
||||
int ter = UCA.getTertiary(ces[q]);
|
||||
int ter = UCA.getTertiary(ces[q]);
|
||||
|
||||
oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
|
||||
|
||||
// special hack for unsupported!
|
||||
|
||||
if (pri >= UCA.UNSUPPORTED_BASE) {
|
||||
++q;
|
||||
oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
|
||||
|
||||
int pri2 = UCA.getPrimary(ces[q]);
|
||||
// get old code point
|
||||
// pri = UNSUPPORTED_BASE + (bigChar >>> 15)
|
||||
// pri2 = (bigChar & 0x7FFF) | 0x8000
|
||||
pri -= UCA.UNSUPPORTED_BASE;
|
||||
pri <<= 15;
|
||||
pri2 &= 0x7FFF;
|
||||
pri += pri2;
|
||||
System.out.println("Unsupported: "
|
||||
+ Utility.hex(UCA.getPrimary(ces[q-1]))
|
||||
+ ", " + Utility.hex(UCA.getPrimary(ces[q]))
|
||||
+ ", " + Utility.hex(pri)
|
||||
+ ", " + Utility.hex(fixPrimary(pri) & 0xFFFFFFFFL)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
if (sec != 0x20) {
|
||||
boolean changed = secEq.add(new Integer(sec), new Integer(pri));
|
||||
@ -1866,28 +1922,26 @@ public class WriteCollationData implements UCD_Types {
|
||||
}
|
||||
if (sampleEq[sec] == null) sampleEq[sec] = chr;
|
||||
if (sampleEq[ter] == null) sampleEq[ter] = chr;
|
||||
oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
|
||||
int oldPrimaryValue = UCA.getPrimary(ces[q]);
|
||||
int np = primaryDelta[oldPrimaryValue];
|
||||
if (oldPrimaryValue > 0x3400) {
|
||||
System.out.println(Utility.hex(oldPrimaryValue) + " => " + Utility.hex(np));
|
||||
}
|
||||
|
||||
// int oldPrimaryValue = UCA.getPrimary(ces[q]);
|
||||
int np = fixPrimary(pri);
|
||||
|
||||
hexBytes(np, newPrimary);
|
||||
hexBytes(fixSecondary(UCA.getSecondary(ces[q])), newSecondary);
|
||||
hexBytes(fixTertiary(UCA.getTertiary(ces[q])), newTertiary);
|
||||
if (q == 0) {
|
||||
hexBytes(fixSecondary(sec), newSecondary);
|
||||
hexBytes(fixTertiary(ter), newTertiary);
|
||||
if (isFirst) {
|
||||
if (!sameTopByte(np, lastNp)) {
|
||||
summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0)));
|
||||
summary.println("Last: " + Utility.hex(lastNp & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(lastChr,0)));
|
||||
summary.println();
|
||||
if (doVariable) {
|
||||
doVariable = false;
|
||||
summary.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!");
|
||||
summary.println();
|
||||
}
|
||||
summary.println("First: " + Utility.hex(np) + " " + ucd.getName(chr.charAt(0)));
|
||||
summary.println("First: " + Utility.hex(np & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(chr,0)));
|
||||
}
|
||||
lastNp = np;
|
||||
isFirst = false;
|
||||
}
|
||||
log.print("[" + newPrimary
|
||||
+ ", " + newSecondary
|
||||
@ -1898,17 +1952,17 @@ public class WriteCollationData implements UCD_Types {
|
||||
log.print("[,,]");
|
||||
oldStr.append(UCA.ceToString(0));
|
||||
}
|
||||
longLog.print(" # " + oldStr + " # " + ucd.getName(chr.charAt(0)));
|
||||
longLog.print(" # " + oldStr + " # " + ucd.getName(UTF16.charAt(chr, 0)));
|
||||
log.println();
|
||||
lastChr = chr;
|
||||
}
|
||||
summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0)));
|
||||
summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(UTF16.charAt(lastChr, 0)));
|
||||
|
||||
/*
|
||||
String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
|
||||
for (int i = 0; i < sample.length(); ++i) {
|
||||
char ch = sample.charAt(i);
|
||||
log.println(UCA.hex(ch) + " => " + UCA.hex(fixHan(ch))
|
||||
log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch))
|
||||
+ " " + ucd.getName(ch));
|
||||
}
|
||||
*/
|
||||
@ -1981,8 +2035,24 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
|
||||
static boolean isFixedIdeograph(int cp) {
|
||||
return (0x3400 <= cp && cp <= 0x4DB5 || 0x4E00 <= cp && cp <= 0x9FA5 || 0xF900 <= cp && cp <= 0xFA2D);
|
||||
return (0x3400 <= cp && cp <= 0x4DB5
|
||||
|| 0x4E00 <= cp && cp <= 0x9FA5
|
||||
|| 0xF900 <= cp && cp <= 0xFA2D // compat: most of these decompose anyway
|
||||
|| 0x20000 <= cp && cp <= 0x2A6D6
|
||||
|| 0x2F800 <= cp && cp <= 0x2FA1D // compat: most of these decompose anyway
|
||||
);
|
||||
}
|
||||
/*
|
||||
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
||||
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
|
||||
4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
|
||||
9FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
|
||||
20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
|
||||
2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
|
||||
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
|
||||
...
|
||||
2FA1D;CJK COMPATIBILITY IDEOGRAPH-2FA1D;Lo;0;L;2A600;;;;N;;;;;
|
||||
*/
|
||||
|
||||
static int remapUCA_CompatibilityIdeographToCp(int cp) {
|
||||
switch (cp) {
|
||||
@ -2175,6 +2245,18 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
static final int secondaryDoubleStart = 0xD0;
|
||||
|
||||
static int fixPrimary(int x) {
|
||||
int result = 0;
|
||||
if (x <= 0xFFFF) result = primaryDelta[x];
|
||||
else result = getImplicitPrimary(x);
|
||||
|
||||
/*if (x > 0x3400) {
|
||||
System.out.println(Utility.hex(x) + " => " + Utility.hex(result));
|
||||
}
|
||||
*/
|
||||
return result;
|
||||
}
|
||||
|
||||
static int fixSecondary(int x) {
|
||||
x = compactSecondary[x];
|
||||
return fixSecondary2(x, compactSecondary[0x153], compactSecondary[0x157]);
|
||||
@ -2301,7 +2383,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
byte b = (byte)(x >>> shift);
|
||||
if (b != 0) {
|
||||
if (result.length() != 0) result.append(" ");
|
||||
result.append(UCA.hex(b));
|
||||
result.append(Utility.hex(b));
|
||||
//if (lastb == 0) System.err.println(" bad zero byte: " + result);
|
||||
}
|
||||
lastb = b;
|
||||
@ -2352,7 +2434,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) {
|
||||
scripts[script] = primary;
|
||||
scriptChar[script] = ch;
|
||||
if (script == ucd.GREEK_SCRIPT) System.out.println("*" + UCA.hex(primary) + ucd.getName(ch));
|
||||
if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch));
|
||||
}
|
||||
}
|
||||
// get representative char for primary
|
||||
@ -2469,7 +2551,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
source = source.substring(0,source.length()-1);
|
||||
if (endMark == MARK1) {
|
||||
log.println("<br>");
|
||||
log.println("Mismatch: " + UCA.hex(source, " ")
|
||||
log.println("Mismatch: " + Utility.hex(source, " ")
|
||||
+ ", " + ucd.getName(source) + "<br>");
|
||||
log.print(" NFD:");
|
||||
} else {
|
||||
@ -2557,11 +2639,11 @@ public class WriteCollationData implements UCD_Types {
|
||||
//if (firstRow) out.print(" width='6%'");
|
||||
out.print(">");
|
||||
|
||||
//log.println(UCA.hex(ch2.charAt(0)));
|
||||
//log.println(Utility.hex(ch2.charAt(0)));
|
||||
boolean ignorable = col2.charAt(0) == 0;
|
||||
out.print(HTMLString(ch2) + "<br><tt>"
|
||||
+ (ignorable ? "<u>" : "")
|
||||
+ UCA.hex(ch2, " ")
|
||||
+ Utility.hex(ch2, " ")
|
||||
+ (ignorable ? "</u>" : "")
|
||||
);
|
||||
if (SHOW_CE) out.print("</tt><br><tt><b>" + UCA.toString(col2) + "</b>");
|
||||
@ -2633,7 +2715,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
String colNbase = collator.getSortKey(ch, option, false);
|
||||
String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
|
||||
if (!colNbase.equals(colCbase)) {
|
||||
/*System.out.println(UCA.hex(ch));
|
||||
/*System.out.println(Utility.hex(ch));
|
||||
System.out.println(printableKey(colNbase));
|
||||
System.out.println(printableKey(colNbase));
|
||||
System.out.println(printableKey(colNbase));*/
|
||||
@ -2747,10 +2829,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
String ch = (String)sortedD.get(col);
|
||||
String colN = (String)backN.get(ch);
|
||||
if (colN == null || colN.length() < 1) {
|
||||
System.out.println("Missing colN value for " + UCA.hex(ch, " ") + ": " + printableKey(colN));
|
||||
System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN));
|
||||
}
|
||||
if (col == null || col.length() < 1) {
|
||||
System.out.println("Missing col value for " + UCA.hex(ch, " ") + ": " + printableKey(col));
|
||||
System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col));
|
||||
}
|
||||
|
||||
if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) {
|
||||
@ -2758,14 +2840,14 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
} else {
|
||||
if (true && count < 200) {
|
||||
System.out.println();
|
||||
System.out.println(UCA.hex(ch, " ") + ", " + UCA.hex(lastCh, " "));
|
||||
System.out.println(" col: " + UCA.hex(col, " "));
|
||||
System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " "));
|
||||
System.out.println(" col: " + Utility.hex(col, " "));
|
||||
System.out.println(compareMinusLast(col, lastCol));
|
||||
System.out.println(" lastCol: " + UCA.hex(lastCol, " "));
|
||||
System.out.println(" lastCol: " + Utility.hex(lastCol, " "));
|
||||
System.out.println();
|
||||
System.out.println(" colN: " + UCA.hex(colN, " "));
|
||||
System.out.println(" colN: " + Utility.hex(colN, " "));
|
||||
System.out.println(compareMinusLast(colN, lastColN));
|
||||
System.out.println(" lastColN: " + UCA.hex(lastColN, " "));
|
||||
System.out.println(" lastColN: " + Utility.hex(lastColN, " "));
|
||||
}
|
||||
if (!showedLast) {
|
||||
log.println("<tr><td colspan='3'></td><tr>");
|
||||
@ -2791,9 +2873,9 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
|
||||
static void showLine(int count, String ch, String keyD, String keyN) {
|
||||
String decomp = toD.normalize(ch);
|
||||
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + UCA.hex(decomp, " ") + "> ";
|
||||
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
|
||||
log.println("<tr><td>" + count + "</td><td>"
|
||||
+ UCA.hex(ch, " ")
|
||||
+ Utility.hex(ch, " ")
|
||||
+ " " + ucd.getName(ch)
|
||||
+ decomp
|
||||
+ "</td><td>");
|
||||
@ -2863,12 +2945,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
if (showName) {
|
||||
if (ch.equals(decomp)) {
|
||||
log.println(//title + counter + " "
|
||||
UCA.hex(ch, " ")
|
||||
Utility.hex(ch, " ")
|
||||
+ " " + ucd.getName(ch)
|
||||
);
|
||||
} else {
|
||||
log.println(//title + counter + " "
|
||||
"<b>" + UCA.hex(ch, " ")
|
||||
"<b>" + Utility.hex(ch, " ")
|
||||
+ " " + ucd.getName(ch) + "</b>"
|
||||
);
|
||||
}
|
||||
@ -2877,11 +2959,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
String keyN = printableKey(backN.get(chobj));
|
||||
if (keyD.equals(keyN)) {
|
||||
log.println(//title + counter + " "
|
||||
UCA.hex(ch, " ") + " " + keyN);
|
||||
Utility.hex(ch, " ") + " " + keyN);
|
||||
} else {
|
||||
log.println(//title + counter + " "
|
||||
"<font color='#009900'>" + UCA.hex(ch, " ") + " " + keyN
|
||||
+ "</font><br><font color='#000099'>" + UCA.hex(decomp, " ") + " " + keyD + "</font>"
|
||||
"<font color='#009900'>" + Utility.hex(ch, " ") + " " + keyN
|
||||
+ "</font><br><font color='#000099'>" + Utility.hex(decomp, " ") + " " + keyD + "</font>"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $
|
||||
* $Date: 2001/10/26 23:32:03 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/10/31 00:01:28 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -285,7 +285,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
decompSortKey = remove(decompSortKey, '\u0020');
|
||||
}
|
||||
if (!sortKey.equals(decompSortKey)) {
|
||||
log.println("<tr><td>" + UCA.hex(ch)
|
||||
log.println("<tr><td>" + Utility.hex(ch)
|
||||
+ "</td><td>" + UCA.toString(sortKey)
|
||||
+ "</td><td>" + UCA.toString(decompSortKey)
|
||||
+ "</td><td>" + ucd.getName(ch)
|
||||
@ -762,9 +762,9 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
for (int secondary = 0; secondary < compactSecondary.length; ++secondary) {
|
||||
if (secondarySet.get(secondary)) {
|
||||
compactSecondary[secondary] = subtotal++;
|
||||
/*System.out.println("compact[" + UCA.hex(secondary)
|
||||
+ "]=" + UCA.hex(compactSecondary[secondary])
|
||||
+ ", " + UCA.hex(fixSecondary(secondary)));*/
|
||||
/*System.out.println("compact[" + Utility.hex(secondary)
|
||||
+ "]=" + Utility.hex(compactSecondary[secondary])
|
||||
+ ", " + Utility.hex(fixSecondary(secondary)));*/
|
||||
}
|
||||
}
|
||||
System.out.println();
|
||||
@ -822,7 +822,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
|
||||
primaryDelta[primary] = CE >>> 8;
|
||||
}
|
||||
if ((primary & 0xFF) == 0) System.out.println(UCA.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
|
||||
if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
|
||||
}
|
||||
|
||||
|
||||
@ -916,7 +916,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
wasVariable = isVariable;
|
||||
}
|
||||
oldStr.setLength(0);
|
||||
log.print(UCA.hex(chr, " ") + "; " + (needsCaseBit(chr) ? '1' : '0') + "; ");
|
||||
log.print(Utility.hex(chr, " ") + "; " + (needsCaseBit(chr) ? '1' : '0') + "; ");
|
||||
boolean nonePrinted = true;
|
||||
for (int q = 0; q < len; ++q) {
|
||||
nonePrinted = false;
|
||||
@ -972,7 +972,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
|
||||
for (int i = 0; i < sample.length(); ++i) {
|
||||
char ch = sample.charAt(i);
|
||||
log.println(UCA.hex(ch) + " => " + UCA.hex(fixHan(ch))
|
||||
log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch))
|
||||
+ " " + ucd.getName(ch));
|
||||
}
|
||||
*/
|
||||
@ -1311,7 +1311,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
byte b = (byte)(x >>> shift);
|
||||
if (b != 0) {
|
||||
if (result.length() != 0) result.append(" ");
|
||||
result.append(UCA.hex(b));
|
||||
result.append(Utility.hex(b));
|
||||
//if (lastb == 0) System.err.println(" bad zero byte: " + result);
|
||||
}
|
||||
lastb = b;
|
||||
@ -1360,7 +1360,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) {
|
||||
scripts[script] = primary;
|
||||
scriptChar[script] = ch;
|
||||
if (script == ucd.GREEK_SCRIPT) System.out.println("*" + UCA.hex(primary) + ucd.getName(ch));
|
||||
if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch));
|
||||
}
|
||||
}
|
||||
// get representative char for primary
|
||||
@ -1478,7 +1478,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
source = source.substring(0,source.length()-1);
|
||||
if (endMark == MARK1) {
|
||||
log.println("<br>");
|
||||
log.println("Mismatch: " + UCA.hex(source, " ")
|
||||
log.println("Mismatch: " + Utility.hex(source, " ")
|
||||
+ ", " + ucd.getName(source) + "<br>");
|
||||
log.print(" NFD:");
|
||||
} else {
|
||||
@ -1566,11 +1566,11 @@ public class WriteHTMLCollation implements UCD_Types {
|
||||
//if (firstRow) out.print(" width='6%'");
|
||||
out.print(">");
|
||||
|
||||
//log.println(UCA.hex(ch2.charAt(0)));
|
||||
//log.println(Utility.hex(ch2.charAt(0)));
|
||||
boolean ignorable = col2.charAt(0) == 0;
|
||||
out.print(HTMLString(ch2) + "<br><tt>"
|
||||
+ (ignorable ? "<u>" : "")
|
||||
+ UCA.hex(ch2, " ")
|
||||
+ Utility.hex(ch2, " ")
|
||||
+ (ignorable ? "</u>" : "")
|
||||
);
|
||||
if (SHOW_CE) out.print("</tt><br><tt><b>" + UCA.toString(col2) + "</b>");
|
||||
@ -1632,7 +1632,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
String colNbase = collator.getSortKey(ch, option, false);
|
||||
String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
|
||||
if (!colNbase.equals(colCbase)) {
|
||||
/*System.out.println(UCA.hex(ch));
|
||||
/*System.out.println(Utility.hex(ch));
|
||||
System.out.println(printableKey(colNbase));
|
||||
System.out.println(printableKey(colNbase));
|
||||
System.out.println(printableKey(colNbase));*/
|
||||
@ -1746,10 +1746,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
String ch = (String)sortedD.get(col);
|
||||
String colN = (String)backN.get(ch);
|
||||
if (colN == null || colN.length() < 1) {
|
||||
System.out.println("Missing colN value for " + UCA.hex(ch, " ") + ": " + printableKey(colN));
|
||||
System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN));
|
||||
}
|
||||
if (col == null || col.length() < 1) {
|
||||
System.out.println("Missing col value for " + UCA.hex(ch, " ") + ": " + printableKey(col));
|
||||
System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col));
|
||||
}
|
||||
|
||||
if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) {
|
||||
@ -1757,14 +1757,14 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
} else {
|
||||
if (true && count < 200) {
|
||||
System.out.println();
|
||||
System.out.println(UCA.hex(ch, " ") + ", " + UCA.hex(lastCh, " "));
|
||||
System.out.println(" col: " + UCA.hex(col, " "));
|
||||
System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " "));
|
||||
System.out.println(" col: " + Utility.hex(col, " "));
|
||||
System.out.println(compareMinusLast(col, lastCol));
|
||||
System.out.println(" lastCol: " + UCA.hex(lastCol, " "));
|
||||
System.out.println(" lastCol: " + Utility.hex(lastCol, " "));
|
||||
System.out.println();
|
||||
System.out.println(" colN: " + UCA.hex(colN, " "));
|
||||
System.out.println(" colN: " + Utility.hex(colN, " "));
|
||||
System.out.println(compareMinusLast(colN, lastColN));
|
||||
System.out.println(" lastColN: " + UCA.hex(lastColN, " "));
|
||||
System.out.println(" lastColN: " + Utility.hex(lastColN, " "));
|
||||
}
|
||||
if (!showedLast) {
|
||||
log.println("<tr><td colspan='3'></td><tr>");
|
||||
@ -1790,9 +1790,9 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
|
||||
static void showLine(int count, String ch, String keyD, String keyN) {
|
||||
String decomp = toD.normalize(ch);
|
||||
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + UCA.hex(decomp, " ") + "> ";
|
||||
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
|
||||
log.println("<tr><td>" + count + "</td><td>"
|
||||
+ UCA.hex(ch, " ")
|
||||
+ Utility.hex(ch, " ")
|
||||
+ " " + ucd.getName(ch)
|
||||
+ decomp
|
||||
+ "</td><td>");
|
||||
@ -1862,12 +1862,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
if (showName) {
|
||||
if (ch.equals(decomp)) {
|
||||
log.println(//title + counter + " "
|
||||
UCA.hex(ch, " ")
|
||||
Utility.hex(ch, " ")
|
||||
+ " " + ucd.getName(ch)
|
||||
);
|
||||
} else {
|
||||
log.println(//title + counter + " "
|
||||
"<b>" + UCA.hex(ch, " ")
|
||||
"<b>" + Utility.hex(ch, " ")
|
||||
+ " " + ucd.getName(ch) + "</b>"
|
||||
);
|
||||
}
|
||||
@ -1876,11 +1876,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
String keyN = printableKey(backN.get(chobj));
|
||||
if (keyD.equals(keyN)) {
|
||||
log.println(//title + counter + " "
|
||||
UCA.hex(ch, " ") + " " + keyN);
|
||||
Utility.hex(ch, " ") + " " + keyN);
|
||||
} else {
|
||||
log.println(//title + counter + " "
|
||||
"<font color='#009900'>" + UCA.hex(ch, " ") + " " + keyN
|
||||
+ "</font><br><font color='#000099'>" + UCA.hex(decomp, " ") + " " + keyD + "</font>"
|
||||
"<font color='#009900'>" + Utility.hex(ch, " ") + " " + keyN
|
||||
+ "</font><br><font color='#000099'>" + Utility.hex(decomp, " ") + " " + keyD + "</font>"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2001/10/26 23:33:07 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2001/10/31 00:02:27 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -414,15 +414,36 @@ public class GenerateData implements UCD_Types {
|
||||
|
||||
|
||||
public static void listProperties() throws IOException {
|
||||
String propAbb = "";
|
||||
String prop = "";
|
||||
String propAbb = "";
|
||||
String value = "";
|
||||
String valueAbb = "";
|
||||
|
||||
Map duplicates = new TreeMap();
|
||||
Set sorted = new TreeSet(java.text.Collator.getInstance());
|
||||
Map accumulation = new TreeMap();
|
||||
Set accumulation = new TreeSet(java.text.Collator.getInstance());
|
||||
String spacing;
|
||||
|
||||
for(int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) {
|
||||
BufferedReader blocks = Utility.openUnicodeFile("Blocks", ucd.getVersion());
|
||||
String[] parts = new String[10];
|
||||
while (true) {
|
||||
String line = blocks.readLine();
|
||||
if (line == null) break;
|
||||
int commentPos = line.indexOf('#');
|
||||
if (commentPos >= 0) line = line.substring(0,commentPos);
|
||||
line = line.trim();
|
||||
if (line.length() == 0) continue;
|
||||
int count = Utility.split(line,';',parts);
|
||||
if (count != 2) System.out.println("Whow!");
|
||||
value = fixGaps(parts[1].trim(), true);
|
||||
valueAbb = "n/a";
|
||||
spacing = Utility.repeat(" ", 10-valueAbb.length());
|
||||
sorted.add("blk; " + valueAbb + spacing + "; " + value);
|
||||
checkDuplicate(duplicates, accumulation, value, "Block=" + value);
|
||||
}
|
||||
blocks.close();
|
||||
|
||||
for (int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) {
|
||||
propAbb = fixGaps(UCD_Names.NON_ENUMERATED[k][0], false);
|
||||
prop = fixGaps(UCD_Names.NON_ENUMERATED[k][1], true);
|
||||
spacing = Utility.repeat(" ", 10-propAbb.length());
|
||||
@ -430,6 +451,15 @@ public class GenerateData implements UCD_Types {
|
||||
checkDuplicate(duplicates, accumulation, propAbb, prop);
|
||||
if (!prop.equals(propAbb)) checkDuplicate(duplicates, accumulation, prop, prop);
|
||||
}
|
||||
|
||||
for (int k = 0; k < UCD_Names.SUPER_CATEGORIES.length; ++k) {
|
||||
valueAbb = fixGaps(UCD_Names.SUPER_CATEGORIES[k][0], false);
|
||||
value = fixGaps(UCD_Names.SUPER_CATEGORIES[k][1], true);
|
||||
spacing = Utility.repeat(" ", 10-valueAbb.length());
|
||||
sorted.add("gc; " + valueAbb + spacing + "; " + value);
|
||||
checkDuplicate(duplicates, accumulation, value, "General_Category=" + value);
|
||||
if (!value.equals(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, "General_Category=" + value);
|
||||
}
|
||||
|
||||
sorted.add("xx; T ; True");
|
||||
checkDuplicate(duplicates, accumulation, "T", "xx=True");
|
||||
@ -460,7 +490,7 @@ public class GenerateData implements UCD_Types {
|
||||
if (!ubp.isDefined(i)) continue;
|
||||
if (ubp.isTest(i)) continue;
|
||||
|
||||
String value = ubp.getID(i, LONG);
|
||||
value = ubp.getID(i, LONG);
|
||||
if (value.length() == 0) value = "none";
|
||||
else if (value.equals("<unused>")) continue;
|
||||
value = fixGaps(value, true);
|
||||
@ -469,9 +499,9 @@ public class GenerateData implements UCD_Types {
|
||||
value = ucd.getCase(value, FULL, TITLE);
|
||||
}
|
||||
|
||||
String abbvalue = ubp.getID(i, SHORT);
|
||||
if (abbvalue.length() == 0) abbvalue = "no";
|
||||
abbvalue = fixGaps(abbvalue, false);
|
||||
valueAbb = ubp.getID(i, SHORT);
|
||||
if (valueAbb.length() == 0) valueAbb = "no";
|
||||
valueAbb = fixGaps(valueAbb, false);
|
||||
|
||||
if (type == COMBINING_CLASS) {
|
||||
if (value.startsWith("Fixed_")) { continue; }
|
||||
@ -480,13 +510,13 @@ public class GenerateData implements UCD_Types {
|
||||
/*
|
||||
String elide = "";
|
||||
if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{"
|
||||
+ abbvalue
|
||||
+ valueAbb
|
||||
+ "}";
|
||||
String abb = "";
|
||||
if (type != BINARY_PROPERTIES) abb = "\\p{"
|
||||
+ UCD_Names.ABB_UNIFIED_PROPERTIES[i>>8]
|
||||
+ "="
|
||||
+ abbvalue
|
||||
+ valueAbb
|
||||
+ "}";
|
||||
String norm = "";
|
||||
if (type != BINARY_PROPERTIES) norm = "\\p{"
|
||||
@ -497,18 +527,18 @@ public class GenerateData implements UCD_Types {
|
||||
System.out.println("<tr><td>" + elide + "</td><td>" + abb + "</td><td>" + norm + "</td></tr>");
|
||||
*/
|
||||
|
||||
spacing = Utility.repeat(" ", 10-abbvalue.length());
|
||||
spacing = Utility.repeat(" ", 10-valueAbb.length());
|
||||
|
||||
if (type == BINARY_PROPERTIES || type == DERIVED) {
|
||||
sorted.add("ZZ; " + abbvalue + spacing + "; " + value);
|
||||
sorted.add("ZZ; " + valueAbb + spacing + "; " + value);
|
||||
checkDuplicate(duplicates, accumulation, value, value);
|
||||
if (!value.equalsIgnoreCase(abbvalue)) checkDuplicate(duplicates, accumulation, abbvalue, value);
|
||||
if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, value);
|
||||
continue;
|
||||
}
|
||||
|
||||
sorted.add(propAbb + "; " + abbvalue + spacing + "; " + value);
|
||||
sorted.add(propAbb + "; " + valueAbb + spacing + "; " + value);
|
||||
checkDuplicate(duplicates, accumulation, value, prop + "=" + value);
|
||||
if (!value.equalsIgnoreCase(abbvalue)) checkDuplicate(duplicates, accumulation, abbvalue, prop + "=" + value);
|
||||
if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, prop + "=" + value);
|
||||
}
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("PropertyAliases-" + ucd.getVersion() + "dX.txt");
|
||||
@ -525,7 +555,7 @@ public class GenerateData implements UCD_Types {
|
||||
log.println("# Note: no two property names can be the same,");
|
||||
log.println("# nor can two property value names for the same property be the same.");
|
||||
log.println();
|
||||
Utility.print(log, accumulation.values(), "\r\n", new MyBreaker());
|
||||
Utility.print(log, accumulation, "\r\n", new MyBreaker());
|
||||
log.println();
|
||||
log.close();
|
||||
}
|
||||
@ -542,7 +572,7 @@ public class GenerateData implements UCD_Types {
|
||||
}
|
||||
}
|
||||
|
||||
static void checkDuplicate(Map m, Map accumulation, String toCheck, String originalComment) {
|
||||
static void checkDuplicate(Map m, Set accumulation, String toCheck, String originalComment) {
|
||||
toCheck = skeleton(toCheck);
|
||||
String comment = "{" + originalComment + "}";
|
||||
|
||||
@ -575,14 +605,15 @@ public class GenerateData implements UCD_Types {
|
||||
}
|
||||
|
||||
// accumulate differences
|
||||
/*
|
||||
String acc = (String)accumulation.get(toCheck);
|
||||
/*if (acc == null) {
|
||||
if (acc == null) {
|
||||
acc = "# \"" + toCheck + "\":\t" + originalComment;
|
||||
}
|
||||
acc += ";\t" + result;
|
||||
*/
|
||||
result.add(comment);
|
||||
accumulation.put(toCheck, "# \"" + toCheck + "\":\t" + result);
|
||||
accumulation.add("# " + result.toString() + ":\t" + toCheck);
|
||||
} else {
|
||||
result = new TreeSet();
|
||||
result.add(comment);
|
||||
|
@ -12,8 +12,10 @@
|
||||
#
|
||||
# FORMAT
|
||||
#
|
||||
# Each line has three fields. Where the first field is AA, BB, or ZZ, then
|
||||
# the line describes a property name.
|
||||
# Each line has three fields, separated by semicolons.
|
||||
#
|
||||
# First Field: Where the first field is AA, BB, or ZZ, then the line describes a property name:
|
||||
#
|
||||
# AA - non-enumerated properties
|
||||
# BB - enumerated, non-binary properties
|
||||
# ZZ - binary properties and quick-check properties
|
||||
@ -28,10 +30,20 @@
|
||||
# xx stands for any binary property
|
||||
# qc stands for any quick-check property
|
||||
#
|
||||
# With loose matching of property names, case distinctions, whitespace,
|
||||
# Second Field: The second field is an abbreviated name.
|
||||
# If there is no abbreviated name available, the field is marked with "n/a".
|
||||
#
|
||||
# Third Field: The third field is a long name.
|
||||
#
|
||||
# With loose matching of property names, the case distinctions, whitespace,
|
||||
# and '_' are ignored.
|
||||
#
|
||||
# NOTE: the property value names are NOT unique across properties, especially
|
||||
# NOTE: Currently there is at most one abbreviated name and one long name for
|
||||
# each property and property value. However, in the future additional aliases
|
||||
# may be added. In such a case, the first line for the property or property value
|
||||
# would have the preferred alias for output.
|
||||
#
|
||||
# NOTE: The property value names are NOT unique across properties, especially
|
||||
# with loose matches. For example,
|
||||
# AL means Arabic Letter for the Bidi_Class property, and
|
||||
# AL means Alpha_Left for the Combining_Class property, and
|
||||
@ -41,5 +53,7 @@
|
||||
# cc means Combining_Class property, and
|
||||
# cc means the General_Category property value Control (cc)
|
||||
#
|
||||
# Comments at the end of the file show cases of non-unique names.
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
# For more information, see UTR #24: Regular Expression Guidelines
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2001/10/26 23:33:07 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2001/10/31 00:02:27 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -31,7 +31,8 @@ final class UCD_Names implements UCD_Types {
|
||||
{"suc", "Simple_Uppercase_Mapping"},
|
||||
{"stc", "Simple_Titlecase_Mapping"},
|
||||
{"sfc", "Simple_Case_Folding"},
|
||||
{"scc", "Special_Case_Condition"}
|
||||
{"scc", "Special_Case_Condition"},
|
||||
{"blk", "Block"}
|
||||
};
|
||||
|
||||
static final String[] UNIFIED_PROPERTIES = {
|
||||
@ -404,9 +405,19 @@ final class UCD_Names implements UCD_Types {
|
||||
"FinalPunctuation" // = Punctuation, Final quote 30 (may behave like Ps or Pe depending on usage)
|
||||
};
|
||||
|
||||
static final String[][] SUPER_CATEGORIES = {
|
||||
{"L", "Letter"},
|
||||
{"M", "Mark"},
|
||||
{"N", "Number"},
|
||||
{"Z", "Separator"},
|
||||
{"C", "Other"},
|
||||
{"S", "Symbol"},
|
||||
{"P", "Punctuation"},
|
||||
};
|
||||
|
||||
|
||||
static String[] BC = {
|
||||
|
||||
static final String[] BC = {
|
||||
"L", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
|
||||
"R", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
|
||||
"EN", // European Number
|
||||
@ -752,8 +763,6 @@ final class UCD_Names implements UCD_Types {
|
||||
"H", // U+11C2; H; HANGUL JONGSEONG HIEUH
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
|
||||
static {
|
||||
UNASSIGNED_INFO.code = '\uFFFF';
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2001/10/26 23:33:48 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2001/10/31 00:02:54 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -122,6 +122,14 @@ public final class Utility { // COMMON UTILITIES
|
||||
return hex(ch,4);
|
||||
}
|
||||
|
||||
public static String hex(byte ch) {
|
||||
return hex(ch & 0xFF,2);
|
||||
}
|
||||
|
||||
public static String hex(char ch) {
|
||||
return hex(ch & 0xFFFF,4);
|
||||
}
|
||||
|
||||
public static String hex(Object s) {
|
||||
return hex(s, 4, " ");
|
||||
}
|
||||
@ -149,21 +157,21 @@ public final class Utility { // COMMON UTILITIES
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static String hex(byte[] o, int start, int end) {
|
||||
public static String hex(byte[] o, int start, int end, String separator) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
//int ch;
|
||||
for (int i = start; i < end; ++i) {
|
||||
if (i != 0) result.append(' ');
|
||||
result.append(hex(o[i] & 0xFF, 2));
|
||||
if (i != 0) result.append(separator);
|
||||
result.append(hex(o[i]));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static String hex(char[] o, int start, int end) {
|
||||
public static String hex(char[] o, int start, int end, String separator) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = start; i < end; ++i) {
|
||||
if (i != 0) result.append(' ');
|
||||
result.append(hex(o[i], 4));
|
||||
if (i != 0) result.append(separator);
|
||||
result.append(hex(o[i]));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user