reorg
X-SVN-Rev: 5824
This commit is contained in:
parent
dee8a86dee
commit
42bddd7bf5
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/CEList.java,v $
|
||||
* $Date: 2001/08/31 00:20:40 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:32:21 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -102,7 +102,8 @@ public final class CEList implements java.lang.Comparable, UCD_Types {
|
||||
|
||||
for (int i = startOffset; i < min; ++i) {
|
||||
if (contents[i] != that.contents[i + delta]) {
|
||||
if (contents[i] < that.contents[i + delta]) return -1;
|
||||
if ((contents[i] & 0xFFFFFFFFL)
|
||||
< (that.contents[i + delta] & 0xFFFFFFFFL)) return -1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -158,7 +159,9 @@ public final class CEList implements java.lang.Comparable, UCD_Types {
|
||||
public static String toString(int ce) {
|
||||
return "[" + Utility.hex(UCA.getPrimary(ce)) + "."
|
||||
+ Utility.hex(UCA.getSecondary(ce)) + "."
|
||||
+ Utility.hex(UCA.getTertiary(ce)) + "](" + NAME3[UCA.getTertiary(ce)] + ")";
|
||||
+ Utility.hex(UCA.getTertiary(ce)) + "]"
|
||||
// + "(" + NAME3[UCA.getTertiary(ce)] + ")"
|
||||
;
|
||||
}
|
||||
|
||||
static final String[] NAME3 = {
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
|
||||
* $Date: 2001/09/06 01:30:31 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/09/19 23:32:21 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -43,7 +43,7 @@ public class GenOverlap implements UCD_Types {
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
|
||||
// store data for faster lookup
|
||||
|
||||
@ -307,7 +307,7 @@ public class GenOverlap implements UCD_Types {
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
|
||||
// store data for faster lookup
|
||||
|
||||
@ -505,7 +505,7 @@ public class GenOverlap implements UCD_Types {
|
||||
//nfd = new Normalizer(Normalizer.NFD);
|
||||
//nfkd = new Normalizer(Normalizer.NFKD);
|
||||
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
|
||||
|
20
tools/unicodetools/com/ibm/text/UCA/Main.java
Normal file
20
tools/unicodetools/com/ibm/text/UCA/Main.java
Normal file
@ -0,0 +1,20 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2001/09/19 23:31:50 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCA;
|
||||
|
||||
public class Main {
|
||||
public static void main(String args[]) throws Exception {
|
||||
WriteCollationData.main(args); // TODO, pull from there to here.
|
||||
}
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
|
||||
* $Date: 2001/09/06 01:30:31 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/09/19 23:32:21 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -765,15 +765,6 @@ final public class UCA {
|
||||
*/
|
||||
static final int EXCEPTION_CE_MASK = 0xFFC00000;
|
||||
|
||||
/**
|
||||
* Any unsupported characters (those not in the UCA data tables)
|
||||
* are marked with an exception bit combination
|
||||
* so that they can be treated specially.<br>
|
||||
* There are at least 34 values, so that we can use a range for surrogates
|
||||
* However, we do add to the first weight if we have surrogate pairs!
|
||||
*/
|
||||
static final int UNSUPPORTED = 0xFFC20101;
|
||||
|
||||
/**
|
||||
* Used to compose Hangul and Han characters
|
||||
*/
|
||||
@ -781,6 +772,18 @@ final public class UCA {
|
||||
static final int NEUTRAL_SECONDARY = 0x20;
|
||||
static final int NEUTRAL_TERTIARY = 0x02;
|
||||
|
||||
/**
|
||||
* Any unsupported characters (those not in the UCA data tables)
|
||||
* are marked with an exception bit combination
|
||||
* so that they can be treated specially.<br>
|
||||
* There are at least 34 values, so that we can use a range for surrogates
|
||||
* However, we do add to the first weight if we have surrogate pairs!
|
||||
*/
|
||||
static final int UNSUPPORTED_P = 0xFFC2;
|
||||
static final int UNSUPPORTED = makeKey(UNSUPPORTED_P, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
|
||||
|
||||
// was 0xFFC20101;
|
||||
|
||||
/**
|
||||
* Contracting characters are marked with an exception bit combination
|
||||
* in the collationElement table.
|
||||
@ -968,9 +971,14 @@ final public class UCA {
|
||||
// in code order.
|
||||
// add bottom 5 bits to UNSUPPORTED, and push rest
|
||||
//return UNSUPPORTED + (bigChar & 0xFFFF0000); // top bits added
|
||||
expandingStack.push(makeKey((bigChar & 0x7FFF) | 0x8000, 0, 0)); // primary = bottom 15 bits plus turn bottom bit on.
|
||||
// secondary and tertiary are both zero
|
||||
return makeKey(UNSUPPORTED_P + (bigChar >> 15), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY); // top 34 values plus UNSUPPORTED
|
||||
/*
|
||||
expandingStack.push(((bigChar & 0x7FFF) << 16) | 0x10000000); // primary = bottom 15 bits plus turn bottom bit on.
|
||||
// secondary and tertiary are both zero
|
||||
return UNSUPPORTED + ((bigChar << 1) & 0xFFFF0000); // top 34 values plus UNSUPPORTED
|
||||
*/
|
||||
}
|
||||
if (ce == CONTRACTING) {
|
||||
// Contracting is probably the most interesting (read "tricky") part
|
||||
@ -1127,11 +1135,11 @@ final public class UCA {
|
||||
return new Hashtable(multiTable);
|
||||
}
|
||||
|
||||
public CollationContents getCollationContents(byte ceLimit, Normalizer skipDecomps) {
|
||||
return new CollationContents(ceLimit, skipDecomps);
|
||||
public UCAContents getContents(byte ceLimit, Normalizer skipDecomps) {
|
||||
return new UCAContents(ceLimit, skipDecomps);
|
||||
}
|
||||
|
||||
public class CollationContents {
|
||||
public class UCAContents {
|
||||
int current = -1;
|
||||
Normalizer skipDecomps = new Normalizer(Normalizer.NFD);
|
||||
Iterator enum = null;
|
||||
@ -1140,16 +1148,15 @@ final public class UCA {
|
||||
/**
|
||||
* use FIXED_CE as the limit
|
||||
*/
|
||||
CollationContents(byte ceLimit, Normalizer skipDecomps) {
|
||||
UCAContents(byte ceLimit, Normalizer skipDecomps) {
|
||||
this.ceLimit = ceLimit;
|
||||
this.skipDecomps = skipDecomps;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns a string and its ces
|
||||
* returns a string
|
||||
*/
|
||||
public String next(int[] ces, int[] len) {
|
||||
|
||||
public String next() {
|
||||
String result = null; // null if done
|
||||
|
||||
// normal case
|
||||
@ -1158,7 +1165,6 @@ final public class UCA {
|
||||
if (getCEType(ch) >= ceLimit) continue;
|
||||
if (skipDecomps != null && skipDecomps.hasDecomposition(ch)) continue;
|
||||
result = String.valueOf(ch);
|
||||
len[0] = getCEs(result, true, ces);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1166,11 +1172,36 @@ final public class UCA {
|
||||
if (enum == null) enum = multiTable.keySet().iterator();
|
||||
if (enum.hasNext()) {
|
||||
result = (String)enum.next();
|
||||
len[0] = getCEs(result, true, ces);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns a string and its ces
|
||||
*/
|
||||
public String next(int[] ces, int[] len) {
|
||||
|
||||
String result = next(); // null if done
|
||||
if (result != null) {
|
||||
len[0] = getCEs(result, true, ces);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int[] lengthBuffer = new int[1];
|
||||
|
||||
/**
|
||||
* returns a string and its ces
|
||||
*/
|
||||
public boolean next(Pair result) {
|
||||
String s = next(ceListBuffer, lengthBuffer);
|
||||
if (s == null) return false;
|
||||
result.first = new CEList(ceListBuffer, 0, lengthBuffer[0]);
|
||||
result.second = s;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
213
tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
Normal file
213
tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
Normal file
@ -0,0 +1,213 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2001/09/19 23:31:50 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCA;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import java.io.*;
|
||||
import com.ibm.text.UCD.*;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.text.UTF16;
|
||||
|
||||
public class WriteCharts implements UCD_Types {
|
||||
|
||||
static UCD ucd;
|
||||
|
||||
static public void test(UCA uca) throws IOException {
|
||||
|
||||
uca.setAlternate(UCA.NON_IGNORABLE);
|
||||
|
||||
ucd = UCD.make();
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD);
|
||||
|
||||
UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
while (true) {
|
||||
String x = cc.next();
|
||||
if (x == null) break;
|
||||
set.add(new Pair(uca.getSortKey(x), x));
|
||||
}
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
int oldScript = -999;
|
||||
|
||||
int[] scriptCount = new int[LIMIT_SCRIPT];
|
||||
|
||||
int counter = 0;
|
||||
|
||||
int lastPrimary = -1;
|
||||
|
||||
String lastSortKey = null;
|
||||
|
||||
int high = uca.getSortKey("a").charAt(0);
|
||||
int variable = UCA.getPrimary(uca.getVariableHigh());
|
||||
|
||||
int columnCount = 0;
|
||||
|
||||
indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html");
|
||||
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
indexFile.println("<base target='main'>");
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
String sortKey = (String) p.first;
|
||||
String s = (String) p.second;
|
||||
|
||||
int cp = UTF16.charAt(s,0);
|
||||
byte script = ucd.getScript(cp);
|
||||
if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
|
||||
else if (script == INHERITED_SCRIPT) script = COMMON_SCRIPT;
|
||||
|
||||
// get first non-zero primary
|
||||
int primary = sortKey.charAt(0);
|
||||
if (sortKey.length() < 4) script = -3;
|
||||
else if (primary == 0) script = -2;
|
||||
else if (primary < variable) script = -1;
|
||||
else if (primary < high) script = COMMON_SCRIPT;
|
||||
|
||||
if (script != oldScript
|
||||
&& (oldScript < COMMON_SCRIPT || script != COMMON_SCRIPT && script != INHERITED_SCRIPT)) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
}
|
||||
if (output == null) {
|
||||
++scriptCount[script+3];
|
||||
if (scriptCount[script+3] > 1) {
|
||||
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
|
||||
ucd.getScriptID_fromIndex(script) + ", " + ucd.getCodeAndName(s));
|
||||
}
|
||||
output = openFile(scriptCount[script+3], script);
|
||||
oldScript = script;
|
||||
}
|
||||
|
||||
int strength = 6;
|
||||
if (lastSortKey != null && sortKey.charAt(0) == lastSortKey.charAt(0)) {
|
||||
strength = uca.strengthDifference(sortKey, lastSortKey);
|
||||
if (strength < 0) strength = -strength;
|
||||
}
|
||||
lastSortKey = sortKey;
|
||||
String breaker = "";
|
||||
if (columnCount > 10 || strength > 5) {
|
||||
if (strength <= 5) breaker = "</tr><tr><td></td>";
|
||||
else breaker = "</tr><tr>";
|
||||
columnCount = 0;
|
||||
}
|
||||
output.println(breaker + CLASSNAME[strength] + s
|
||||
+ "<br><tt>" + Utility.hex(s)
|
||||
//+ "<br>" + script
|
||||
//+ "<br>" + UCA.toString(sortKey)
|
||||
+ "</tt></td>");
|
||||
++columnCount;
|
||||
}
|
||||
|
||||
closeFile(output);
|
||||
indexFile.println("</body></html>");
|
||||
indexFile.close();
|
||||
}
|
||||
|
||||
// Opening <td> tags for chart cells, indexed by the strength value computed in test():
// 6 when there is no previous sort key or its first char differs from the current one,
// otherwise the absolute value returned by uca.strengthDifference.
// NOTE(review): the class letters presumably stand for quaternary/tertiary/secondary/
// primary/first and are styled in charts.css - confirm against the stylesheet.
static final String[] CLASSNAME = {
|
||||
"<td class='q'>",
|
||||
"<td class='q'>",
|
||||
"<td class='q'>",
|
||||
"<td class='t'>",
|
||||
"<td class='s'>",
|
||||
"<td class='p'>",
|
||||
"<td class='f'>"};
|
||||
|
||||
|
||||
static PrintWriter indexFile;
|
||||
|
||||
static PrintWriter openFile(int count, byte script) throws IOException {
|
||||
String scriptName = getChunkName(script);
|
||||
scriptName = ucd.getCase(scriptName, FULL, TITLE);
|
||||
|
||||
String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
|
||||
PrintWriter output = Utility.openPrintWriter("CollationCharts\\" + fileName);
|
||||
Utility.fixDot();
|
||||
System.out.println("Writing: " + scriptName);
|
||||
|
||||
indexFile.println(" | <a href = '" + fileName + "'>" + scriptName + "</a>");
|
||||
String title = "UCA: " + scriptName;
|
||||
output.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
output.println("<title>" + title + "</title>");
|
||||
output.println("<link rel='stylesheet' href='charts.css' type='text/css'>");
|
||||
output.println("</head><body><h2>" + scriptName + "</h2>");
|
||||
output.println("<table>");
|
||||
return output;
|
||||
}
|
||||
|
||||
static String getChunkName(byte script) {
|
||||
if (script == -3) return "NULL";
|
||||
else if (script == -2) return "IGNORABLE";
|
||||
else if (script == -1) return "VARIABLE";
|
||||
else if (script == HIRAGANA_SCRIPT) return "KATAKANA-HIRAGANA";
|
||||
else return ucd.getScriptID_fromIndex(script);
|
||||
}
|
||||
|
||||
static void closeFile(PrintWriter output) {
|
||||
if (output == null) return;
|
||||
output.println("</body></table></html>");
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
static final IntStack p1 = new IntStack(30);
|
||||
static final IntStack s1 = new IntStack(30);
|
||||
static final IntStack t1 = new IntStack(30);
|
||||
static final IntStack p2 = new IntStack(30);
|
||||
static final IntStack s2 = new IntStack(30);
|
||||
static final IntStack t2 = new IntStack(30);
|
||||
|
||||
static int getStrengthDifference(CEList ceList, CEList lastCEList) {
|
||||
extractNonzeros(ceList, p1, s1, t1);
|
||||
extractNonzeros(lastCEList, p2, s2, t2);
|
||||
int temp = p1.compareTo(p2);
|
||||
if (temp != 0) return 3;
|
||||
temp = s1.compareTo(s2);
|
||||
if (temp != 0) return 2;
|
||||
temp = t1.compareTo(t2);
|
||||
if (temp != 0) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void extractNonzeros(CEList ceList, IntStack primaries, IntStack secondaries, IntStack tertiaries) {
|
||||
primaries.clear();
|
||||
secondaries.clear();
|
||||
tertiaries.clear();
|
||||
|
||||
for (int i = 0; i < ceList.length(); ++i) {
|
||||
int ce = ceList.at(i);
|
||||
int temp = UCA.getPrimary(ce);
|
||||
if (temp != 0) primaries.push(temp);
|
||||
temp = UCA.getSecondary(ce);
|
||||
if (temp != 0) secondaries.push(temp);
|
||||
temp = UCA.getTertiary(ce);
|
||||
if (temp != 0) tertiaries.push(temp);
|
||||
}
|
||||
}
|
||||
*/
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2001/09/06 01:30:30 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/09/19 23:32:21 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -34,7 +34,6 @@ public class WriteCollationData implements UCD_Types {
|
||||
static final boolean EXCLUDE_UNSUPPORTED = true;
|
||||
static final boolean GENERATED_NFC_MISMATCHES = true;
|
||||
static final boolean DO_CHARTS = true;
|
||||
static final boolean WRITE_NAME_IN_CONFORMANCE = true;
|
||||
|
||||
|
||||
static UCA collator;
|
||||
@ -58,12 +57,13 @@ public class WriteCollationData implements UCD_Types {
|
||||
ucd = UCD.make("");
|
||||
|
||||
if (args.length == 0) args = new String[] {"?"}; // force the help comment
|
||||
boolean hex = false;
|
||||
boolean shortPrint = false;
|
||||
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
String arg = args[i];
|
||||
if (arg.equalsIgnoreCase("WriteRulesWithNames")) writeRules(WITH_NAMES);
|
||||
else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(collator);
|
||||
else if (arg.equalsIgnoreCase("WriteCharts")) WriteCharts.test(collator);
|
||||
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);
|
||||
else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(collator);
|
||||
|
||||
@ -72,15 +72,15 @@ public class WriteCollationData implements UCD_Types {
|
||||
else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) checkDisjointIgnorables();
|
||||
else if (arg.equalsIgnoreCase("writeContractions")) writeContractions();
|
||||
else if (arg.equalsIgnoreCase("FractionalUCA")) writeFractionalUCA("FractionalUCA");
|
||||
else if (arg.equalsIgnoreCase("writeConformance")) writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE, hex);
|
||||
else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED, hex);
|
||||
else if (arg.equalsIgnoreCase("writeConformance")) writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint);
|
||||
else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint);
|
||||
else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) testCompatibilityCharacters();
|
||||
else if (arg.equalsIgnoreCase("writeCollationValidityLog")) writeCollationValidityLog();
|
||||
else if (arg.equalsIgnoreCase("writeCaseExceptions")) writeCaseExceptions();
|
||||
else if (arg.equalsIgnoreCase("writeJavascriptInfo")) writeJavascriptInfo();
|
||||
else if (arg.equalsIgnoreCase("writeCaseFolding")) writeCaseFolding();
|
||||
else if (arg.equalsIgnoreCase("javatest")) javatest();
|
||||
else if (arg.equalsIgnoreCase("hex")) hex = true;
|
||||
else if (arg.equalsIgnoreCase("short")) shortPrint = true;
|
||||
else {
|
||||
System.out.println();
|
||||
System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)");
|
||||
@ -339,15 +339,17 @@ public class WriteCollationData implements UCD_Types {
|
||||
}
|
||||
|
||||
|
||||
static void writeConformance(String filename, byte option, boolean hex) throws IOException {
|
||||
UCD ucd30 = UCD.make("300");
|
||||
static void writeConformance(String filename, byte option, boolean shortPrint) throws IOException {
|
||||
UCD ucd30 = UCD.make("3.0.0");
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter(filename);
|
||||
if (!hex) log.write('\uFEFF');
|
||||
PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt");
|
||||
if (!shortPrint) log.write('\uFEFF');
|
||||
|
||||
System.out.println("Sorting");
|
||||
int counter = 0;
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
Utility.dot(counter++);
|
||||
if (!ucd.isRepresented(i)) continue;
|
||||
addStringX(UTF32.valueOf32(i), option);
|
||||
}
|
||||
@ -355,11 +357,14 @@ public class WriteCollationData implements UCD_Types {
|
||||
Hashtable multiTable = collator.getContracting();
|
||||
Enumeration enum = multiTable.keys();
|
||||
while (enum.hasMoreElements()) {
|
||||
Utility.dot(counter++);
|
||||
addStringX((String)enum.nextElement(), option);
|
||||
}
|
||||
|
||||
for (int i = 0; i < extraConformanceTests.length; ++i) { // put in sample non-characters
|
||||
Utility.dot(counter++);
|
||||
String s = UTF32.valueOf32(extraConformanceTests[i]);
|
||||
Utility.fixDot();
|
||||
System.out.println("Adding: " + Utility.hex(s));
|
||||
addStringX(s, option);
|
||||
}
|
||||
@ -367,6 +372,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
for (int i = 0; ; ++i) { // add first unallocated character
|
||||
if (!ucd.isAssigned(i)) {
|
||||
String s = UTF32.valueOf32(i);
|
||||
Utility.fixDot();
|
||||
System.out.println("Adding: " + Utility.hex(s));
|
||||
addStringX(s, option);
|
||||
break;
|
||||
@ -375,6 +381,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
|
||||
for (int i = 0; i < extraConformanceRanges.length; ++i) {
|
||||
Utility.dot(counter++);
|
||||
int start = extraConformanceRanges[i][0];
|
||||
int end = extraConformanceRanges[i][1];
|
||||
int increment = ((end - start + 1) / 303) + 1;
|
||||
@ -388,6 +395,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
addStringX(end, option);
|
||||
}
|
||||
|
||||
Utility.fixDot();
|
||||
System.out.println("Total: " + sortedD.size());
|
||||
Iterator it;
|
||||
|
||||
@ -399,6 +407,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
String lastKey = "";
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
String key = (String) it.next();
|
||||
String source = (String) sortedD.get(key);
|
||||
int fluff = key.charAt(key.length() - 1);
|
||||
@ -408,14 +417,12 @@ public class WriteCollationData implements UCD_Types {
|
||||
//log.println(source);
|
||||
String clipped = source.substring(0, source.length()-1);
|
||||
String stren = source.substring(source.length()-1);
|
||||
if (hex) {
|
||||
if (!shortPrint) {
|
||||
log.print(Utility.hex(source));
|
||||
} else {
|
||||
log.print(source + "\t" + Utility.hex(clipped));
|
||||
}
|
||||
if (WRITE_NAME_IN_CONFORMANCE) {
|
||||
log.print(
|
||||
";\t#" + ucd.getName(clipped)+ "\t" + UCA.toString(key));
|
||||
} else {
|
||||
log.print(source + "\t" + Utility.hex(clipped));
|
||||
}
|
||||
log.println();
|
||||
}
|
||||
@ -754,7 +761,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
|
||||
int[] ces = new int[50];
|
||||
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
diLog.println("# Contractions");
|
||||
@ -819,7 +826,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
String s = String.valueOf(ch);
|
||||
int len = collator.getCEs(s, true, ces);
|
||||
*/
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
Set sortedCodes = new TreeSet();
|
||||
@ -987,7 +994,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
String s = String.valueOf(ch);
|
||||
int len = collator.getCEs(s, true, ces);
|
||||
*/
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
Set sortedCodes = new TreeSet();
|
||||
@ -1179,7 +1186,7 @@ public class WriteCollationData implements UCD_Types {
|
||||
java.util.Comparator cm = new RuleComparator();
|
||||
Map ordered = new TreeMap(cm);
|
||||
|
||||
UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE,
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE,
|
||||
SKIP_CANONICAL_DECOMPOSIBLES ? nfd : null);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2001/09/06 01:29:48 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -58,8 +58,10 @@ public class DerivedProperty implements UCD_Types {
|
||||
DefaultIgnorable = 26,
|
||||
GraphemeExtend = 27,
|
||||
GraphemeBase = 28,
|
||||
|
||||
FC_NFC_Closure = 29,
|
||||
|
||||
LIMIT = 29;
|
||||
LIMIT = 30;
|
||||
|
||||
|
||||
public DerivedProperty(UCD ucd) {
|
||||
@ -156,8 +158,8 @@ public class DerivedProperty implements UCD_Types {
|
||||
compName = "NFD for the character";
|
||||
}
|
||||
header = "# Derived Property: " + name
|
||||
+ "\r\n# Normalized form " + NAME[i-GenNFD] + ", where DIFFERENT from " + compName + "."
|
||||
+ "\r\n# HANGUL SYLLABLES are algorithmically decomposed, and not listed explicitly."
|
||||
+ "\r\n# Lists characters in normalized form " + NAME[i-GenNFD] + "."
|
||||
+ "\r\n# Only those characters whith normalized forms are DIFFERENT from " + compName + " are listed!"
|
||||
+ "\r\n# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
|
||||
+ "\r\n# It is NOT sufficient to replace characters one-by-one with these results!";
|
||||
}
|
||||
@ -422,6 +424,25 @@ of characters, the first of which has a non-zero combining class.
|
||||
boolean hasProperty(int cp) { return getProperty(cp).length() != 0; }
|
||||
};
|
||||
|
||||
// FC_NFC_Closure: reports code points whose folded, NFC-normalized form still
// changes when it is folded and normalized a second time.
dprops[FC_NFC_Closure] = new DProp() {
    {
        name = "FC_NFC_Closure";
        header = "# Derived Property: " + name
            + "\r\n# Generated from computing: b = NFC(Fold(a)); c = NFC(Fold(b));"
            + "\r\n# Then if (c != b) add the mapping from a to c to the set of"
            + "\r\n# mappings that constitute the FC_NFC_Closure list";
    }

    public boolean propertyVaries() {
        return true;
    }

    // Returns "" for unrepresented code points and for those already stable after
    // one fold+NFC pass; otherwise "FN; " followed by the hex of the second pass.
    public String getProperty(int cp) {
        if (!ucdData.isRepresented(cp)) {
            return "";
        }
        String foldedOnce = nfc.normalize(fold(cp));
        String foldedTwice = nfc.normalize(fold(foldedOnce));
        return foldedTwice.equals(foldedOnce) ? "" : ("FN; " + Utility.hex(foldedTwice));
    }

    // A code point has the property exactly when getProperty yields a non-empty value.
    boolean hasProperty(int cp) {
        return getProperty(cp).length() > 0;
    }
};
|
||||
|
||||
for (int i = QuickNFD; i <= QuickNFKC; ++i) {
|
||||
dprops[i] = new QuickDProp(i);
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
|
||||
* $Date: 2001/09/06 01:29:48 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -27,7 +27,7 @@ final class DerivedPropertyLister extends PropertyLister {
|
||||
int width;
|
||||
boolean varies;
|
||||
|
||||
public DerivedPropertyLister(UCD ucd, int propMask, PrintStream output) {
|
||||
public DerivedPropertyLister(UCD ucd, int propMask, PrintWriter output) {
|
||||
this.propMask = propMask;
|
||||
this.output = output;
|
||||
this.ucdData = ucd;
|
||||
@ -87,7 +87,7 @@ final class DerivedPropertyLister extends PropertyLister {
|
||||
String last;
|
||||
|
||||
public byte status(int cp) {
|
||||
if (!ucdData.isAssigned(cp)) return EXCLUDE;
|
||||
if (!ucdData.isAssigned(cp) && propMask != DerivedProperty.DefaultIgnorable) return EXCLUDE;
|
||||
if (!varies) {
|
||||
return dprop.hasProperty(cp, propMask) ? INCLUDE : EXCLUDE;
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
|
||||
* $Date: 2001/08/31 00:30:17 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,14 +17,11 @@ import java.io.*;
|
||||
class DiffPropertyLister extends PropertyLister {
|
||||
private UCD oldUCD;
|
||||
|
||||
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintStream output) {
|
||||
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
|
||||
this.output = output;
|
||||
this.ucdData = UCD.make(newUCDName);
|
||||
if (oldUCDName != null) this.oldUCD = UCD.make(oldUCDName);
|
||||
}
|
||||
|
||||
public byte status (int cp) {
|
||||
return INCLUDE;
|
||||
breakByCategory = false;
|
||||
}
|
||||
|
||||
public String propertyName(int cp) {
|
||||
@ -42,14 +39,23 @@ class DiffPropertyLister extends PropertyLister {
|
||||
*/
|
||||
|
||||
|
||||
public byte status(int lastCp, int cp) {
|
||||
public byte status(int cp) {
|
||||
/*if (cp == 0xFFFF) {
|
||||
System.out.println("# " + Utility.hex(cp));
|
||||
}
|
||||
*/
|
||||
return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE;
|
||||
}
|
||||
|
||||
|
||||
public String headerString() {
|
||||
if (oldUCD != null) {
|
||||
return "# Differences between " + ucdData.getVersion() + " and " + oldUCD.getVersion();
|
||||
} else {
|
||||
return "# Allocated as of " + ucdData.getVersion();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public int print() {
|
||||
String status;
|
||||
if (oldUCD != null) {
|
||||
@ -73,6 +79,7 @@ class DiffPropertyLister extends PropertyLister {
|
||||
output.println();
|
||||
return count;
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
|
||||
* $Date: 2001/08/31 00:30:17 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -20,7 +20,7 @@ import com.ibm.text.utility.*;
|
||||
|
||||
public class GenerateCaseFolding implements UCD_Types {
|
||||
public static boolean DEBUG = false;
|
||||
public static UCD ucd = UCD.make("310");
|
||||
public static UCD ucd = UCD.make("");
|
||||
|
||||
public static void main(String[] args) throws java.io.IOException {
|
||||
makeCaseFold();
|
||||
@ -285,71 +285,4 @@ public class GenerateCaseFolding implements UCD_Types {
|
||||
}
|
||||
return result + "}";
|
||||
}
|
||||
|
||||
static final void getAge() throws IOException {
|
||||
PrintStream log = new PrintStream(
|
||||
new BufferedOutputStream (
|
||||
new FileOutputStream("UnicodeAge.txt"),
|
||||
4*1024));
|
||||
try {
|
||||
log.println("# Derived file showing when various code points were allocated in Unicode");
|
||||
log.println("# author: M. Davis");
|
||||
log.println("# generated: " + new Date());
|
||||
log.println("# Notes:");
|
||||
log.println("# - The old Hangul Syllables (removed from 2.0) are not included in the 110 listing.");
|
||||
log.println("# - The supplementary private use code points, although allocated earlier,");
|
||||
log.println("# were NOT specifically listed in the UCD until 3.0.1, and are not included until then.");
|
||||
new DiffPropertyLister(null, "110", log).print();
|
||||
new DiffPropertyLister("110", "200", log).print();
|
||||
new DiffPropertyLister("200", "210", log).print();
|
||||
new DiffPropertyLister("210", "300", log).print();
|
||||
new DiffPropertyLister("300", "310", log).print();
|
||||
/*
|
||||
printDiff("110", "200");
|
||||
UnicodeSet u11 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-1.1.txt", false);
|
||||
UnicodeSet u20 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.0.txt", false);
|
||||
UnicodeSet u21 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.1.txt", false);
|
||||
UnicodeSet u30 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-3.0.txt", false);
|
||||
UnicodeSet u31 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-3.1.txt", false);
|
||||
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 1.1 (minus Hangul Syllables): "
|
||||
+ n.format(u11.count()));
|
||||
log.println();
|
||||
u11.print(log, false, false, "1.1");
|
||||
|
||||
UnicodeSet u20m = new UnicodeSet(u20).remove(u11);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 2.0 (minus Unicode 1.1): "
|
||||
+ n.format(u20m.count()));
|
||||
log.println();
|
||||
u20m.print(log, false, false, "2.0");
|
||||
|
||||
UnicodeSet u21m = new UnicodeSet(u21).remove(u20);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 2.1 (minus Unicode 2.0): "
|
||||
+ n.format(u21m.count()));
|
||||
log.println();
|
||||
u21m.print(log, false, false, "2.1");
|
||||
|
||||
UnicodeSet u30m = new UnicodeSet(u30).remove(u21);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 3.0 (minus Unicode 2.1): "
|
||||
+ n.format(u30m.count()));
|
||||
log.println();
|
||||
u30m.print(log, false, false, "3.0");
|
||||
|
||||
UnicodeSet u31m = new UnicodeSet(u31).remove(u30);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 3.1 (minus Unicode 3.0): "
|
||||
+ n.format(u31m.count()));
|
||||
log.println();
|
||||
u31m.print(log, false, false, "3.1");
|
||||
*/
|
||||
} finally {
|
||||
if (log != null) log.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2001/09/06 01:29:48 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -22,9 +22,9 @@ import com.ibm.text.utility.*;
|
||||
|
||||
public class GenerateData implements UCD_Types {
|
||||
|
||||
public static void main (String[] args) throws IOException {
|
||||
public static void main (String inVersion, String[] args) throws IOException {
|
||||
System.out.println("START");
|
||||
ucd = UCD.make();
|
||||
ucd = UCD.make(inVersion);
|
||||
System.out.println("Loaded UCD " + ucd.getVersion() + " " + (new Date(ucd.getDate())));
|
||||
String version = ucd.getVersion();
|
||||
|
||||
@ -36,10 +36,7 @@ public class GenerateData implements UCD_Types {
|
||||
Utility.fixDot();
|
||||
System.out.println("Argument: " + args[i]);
|
||||
|
||||
if (arg.equalsIgnoreCase("version")) {
|
||||
version = args[++i];
|
||||
ucd = UCD.make(version);
|
||||
} else if (arg.equalsIgnoreCase("partition")) {
|
||||
if (arg.equalsIgnoreCase("partition")) {
|
||||
partitionProperties();
|
||||
} else if (arg.equalsIgnoreCase("list")) {
|
||||
listProperties();
|
||||
@ -91,9 +88,12 @@ public class GenerateData implements UCD_Types {
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
|
||||
mask = Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf);
|
||||
mask = Utility.setBits(mask, DerivedProperty.DefaultIgnorable, DerivedProperty.LIMIT-1);
|
||||
mask = Utility.setBits(mask, DerivedProperty.DefaultIgnorable, DerivedProperty.FC_NFC_Closure-1);
|
||||
generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-" + version );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedAge")) {
|
||||
generateAge("DerivedAge-" + version );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
|
||||
generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||
"DerivedLineBreak-" + version );
|
||||
@ -181,7 +181,7 @@ public class GenerateData implements UCD_Types {
|
||||
|
||||
static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
|
||||
|
||||
public static void doHeader(String fileName, PrintStream output, int headerChoice) {
|
||||
public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
|
||||
output.println("# " + fileName + ".txt");
|
||||
output.println("#");
|
||||
if (headerChoice == HEADER_SCRIPTS) {
|
||||
@ -203,7 +203,7 @@ public class GenerateData implements UCD_Types {
|
||||
}
|
||||
|
||||
public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName + "dX.txt"));
|
||||
PrintWriter output = Utility.openPrintWriter(fileName + "dX.txt");
|
||||
doHeader(fileName, output, headerChoice);
|
||||
for (int i = 0; i < DerivedProperty.LIMIT; ++i) {
|
||||
if ((bitMask & (1<<i)) == 0) continue;
|
||||
@ -218,8 +218,8 @@ public class GenerateData implements UCD_Types {
|
||||
|
||||
/*
|
||||
public static void listStrings(String file, int type, int subtype) throws IOException {
|
||||
ucd = UCD.make("310");
|
||||
UCD ucd30 = UCD.make("300");
|
||||
ucd = UCD.make("3.1.0");
|
||||
UCD ucd30 = UCD.make("3.0.0");
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
|
||||
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
@ -238,7 +238,7 @@ public class GenerateData implements UCD_Types {
|
||||
*/
|
||||
|
||||
public static void generateCompExclusions() throws IOException {
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "CompositionExclusionsDelta.txt"));
|
||||
PrintWriter output = Utility.openPrintWriter("CompositionExclusionsDelta.txt");
|
||||
new CompLister(output).print();
|
||||
output.close();
|
||||
}
|
||||
@ -247,10 +247,10 @@ public class GenerateData implements UCD_Types {
|
||||
UCD oldUCD;
|
||||
int oldLength = 0;
|
||||
|
||||
public CompLister(PrintStream output) {
|
||||
public CompLister(PrintWriter output) {
|
||||
this.output = output;
|
||||
ucdData = UCD.make("310");
|
||||
oldUCD = UCD.make("300");
|
||||
ucdData = UCD.make("3.1.0");
|
||||
oldUCD = UCD.make("3.0.0");
|
||||
showOnConsole = true;
|
||||
}
|
||||
public String propertyName(int cp) {
|
||||
@ -310,7 +310,7 @@ public class GenerateData implements UCD_Types {
|
||||
|
||||
public static void listDifferences() throws IOException {
|
||||
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "PropertyDifferences.txt"));
|
||||
PrintWriter output = Utility.openPrintWriter("PropertyDifferences.txt");
|
||||
|
||||
for (int i = 1; i < LIMIT_ENUM; ++i) {
|
||||
int iType = i & 0xFF00;
|
||||
@ -441,7 +441,7 @@ public class GenerateData implements UCD_Types {
|
||||
//*/
|
||||
|
||||
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file + "dX.txt"));
|
||||
PrintWriter output = Utility.openPrintWriter(file + "dX.txt");
|
||||
doHeader(file, output, headerChoice);
|
||||
int last = -1;
|
||||
for (int i = startEnum; i < endEnum; ++i) {
|
||||
@ -685,5 +685,81 @@ public class GenerateData implements UCD_Types {
|
||||
"\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD"
|
||||
|
||||
};
|
||||
|
||||
static final void generateAge(String filename) throws IOException {
|
||||
PrintWriter log = Utility.openPrintWriter(filename + "dX.txt");
|
||||
try {
|
||||
log.println("# Derived file showing when various code points were allocated in Unicode");
|
||||
log.println("# author: M. Davis");
|
||||
log.println("# generated: " + new Date());
|
||||
log.println("# Notes:");
|
||||
log.println("# - The old Hangul Syllables (removed from 2.0) are not included in the 1.1.0 listing.");
|
||||
log.println("# - The supplementary private use code points and the non-character code points");
|
||||
log.println("# were allocated in version 2.0, but not specifically listed in the UCD");
|
||||
log.println("# until versions 3.0.1 and 3.1.0 respectively.");
|
||||
|
||||
log.println("# ================================================");
|
||||
log.println();
|
||||
new DiffPropertyLister(null, "1.1.0", log).print();
|
||||
log.println("# ================================================");
|
||||
log.println();
|
||||
new DiffPropertyLister("1.1.0", "2.0.0", log).print();
|
||||
log.println("# ================================================");
|
||||
log.println();
|
||||
new DiffPropertyLister("2.0.0", "2.1.2", log).print();
|
||||
log.println("# ================================================");
|
||||
log.println();
|
||||
new DiffPropertyLister("2.1.2", "3.0.0", log).print();
|
||||
log.println("# ================================================");
|
||||
log.println();
|
||||
new DiffPropertyLister("3.0.0", "3.1.0", log).print();
|
||||
/*
|
||||
printDiff("110", "200");
|
||||
UnicodeSet u11 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-1.1.txt", false);
|
||||
UnicodeSet u20 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.0.txt", false);
|
||||
UnicodeSet u21 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.1.txt", false);
|
||||
UnicodeSet u30 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-3.0.txt", false);
|
||||
UnicodeSet u31 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-3.1.txt", false);
|
||||
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 1.1 (minus Hangul Syllables): "
|
||||
+ n.format(u11.count()));
|
||||
log.println();
|
||||
u11.print(log, false, false, "1.1");
|
||||
|
||||
UnicodeSet u20m = new UnicodeSet(u20).remove(u11);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 2.0 (minus Unicode 1.1): "
|
||||
+ n.format(u20m.count()));
|
||||
log.println();
|
||||
u20m.print(log, false, false, "2.0");
|
||||
|
||||
UnicodeSet u21m = new UnicodeSet(u21).remove(u20);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 2.1 (minus Unicode 2.0): "
|
||||
+ n.format(u21m.count()));
|
||||
log.println();
|
||||
u21m.print(log, false, false, "2.1");
|
||||
|
||||
UnicodeSet u30m = new UnicodeSet(u30).remove(u21);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 3.0 (minus Unicode 2.1): "
|
||||
+ n.format(u30m.count()));
|
||||
log.println();
|
||||
u30m.print(log, false, false, "3.0");
|
||||
|
||||
UnicodeSet u31m = new UnicodeSet(u31).remove(u30);
|
||||
log.println();
|
||||
log.println("# Code points assigned in Unicode 3.1 (minus Unicode 3.0): "
|
||||
+ n.format(u31m.count()));
|
||||
log.println();
|
||||
u31m.print(log, false, false, "3.1");
|
||||
*/
|
||||
} finally {
|
||||
if (log != null) log.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2001/09/06 01:29:48 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -49,7 +49,7 @@ public final class Main {
|
||||
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
|
||||
else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
|
||||
else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
|
||||
else if (arg.equalsIgnoreCase("GenerateData")) GenerateData.main(Utility.split(args[++i],','));
|
||||
else if (arg.equalsIgnoreCase("GenerateData")) GenerateData.main(ucdVersion, Utility.split(args[++i],','));
|
||||
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
||||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyFloatLister.java,v $
|
||||
* $Date: 2001/08/31 00:30:17 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,7 +17,7 @@ import java.io.*;
|
||||
class MyFloatLister extends PropertyLister {
|
||||
private float propMask;
|
||||
|
||||
public MyFloatLister(UCD ucd, float f, PrintStream output) {
|
||||
public MyFloatLister(UCD ucd, float f, PrintWriter output) {
|
||||
this.propMask = f;
|
||||
this.output = output;
|
||||
this.ucdData = ucd;
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
|
||||
* $Date: 2001/08/31 00:30:17 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -22,7 +22,7 @@ final class MyPropertyLister extends PropertyLister {
|
||||
|
||||
private int propMask;
|
||||
|
||||
public MyPropertyLister(UCD ucd, int propMask, PrintStream output) {
|
||||
public MyPropertyLister(UCD ucd, int propMask, PrintWriter output) {
|
||||
this.propMask = propMask;
|
||||
this.output = output;
|
||||
this.ucdData = ucd;
|
||||
@ -67,7 +67,7 @@ final class MyPropertyLister extends PropertyLister {
|
||||
if (s.length() == 0) s = "Other Combining Class";
|
||||
return "# " + s;
|
||||
} else if (main == BINARY_PROPERTIES) {
|
||||
return "# Binary Property";
|
||||
return "";
|
||||
} else if (main == JOINING_GROUP) {
|
||||
return "";
|
||||
} else {
|
||||
|
348
tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
Normal file
348
tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
Normal file
@ -0,0 +1,348 @@
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import com.ibm.text.*;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
|
||||
/**
|
||||
* Implements Unicode Normalization Forms C, D, KC, KD.<br>
|
||||
* See UTR#15 for details.<br>
|
||||
* Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
|
||||
* The Unicode Consortium makes no expressed or implied warranty of any
|
||||
* kind, and assumes no liability for errors or omissions.
|
||||
* No liability is assumed for incidental and consequential damages
|
||||
* in connection with or arising out of the use of the information here.
|
||||
* @author Mark Davis
|
||||
*/
|
||||
|
||||
public class NormalizerSample implements UCD_Types {
|
||||
static final String copyright = "Copyright (C) 2001, IBM Corp. and Unicode Inc. All Rights Reserved.";
|
||||
|
||||
public static boolean SHOW_PROGRESS = false;
|
||||
|
||||
/**
|
||||
* Create a normalizer for a given form.
|
||||
*/
|
||||
public NormalizerSample(byte form, String unicodeVersion) {
|
||||
this.composition = (form & COMPOSITION_MASK) != 0;
|
||||
this.compatibility = (form & COMPATIBILITY_MASK) != 0;
|
||||
this.data = getData(unicodeVersion);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a normalizer for a given form.
|
||||
*/
|
||||
public NormalizerSample(byte form) {
|
||||
this(form,"");
|
||||
}
|
||||
|
||||
/**
|
||||
* Masks for the form selector
|
||||
*/
|
||||
public static final byte
|
||||
COMPATIBILITY_MASK = 1,
|
||||
COMPOSITION_MASK = 2;
|
||||
|
||||
/**
|
||||
* Normalization Form Selector
|
||||
*/
|
||||
public static final byte
|
||||
NFD = 0 ,
|
||||
NFKD = COMPATIBILITY_MASK,
|
||||
NFC = COMPOSITION_MASK,
|
||||
NFKC = (byte)(COMPATIBILITY_MASK + COMPOSITION_MASK);
|
||||
|
||||
/**
|
||||
* Normalizes text according to the chosen form,
|
||||
* replacing contents of the target buffer.
|
||||
* @param source the original text, unnormalized
|
||||
* @param target the resulting normalized text
|
||||
*/
|
||||
public StringBuffer normalize(String source, StringBuffer target) {
|
||||
|
||||
// First decompose the source into target,
|
||||
// then compose if the form requires.
|
||||
|
||||
if (source.length() != 0) {
|
||||
internalDecompose(source, target);
|
||||
if (composition) {
|
||||
internalCompose(target);
|
||||
}
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalizes text according to the chosen form
|
||||
* @param source the original text, unnormalized
|
||||
* @return target the resulting normalized text
|
||||
*/
|
||||
public String normalize(String source) {
|
||||
return normalize(source, new StringBuffer()).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalizes text according to the chosen form
|
||||
* @param source the original text, unnormalized
|
||||
* @return target the resulting normalized text
|
||||
*/
|
||||
public String normalize(int cp) {
|
||||
return normalize(UTF16.valueOf(cp));
|
||||
}
|
||||
|
||||
/**
|
||||
*/
|
||||
private StringBuffer hasDecompositionBuffer = new StringBuffer();
|
||||
|
||||
public boolean hasDecomposition(int cp) {
|
||||
hasDecompositionBuffer.setLength(0);
|
||||
normalize(UTF16.valueOf(cp), hasDecompositionBuffer);
|
||||
if (hasDecompositionBuffer.length() != 1) return true;
|
||||
return cp != hasDecompositionBuffer.charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility: Checks whether there is a recursive decomposition of a character from the
|
||||
* Unicode Character Database. It is compatibility or canonical according to the particular
|
||||
* normalizer.
|
||||
* @param ch the source character
|
||||
*/
|
||||
public boolean normalizationDiffers(int ch) {
|
||||
return data.normalizationDiffers(ch, composition, compatibility);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility: Gets recursive decomposition of a character from the
|
||||
* Unicode Character Database.
|
||||
* @param compatibility If false selects the recursive
|
||||
* canonical decomposition, otherwise selects
|
||||
* the recursive compatibility AND canonical decomposition.
|
||||
* @param ch the source character
|
||||
* @param buffer buffer to be filled with the decomposition
|
||||
*/
|
||||
public void getRecursiveDecomposition(char ch, StringBuffer buffer) {
|
||||
data.getRecursiveDecomposition(ch, buffer, compatibility);
|
||||
}
|
||||
|
||||
|
||||
// ======================================
|
||||
// PRIVATES
|
||||
// ======================================
|
||||
|
||||
/**
|
||||
* The current form.
|
||||
*/
|
||||
private boolean composition;
|
||||
private boolean compatibility;
|
||||
|
||||
/**
|
||||
* Decomposes text, either canonical or compatibility,
|
||||
* replacing contents of the target buffer.
|
||||
* @param form the normalization form. If COMPATIBILITY_MASK
|
||||
* bit is on in this byte, then selects the recursive
|
||||
* compatibility decomposition, otherwise selects
|
||||
* the recursive canonical decomposition.
|
||||
* @param source the original text, unnormalized
|
||||
* @param target the resulting normalized text
|
||||
*/
|
||||
private void internalDecompose(String source, StringBuffer target) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
int ch32;
|
||||
for (int i = 0; i < source.length(); i += UTF16.getCharCount(ch32)) {
|
||||
buffer.setLength(0);
|
||||
ch32 = UTF16.charAt(source, i);
|
||||
data.getRecursiveDecomposition(ch32, buffer, compatibility);
|
||||
|
||||
// add all of the characters in the decomposition.
|
||||
// (may be just the original character, if there was
|
||||
// no decomposition mapping)
|
||||
|
||||
int ch;
|
||||
for (int j = 0; j < buffer.length(); j += UTF16.getCharCount(ch)) {
|
||||
ch = UTF16.charAt(buffer, j);
|
||||
int chClass = data.getCanonicalClass(ch);
|
||||
int k = target.length(); // insertion point
|
||||
if (chClass != 0) {
|
||||
|
||||
// bubble-sort combining marks as necessary
|
||||
|
||||
int ch2;
|
||||
for (; k > 0; k -= UTF16.getCharCount(ch2)) {
|
||||
ch2 = UTF16.charAt(target, k-1);
|
||||
if (data.getCanonicalClass(ch2) <= chClass) break;
|
||||
}
|
||||
}
|
||||
target.insert(k, UTF16.valueOf(ch));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Composes text in place. Target must already
|
||||
* have been decomposed.
|
||||
* Uses UTF16, which is a utility class for supplementary character support in Java.
|
||||
* @param target input: decomposed text.
|
||||
* output: the resulting normalized text.
|
||||
*/
|
||||
private void internalCompose(StringBuffer target) {
|
||||
int starterPos = 0;
|
||||
int starterCh = UTF16.charAt(target,0);
|
||||
int compPos = UTF16.getCharCount(starterCh); // length of last composition
|
||||
int lastClass = data.getCanonicalClass(starterCh);
|
||||
if (lastClass != 0) lastClass = 256; // fix for strings staring with a combining mark
|
||||
int oldLen = target.length();
|
||||
|
||||
// Loop on the decomposed characters, combining where possible
|
||||
|
||||
int ch;
|
||||
for (int decompPos = compPos; decompPos < target.length(); decompPos += UTF16.getCharCount(ch)) {
|
||||
ch = UTF16.charAt(target, decompPos);
|
||||
if (SHOW_PROGRESS) System.out.println(Utility.hex(target)
|
||||
+ ", decompPos: " + decompPos
|
||||
+ ", compPos: " + compPos
|
||||
+ ", ch: " + Utility.hex(ch)
|
||||
);
|
||||
int chClass = data.getCanonicalClass(ch);
|
||||
int composite = data.getPairwiseComposition(starterCh, ch);
|
||||
if (composite != data.NOT_COMPOSITE
|
||||
&& (lastClass < chClass || lastClass == 0)) {
|
||||
UTF16.setCharAt(target, starterPos, composite);
|
||||
// we know that we will only be replacing non-supplementaries by non-supplementaries
|
||||
// so we don't have to adjust the decompPos
|
||||
starterCh = composite;
|
||||
} else {
|
||||
if (chClass == 0) {
|
||||
starterPos = compPos;
|
||||
starterCh = ch;
|
||||
}
|
||||
lastClass = chClass;
|
||||
UTF16.setCharAt(target, compPos, ch);
|
||||
if (target.length() != oldLen) { // MAY HAVE TO ADJUST!
|
||||
System.out.println("ADJUSTING: " + Utility.hex(target));
|
||||
decompPos += target.length() - oldLen;
|
||||
oldLen = target.length();
|
||||
}
|
||||
compPos += UTF16.getCharCount(ch);
|
||||
}
|
||||
}
|
||||
target.setLength(compPos);
|
||||
}
|
||||
|
||||
// The following class makes use of the UCD class, which accesses data in the Unicode Character Database
|
||||
|
||||
static class Stub {
|
||||
private UCD ucd;
|
||||
private HashMap compTable = new HashMap();
|
||||
private BitSet isSecond = new BitSet();
|
||||
private BitSet canonicalRecompose = new BitSet();
|
||||
private BitSet compatibilityRecompose = new BitSet();
|
||||
static final int NOT_COMPOSITE = 0xFFFF;
|
||||
|
||||
Stub(String version) {
|
||||
ucd = UCD.make(version);
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (!ucd.isAssigned(i)) continue;
|
||||
if (ucd.isPUA(i)) continue;
|
||||
if (ucd.isTrailingJamo(i)) isSecond.set(i);
|
||||
byte dt = ucd.getDecompositionType(i);
|
||||
if (dt != CANONICAL) continue;
|
||||
if (!ucd.getBinaryProperty(i, CompositionExclusion)) {
|
||||
try {
|
||||
String s = ucd.getDecompositionMapping(i);
|
||||
int len = UTF16.countCodePoint(s);
|
||||
if (len != 2) {
|
||||
if (len > 2) throw new IllegalArgumentException("BAD LENGTH: " + len + ucd.toString(i));
|
||||
continue;
|
||||
}
|
||||
int a = UTF16.charAt(s, 0);
|
||||
if (ucd.getCombiningClass(a) != 0) continue;
|
||||
|
||||
int b = UTF16.charAt(s, UTF16.getCharCount(a));
|
||||
isSecond.set(b);
|
||||
|
||||
// have a recomposition, so set the bit
|
||||
canonicalRecompose.set(i);
|
||||
|
||||
// set the compatibility recomposition bit
|
||||
// ONLY if the component characters
|
||||
// don't compatibility decompose
|
||||
if (ucd.getDecompositionType(a) <= CANONICAL
|
||||
&& ucd.getDecompositionType(b) <= CANONICAL) {
|
||||
compatibilityRecompose.set(i);
|
||||
}
|
||||
|
||||
long key = (((long)a)<<32) | b;
|
||||
|
||||
compTable.put(new Long(key), new Integer(i));
|
||||
} catch (Exception e) {
|
||||
throw new ChainException("Error: {0}", new Object[]{ucd.toString(i)}, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
short getCanonicalClass(int cp) {
|
||||
return ucd.getCombiningClass(cp);
|
||||
}
|
||||
|
||||
boolean isTrailing(int cp) {
|
||||
return isSecond.get(cp);
|
||||
}
|
||||
|
||||
boolean normalizationDiffers(int cp, boolean composition, boolean compatibility) {
|
||||
byte dt = ucd.getDecompositionType(cp);
|
||||
if (!composition) {
|
||||
if (compatibility) return dt >= CANONICAL;
|
||||
else return dt == CANONICAL;
|
||||
} else {
|
||||
// almost the same, except that we add back in the characters
|
||||
// that RECOMPOSE
|
||||
if (compatibility) return dt >= CANONICAL && !compatibilityRecompose.get(cp);
|
||||
else return dt == CANONICAL && !canonicalRecompose.get(cp);
|
||||
}
|
||||
}
|
||||
|
||||
public void getRecursiveDecomposition(int cp, StringBuffer buffer, boolean compatibility) {
|
||||
byte dt = ucd.getDecompositionType(cp);
|
||||
// we know we decompose all CANONICAL, plus > CANONICAL if compatibility is TRUE.
|
||||
if (dt == CANONICAL || dt > CANONICAL && compatibility) {
|
||||
String s = ucd.getDecompositionMapping(cp);
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
getRecursiveDecomposition(cp, buffer, compatibility);
|
||||
}
|
||||
} else {
|
||||
UTF16.append(buffer, cp);
|
||||
}
|
||||
}
|
||||
|
||||
int getPairwiseComposition(int starterCh, int ch) {
|
||||
int hangulPoss = UCD.composeHangul(starterCh, ch);
|
||||
if (hangulPoss != 0xFFFF) return hangulPoss;
|
||||
Object obj = compTable.get(new Long((((long)starterCh)<<32) | ch));
|
||||
if (obj == null) return 0xFFFF;
|
||||
return ((Integer)obj).intValue();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Contains normalization data from the Unicode Character Database.
|
||||
* use false for the minimal set, true for the real set.
|
||||
*/
|
||||
private Stub data;
|
||||
|
||||
private static HashMap versionCache = new HashMap();
|
||||
|
||||
private static Stub getData (String version) {
|
||||
if (version.length() == 0) version = UCD.latestVersion;
|
||||
Stub result = (Stub)versionCache.get(version);
|
||||
if (result == null) {
|
||||
result = new Stub(version);
|
||||
versionCache.put(version, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $
|
||||
* $Date: 2001/08/31 00:30:17 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -15,6 +15,7 @@ package com.ibm.text.UCD;
|
||||
|
||||
import java.io.*;
|
||||
import com.ibm.text.utility.*;
|
||||
import java.text.NumberFormat;
|
||||
|
||||
|
||||
abstract public class PropertyLister implements UCD_Types {
|
||||
@ -24,9 +25,10 @@ abstract public class PropertyLister implements UCD_Types {
|
||||
|
||||
|
||||
protected UCD ucdData;
|
||||
protected PrintStream output;
|
||||
protected PrintWriter output;
|
||||
protected boolean showOnConsole;
|
||||
protected boolean usePropertyComment = true;
|
||||
protected boolean breakByCategory = true;
|
||||
protected int firstRealCp = -2;
|
||||
protected int lastRealCp = -2;
|
||||
protected boolean alwaysBreaks = false; // set to true if property only breaks
|
||||
@ -51,7 +53,7 @@ abstract public class PropertyLister implements UCD_Types {
|
||||
}
|
||||
|
||||
public String optionalComment(int cp) {
|
||||
if (!usePropertyComment) return "";
|
||||
if (!usePropertyComment || !breakByCategory) return "";
|
||||
int cat = ucdData.getCategory(cp);
|
||||
if (cat == Lt || cat == Ll || cat == Lu) return "L&";
|
||||
return ucdData.getCategoryID(cp);
|
||||
@ -167,7 +169,7 @@ abstract public class PropertyLister implements UCD_Types {
|
||||
if (s == INCLUDE && firstRealCp != -1) {
|
||||
byte cat = ucdData.getCategory(cp);
|
||||
if (cat == Lt || cat == Ll) cat = Lu;
|
||||
if (cat != firstRealCpCat) s = BREAK;
|
||||
if (breakByCategory && cat != firstRealCpCat) s = BREAK;
|
||||
}
|
||||
|
||||
switch(s) {
|
||||
@ -208,9 +210,12 @@ abstract public class PropertyLister implements UCD_Types {
|
||||
}
|
||||
|
||||
if (count == 0) System.out.println("WARNING -- ZERO COUNT FOR " + header);
|
||||
NumberFormat nf = NumberFormat.getInstance();
|
||||
nf.setMaximumFractionDigits(0);
|
||||
output.println();
|
||||
output.println("# Total code points: " + count);
|
||||
output.println("# Total code points: " + nf.format(count));
|
||||
output.println();
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
||||
* $Date: 2001/09/01 00:06:15 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -145,7 +145,7 @@ public class TestData implements UCD_Types {
|
||||
|
||||
static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
|
||||
|
||||
public static void doHeader(String fileName, PrintStream output, int headerChoice) {
|
||||
public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
|
||||
output.println("# " + fixFile(fileName));
|
||||
output.println("#");
|
||||
if (headerChoice == HEADER_SCRIPTS) {
|
||||
@ -167,8 +167,8 @@ public class TestData implements UCD_Types {
|
||||
}
|
||||
|
||||
public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
|
||||
ucd = UCD.make("310");
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName));
|
||||
ucd = UCD.make("3.1.0");
|
||||
PrintWriter output = Utility.openPrintWriter(fileName);
|
||||
doHeader(fileName, output, headerChoice);
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
if ((bitMask & (1<<i)) == 0) continue;
|
||||
@ -183,9 +183,9 @@ public class TestData implements UCD_Types {
|
||||
|
||||
/*
|
||||
public static void listStrings(String file, int type, int subtype) throws IOException {
|
||||
ucd = UCD.make("310");
|
||||
UCD ucd30 = UCD.make("300");
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
|
||||
ucd = UCD.make("3.1.0");
|
||||
UCD ucd30 = UCD.make("3.0.0");
|
||||
PrintWriter output = new PrintStream(new FileOutputStream(GEN_DIR + file));
|
||||
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if ((i & 0xFFF) == 0) System.out.println("# " + i);
|
||||
@ -203,7 +203,7 @@ public class TestData implements UCD_Types {
|
||||
*/
|
||||
|
||||
public static void generateCompExclusions() throws IOException {
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "CompositionExclusionsDelta.txt"));
|
||||
PrintWriter output = Utility.openPrintWriter("CompositionExclusionsDelta.txt");
|
||||
new CompLister(output).print();
|
||||
output.close();
|
||||
}
|
||||
@ -212,10 +212,10 @@ public class TestData implements UCD_Types {
|
||||
UCD oldUCD;
|
||||
int oldLength = 0;
|
||||
|
||||
public CompLister(PrintStream output) {
|
||||
public CompLister(PrintWriter output) {
|
||||
this.output = output;
|
||||
ucdData = UCD.make("310");
|
||||
oldUCD = UCD.make("300");
|
||||
ucdData = UCD.make("3.1.0");
|
||||
oldUCD = UCD.make("3.0.0");
|
||||
showOnConsole = true;
|
||||
}
|
||||
public String propertyName(int cp) {
|
||||
@ -249,7 +249,7 @@ public class TestData implements UCD_Types {
|
||||
//*/
|
||||
|
||||
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
|
||||
PrintWriter output = Utility.openPrintWriter(file);
|
||||
doHeader(file, output, headerChoice);
|
||||
int last = -1;
|
||||
for (int i = startEnum; i < endEnum; ++i) {
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2001/08/31 00:30:17 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -42,6 +42,7 @@ public final class UCD implements UCD_Types {
|
||||
*/
|
||||
public static UCD make(String version) {
|
||||
if (version == null || version.length() == 0) version = latestVersion;
|
||||
if (version.indexOf('.') < 0) throw new IllegalArgumentException("Version must be of form 3.1.1");
|
||||
UCD result = (UCD)versionCache.get(version);
|
||||
if (result == null) {
|
||||
result = new UCD();
|
||||
@ -74,6 +75,7 @@ public final class UCD implements UCD_Types {
|
||||
if (major < 2 && codePoint > 0xFFFF) return false;
|
||||
return true; // Noncharacter
|
||||
}
|
||||
if (major >= 2 && codePoint >= 0xF0000 && codePoint <= 0x10FFFD) return true;
|
||||
if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF && major >= 3 && minor >= 1) return true;
|
||||
return false;
|
||||
}
|
||||
@ -438,6 +440,21 @@ public final class UCD implements UCD_Types {
|
||||
public byte getScript(int codePoint) {
|
||||
return get(codePoint, false).script;
|
||||
}
|
||||
|
||||
|
||||
public byte getScript(String s) {
|
||||
byte result = COMMON_SCRIPT;
|
||||
if (s == null || s.length() == 0) return result;
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
|
||||
cp = UTF32.char32At(s, i);
|
||||
byte script = getScript(cp);
|
||||
if (script == INHERITED_SCRIPT) continue;
|
||||
result = script;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public byte getAge(int codePoint) {
|
||||
return get(codePoint, false).age;
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2001/08/31 00:29:50 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:16 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -15,8 +15,8 @@ package com.ibm.text.UCD;
|
||||
|
||||
public interface UCD_Types {
|
||||
public static final String DATA_DIR = "C:\\DATA\\";
|
||||
public static final String BIN_DIR = DATA_DIR + "\\BIN\\";
|
||||
public static final String GEN_DIR = DATA_DIR + "\\GEN\\";
|
||||
public static final String BIN_DIR = DATA_DIR + "BIN\\";
|
||||
public static final String GEN_DIR = DATA_DIR + "GEN\\";
|
||||
|
||||
|
||||
static final byte BINARY_FORMAT = 5; // bumped if binary format of UCD changes
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||
* $Date: 2001/09/06 01:29:48 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2001/09/19 23:33:15 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -20,6 +20,7 @@ import java.math.BigDecimal;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
//import java.text.*;
|
||||
import com.ibm.text.*;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
@ -331,6 +332,7 @@ public class VerifyUCD implements UCD_Types {
|
||||
System.out.println("Checking Prohibited and Unassigned");
|
||||
System.out.println();
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
if (mappedOut.get(cp)) continue;
|
||||
|
||||
boolean ucdUnassigned = !ucd.isAllocated(cp);
|
||||
@ -339,33 +341,89 @@ public class VerifyUCD implements UCD_Types {
|
||||
boolean idnProhibited = prohibited.get(cp);
|
||||
|
||||
if (ucdUnassigned && !idnUnassigned) {
|
||||
showError("UCD Unassigned but not IDN Unassigned: ", cp);
|
||||
showError("?UCD Unassigned but not IDN Unassigned", cp, "");
|
||||
++errorCount;
|
||||
} else if (!ucdUnassigned && idnUnassigned) {
|
||||
showError("Not UCD Unassigned but IDN Unassigned: ", cp);
|
||||
showError("?Not UCD Unassigned but IDN Unassigned", cp, "");
|
||||
++errorCount;
|
||||
}
|
||||
|
||||
if (idnProhibited && unassigned.get(cp)) {
|
||||
showError("Both IDN Unassigned AND IDN Prohibited: ", cp);
|
||||
showError("?Both IDN Unassigned AND IDN Prohibited", cp, "");
|
||||
++errorCount;
|
||||
}
|
||||
|
||||
if (guess && !idnProhibited) {
|
||||
showError("UCD ?prohibited? but not IDN Prohibited: ", cp);
|
||||
showError("?UCD ?prohibited? but not IDN Prohibited ", cp, "");
|
||||
++errorCount;
|
||||
} else if (!guess && idnProhibited) {
|
||||
showError("Not UCD ?prohibited? but IDN Prohibited: ", cp);
|
||||
showError("?Not UCD ?prohibited? but IDN Prohibited ", cp, "");
|
||||
++errorCount;
|
||||
}
|
||||
|
||||
if (cp == 0x3131) {
|
||||
System.out.println("Debug: " + idnProhibited
|
||||
+ ", " + idnUnassigned
|
||||
+ ", " + nfkc.hasDecomposition(cp)
|
||||
+ ", " + ucd.getCodeAndName(nfkc.normalize(cp))
|
||||
+ ", " + ucd.getCodeAndName(nfc.normalize(cp)));
|
||||
}
|
||||
|
||||
if (!idnProhibited && ! idnUnassigned && nfkc.hasDecomposition(cp)) {
|
||||
String kc = nfkc.normalize(cp);
|
||||
String c = nfc.normalize(cp);
|
||||
if (kc.equals(c)) continue;
|
||||
int cp2;
|
||||
boolean excluded = false;
|
||||
for (int j = 0; j < kc.length(); j += UTF16.getCharCount(cp2)) {
|
||||
cp2 = UTF16.charAt(kc, j);
|
||||
if (prohibited.get(cp2)) {
|
||||
showError("Prohibited with NFKC, but output with NFC", cp, "");
|
||||
excluded = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!excluded) {
|
||||
showError("Remapped to core abstract character with NFKC (but not NFC)", cp, ""); // , "\t=> " + ucd.getCodeAndName(kc));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
System.out.println();
|
||||
System.out.println("Total Errors: " + errorCount);
|
||||
System.out.println("Writing IDNCheck.txt");
|
||||
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("IDNCheck.txt");
|
||||
log.println("IDN Check");
|
||||
log.println("Total Errors: " + errorCount);
|
||||
|
||||
Iterator it = idnMap.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
String description = (String) it.next();
|
||||
Map map = (Map) idnMap.get(description);
|
||||
log.println();
|
||||
log.println(description);
|
||||
log.println("Total: " + map.size());
|
||||
log.println();
|
||||
|
||||
Iterator it2 = map.keySet().iterator();
|
||||
while (it2.hasNext()) {
|
||||
Object key = it2.next();
|
||||
String line = (String) map.get(key);
|
||||
log.println(" " + line);
|
||||
}
|
||||
}
|
||||
log.close();
|
||||
}
|
||||
|
||||
static Map idnMap = new HashMap();
|
||||
|
||||
static void showError(String description, int cp) {
|
||||
System.out.println(description + ucd.getCodeAndName(cp) + " (" + ucd.getCategoryID(cp) + ")");
|
||||
static void showError(String description, int cp, String option) {
|
||||
Map probe = (Map) idnMap.get(description);
|
||||
if (probe == null) {
|
||||
probe = new TreeMap();
|
||||
idnMap.put(description, probe);
|
||||
}
|
||||
probe.put(new Integer(cp), ucd.getCodeAndName(cp) + " (" + ucd.getCategoryID(cp) + ")" + option);
|
||||
}
|
||||
|
||||
|
||||
@ -611,8 +669,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
||||
if (reason.equals("Map out")) {
|
||||
value = Utility.fromHex(parts[1]);
|
||||
Utility.fixDot();
|
||||
System.out.println("Note, Mapping Out: " + ucd.getCodeAndName(cp)
|
||||
+ ", " + ucd.getCodeAndName(value) + ", " + ucd.getCategoryID(cp));
|
||||
showError("Mapping Out: ", cp, "");
|
||||
mappedOut.set(cp);
|
||||
}
|
||||
idnFold.put(key, value);
|
||||
@ -1033,26 +1090,37 @@ E0020-E007F; [TAGGING CHARACTERS]
|
||||
int sum = 0;
|
||||
long start, end;
|
||||
|
||||
java.text.NumberFormat nf = java.text.NumberFormat.getPercentInstance();
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = count; i >= 0; --i) {
|
||||
sum += dummy0(i).length();
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
double base = end - start;
|
||||
|
||||
System.out.println("unsynchronized static char[]: " + nf.format((end - start)/base));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = count; i >= 0; --i) {
|
||||
sum += dummy2(i).length();
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("synchronized: " + (end - start));
|
||||
System.out.println("synchronized static char[]: " + nf.format((end - start)/base));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = count; i >= 0; --i) {
|
||||
sum += dummy1(i).length();
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("char[] each time: " + (end - start));
|
||||
System.out.println("char[] each time: " + nf.format((end - start)/base));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i = count; i >= 0; --i) {
|
||||
sum += dummy3(i).length();
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("String +: " + (end - start));
|
||||
System.out.println("two valueofs: " + nf.format((end - start)/base));
|
||||
|
||||
System.out.println(sum);
|
||||
}
|
||||
@ -1074,6 +1142,12 @@ E0020-E007F; [TAGGING CHARACTERS]
|
||||
}
|
||||
}
|
||||
|
||||
// Micro-benchmark subject: builds a two-char String from the high and low
// 16 bits of a, reusing the shared temp2 buffer (declared outside this
// method) without any locking.
// NOTE(review): the exact form of this body is what the timing loop
// measures, so it should not be restructured.
static String dummy0(int a) {
|
||||
temp2[0] = (char)(a >>> 16);
|
||||
temp2[1] = (char)a;
|
||||
return new String(temp2);
|
||||
}
|
||||
|
||||
// Micro-benchmark subject: builds a two-char String from the high and low
// 16 bits of a via String.valueOf on the high char followed by string
// concatenation of the low char.
// NOTE(review): the exact form of this body is what the timing loop
// measures, so it should not be restructured.
static String dummy3(int a) {
|
||||
return String.valueOf((char)(a >>> 16)) + (char)a;
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/IntStack.java,v $
|
||||
* $Date: 2001/08/31 00:19:16 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:52 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,7 +17,7 @@ package com.ibm.text.utility;
|
||||
// Simple stack mechanism, with push, pop and access
|
||||
// =============================================================
|
||||
|
||||
public final class IntStack {
|
||||
public final class IntStack implements Comparable {
|
||||
private int[] values;
|
||||
private int top = 0;
|
||||
|
||||
@ -51,4 +51,31 @@ public final class IntStack {
|
||||
public boolean isEmpty() {
|
||||
return top == 0;
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
top = 0;
|
||||
}
|
||||
|
||||
public int compareTo(Object other) {
|
||||
IntStack that = (IntStack) other;
|
||||
int min = top;
|
||||
if (min < that.top) min = that.top;
|
||||
for (int i = 0; i < min; ++i) {
|
||||
int result = values[i] - that.values[i];
|
||||
if (result != 0) return result;
|
||||
}
|
||||
return top - that.top;
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
return compareTo(other) == 0;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
int result = top;
|
||||
for (int i = 0; i < top; ++i) {
|
||||
result = result * 37 + values[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -5,15 +5,15 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Pair.java,v $
|
||||
* $Date: 2001/08/31 00:19:16 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:52 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.utility;
|
||||
|
||||
public final class Pair implements java.lang.Comparable {
|
||||
public final class Pair implements java.lang.Comparable, Cloneable {
|
||||
|
||||
public Comparable first, second;
|
||||
|
||||
@ -41,4 +41,12 @@ public final class Pair implements java.lang.Comparable {
|
||||
if (trial != 0) return trial;
|
||||
return second.compareTo(that.second);
|
||||
}
|
||||
|
||||
public Object clone() {
|
||||
try {
|
||||
return super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java,v $
|
||||
* $Date: 2001/08/31 00:19:16 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2001/09/19 23:33:52 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -56,7 +56,7 @@ public final class UTF8StreamWriter extends Writer {
|
||||
TRAILING_TOP = 0x80;
|
||||
|
||||
private static final int MAGIC = 0x10000 + ((0 - 0xD800) << 10) + (0 - 0xDC00);
|
||||
|
||||
|
||||
public final void write(char[] buffer, int cStart, int cLength) throws IOException {
|
||||
int cEnd = cStart + cLength;
|
||||
while (cStart < cEnd) {
|
||||
@ -71,6 +71,8 @@ public final class UTF8StreamWriter extends Writer {
|
||||
// get code point
|
||||
|
||||
int utf32 = buffer[cStart++];
|
||||
|
||||
if (utf32 == 0x0D) continue; // skip write
|
||||
|
||||
// special check for surrogates
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2001/09/06 01:29:03 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2001/09/19 23:33:52 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -408,12 +408,15 @@ public final class Utility { // COMMON UTILITIES
|
||||
|
||||
private static final String[] searchPath = {
|
||||
"EXTRAS",
|
||||
"3.1.2",
|
||||
"3.2.0",
|
||||
"3.1.1",
|
||||
"3.1.0",
|
||||
"3.0.1",
|
||||
"3.0.0",
|
||||
"2.1.9",
|
||||
"2.1.8",
|
||||
"2.1.5",
|
||||
"2.1.2",
|
||||
"2.0.0",
|
||||
"1.1.0",
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user