Added chart program, minor edits.

X-SVN-Rev: 9918
This commit is contained in:
Mark Davis 2002-09-25 06:40:14 +00:00
parent ca34222583
commit 5395623062
7 changed files with 544 additions and 210 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
* $Date: 2002/05/31 01:41:03 $
* $Revision: 1.9 $
* $Date: 2002/09/25 06:40:13 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -164,8 +164,8 @@ public class GenOverlap implements UCD_Types {
static boolean PROGRESS = false;
static void fullCheck() throws IOException {
PrintWriter log = Utility.openPrintWriter("Overlap.html");
PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt");
PrintWriter log = Utility.openPrintWriter("Overlap.html", Utility.UTF8_WINDOWS);
PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt", Utility.UTF8_WINDOWS);
Iterator it = completes.keySet().iterator();
int counter = 0;
@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types {
newKeys.removeAll(joint);
oldKeys.removeAll(joint);
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false, false);
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS);
Iterator it = list.iterator();
int last = -1;
while (it.hasNext()) {
@ -631,7 +631,7 @@ public class GenOverlap implements UCD_Types {
System.out.println("Data Gathered");
PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html");
PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html", Utility.UTF8_WINDOWS);
Utility.writeHtmlHeader(log, "Check Hash");
log.println("<h1>Collisions</h1>");
log.println("<p>Shows collisions among primary values when hashed to table size = " + tableLength + ".");
@ -694,7 +694,7 @@ public class GenOverlap implements UCD_Types {
}
public static void listCyrillic(UCA collatorIn) throws IOException {
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false, false);
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", Utility.UTF8_WINDOWS);
Set set = new TreeSet(collatorIn);
Set set2 = new TreeSet(collatorIn);
ucd = UCD.make();

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
* $Date: 2002/07/03 02:15:47 $
* $Revision: 1.9 $
* $Date: 2002/09/25 06:40:13 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -55,6 +55,7 @@ public class Main {
else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart();
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator);
else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2002/07/03 02:15:47 $
* $Revision: 1.11 $
* $Date: 2002/09/25 06:40:13 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -19,6 +19,11 @@ import java.io.*;
import com.ibm.text.UCD.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UnicodeSet;
import java.text.SimpleDateFormat;
public class WriteCharts implements UCD_Types {
@ -82,7 +87,7 @@ public class WriteCharts implements UCD_Types {
Utility.copyTextFile("charts.css", false, folder + "charts.css");
Utility.copyTextFile("help.html", true, folder + "help.html");
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
Utility.appendFile("index_header.html", true, indexFile, replacement);
/*
@ -242,7 +247,7 @@ public class WriteCharts implements UCD_Types {
Utility.copyTextFile("charts.css", false, folder + "charts.css");
Utility.copyTextFile("norm_help.html", true, folder + "help.html");
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
Utility.appendFile("index_header.html", true, indexFile, replacement);
/*
@ -351,7 +356,7 @@ public class WriteCharts implements UCD_Types {
Utility.copyTextFile("charts.css", false, folder + "charts.css");
Utility.copyTextFile("case_help.html", true, folder + "help.html");
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
Utility.appendFile("index_header.html", true, indexFile, replacement);
/*
@ -505,7 +510,7 @@ public class WriteCharts implements UCD_Types {
Utility.copyTextFile("charts.css", false, folder + "charts.css");
Utility.copyTextFile("name_help.html", true, folder + "help.html");
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
Utility.appendFile("index_header.html", true, indexFile, replacement);
int columnCount = 0;
@ -614,7 +619,7 @@ public class WriteCharts implements UCD_Types {
String hover = scriptName.equals(shortScriptName) ? "" : "' title='" + shortScriptName;
String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
Utility.fixDot();
System.out.println("Writing: " + scriptName);
indexFile.println(" <a href = '" + fileName + hover + "'>" + scriptName + "</a>");
@ -629,7 +634,7 @@ public class WriteCharts implements UCD_Types {
static PrintWriter openFile2(int count, String directory, String name) throws IOException {
String fileName = "chart_" + name + (count > 1 ? count + "" : "") + ".html";
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
Utility.fixDot();
System.out.println("Writing: " + name);
indexFile.println(" <a href = '" + fileName + "'>" + name + "</a>");
@ -719,6 +724,203 @@ public class WriteCharts implements UCD_Types {
return false;
}
static final Transliterator addCircle = Transliterator.createFromRules(
"any-addCircle", "([[:Mn:][:Me:]]) > \u25CC $1", Transliterator.FORWARD);
/**
 * Writes "composition_chart.html", an HTML page with one table per script
 * showing how base strings and trailing mark strings combine.
 *
 * For every script index below UCD_Types.LIMIT_SCRIPT the method walks all
 * code points, keeps those in that script whose NFD form differs from the
 * character itself, and splits the NFD form into its first code point (the
 * first two UTF-16 units for Hangul syllables) and the remainder. The first
 * parts become table rows, the remainders become table columns. A cell is
 * filled only when some character decomposes to row + column:
 * class 'w' when NFC of row + column is one of those characters, class 'r'
 * otherwise (showing the first recorded source). Characters collected for a
 * script but never shown in its table are listed in a "... Excluded" table.
 * After all scripts, marks from [[:Me:][:Mn:]] that never appeared as a
 * column are listed under "Marks that never combine".
 *
 * Progress and diagnostics are printed to System.out.
 *
 * @throws IOException if the output file cannot be opened
 */
public static void writeCompositionChart() throws IOException {
    Default.setUCD();
    UCA uca = new UCA(null,"");

    Set letters = new TreeSet();
    Set marks = new TreeSet(uca);       // ordered by the UCA comparator
    Set totalMarks = new TreeSet(uca);  // union of marks over all scripts
    Map decomposes = new HashMap();     // NFD string -> sources (read back below as a Set)
    Set notPrinted = new TreeSet(new UTF16.StringComparator());
    Set printed = new HashSet();

    // UnicodeSet latin = new UnicodeSet("[:latin:]");

    PrintWriter out = Utility.openPrintWriter("composition_chart.html", Utility.UTF8_WINDOWS);
    try {
        out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        out.println("<style>");
        out.println("body { font-family: Arial Unicode MS }");
        out.println("td { text-align: Center ; vertical-align: top; width: 1%; background-color: #EEEEEE }");
        out.println("tt { font-size: 50% }");
        // NOTE(review): "width='1%'" is HTML attribute syntax, not a CSS declaration; left as-is.
        out.println("table { width='1%' }");
        out.println(".w { background-color: #FFFFFF }");
        out.println(".h { background-color: #EEEEFF }");
        out.println(".r { background-color: #FF0000 }");
        out.println("</style>");
        out.println("</head><body bgcolor='#FFFFFF'>");
        out.println("<h1>Composites</h1>");

        UnicodeSetIterator it = new UnicodeSetIterator();

        for (byte script = 0; script < UCD_Types.LIMIT_SCRIPT; ++script) {

            String scriptName = "";
            try {
                scriptName = Default.ucd.getScriptID_fromIndex(script);
                Utility.fixDot();
                System.out.println(scriptName);
            } catch (IllegalArgumentException e) {
                // scriptName is still empty here, so report the index that failed.
                System.out.println("Failed to get script name for script index: " + script);
                continue;
            }

            letters.clear();
            letters.add(""); // header row
            marks.clear();
            notPrinted.clear();
            printed.clear();

            // Inclusive upper bound: U+10FFFF is the last code point.
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                byte type = Default.ucd.getCategory(cp);
                if (type == Default.ucd.UNASSIGNED || type == Default.ucd.PRIVATE_USE) continue; // skip chaff
                Utility.dot(cp);

                byte newScript = Default.ucd.getScript(cp);
                if (newScript != script) continue;

                String source = UTF16.valueOf(cp);
                String decomp = Default.nfd.normalize(source);
                if (decomp.equals(source)) continue;

                // pick up all decompositions
                int count = UTF16.getCharCount(UTF16.charAt(decomp, 0));
                if (count == decomp.length()) {
                    // Decomposes to a single code point: nothing to split into row + column.
                    notPrinted.add(source);
                    continue; // skip unless marks
                }
                if (UCD.isHangulSyllable(cp)) count = 2;
                String first = decomp.substring(0, count);
                String second = decomp.substring(count);
                //if (!markSet.containsAll(second)) continue; // skip unless marks

                letters.add(first);
                marks.add(second);
                Utility.addToSet(decomposes, decomp, source);
                notPrinted.add(source);
                if (source.equals("\u212b")) System.out.println("A-RING!");
            }

            if (marks.size() != 0) {

                totalMarks.addAll(marks);

                out.println("<table border='1' cellspacing='0'>");
                out.println("<caption>" + scriptName + "<br>(" + letters.size() + " × " + marks.size() + ")</caption>");

                Iterator it2 = letters.iterator();
                while (it2.hasNext()) {
                    String let = (String)it2.next();
                    out.println("<tr>" + showCell(Default.nfc.normalize(let), "class='h'"));
                    Iterator it3 = marks.iterator();
                    while (it3.hasNext()) {
                        String mark = (String)it3.next();
                        String merge = let + mark;
                        if (let.length() != 0 && decomposes.get(merge) == null) {
                            out.println("<td>&nbsp;</td>");
                            continue;
                        }
                        String comp;
                        try {
                            comp = Default.nfc.normalize(merge);
                        } catch (Exception e) {
                            // Report the string that failed (not the exception's hex) and
                            // still emit a cell so the remaining columns stay aligned.
                            System.out.println("Failed when trying to compose <" + Utility.hex(merge) + ">: " + e);
                            out.println("<td>&nbsp;</td>");
                            continue;
                        }
                        // skip unless single char or header
                        /*if (let.length() != 0
                            && (UTF16.countCodePoint(comp) != 1 || comp.equals(merge))) {
                            out.println("<td class='x'>&nbsp;</td>");
                            continue;
                        }
                        */
                        // May be null for the header row (let is empty); only used in the non-header branches.
                        Set decomps = (Set) decomposes.get(merge);
                        if (let.length() == 0) {
                            printed.add(comp);
                            out.println(showCell(comp, "class='h'"));
                        } else if (decomps.contains(comp)) {
                            printed.add(comp);
                            out.println(showCell(comp, "class='w'"));
                        } else {
                            // NFC result is not one of the recorded sources: show the first source instead.
                            comp = (String) new ArrayList(decomps).get(0);
                            printed.add(comp);
                            out.println(showCell(comp, "class='r'"));
                        }
                    }
                    out.println("</tr>");
                }
                out.println("</table><br>");
                //out.println("<table><tr><th>Other Letters</th><th>Other Marks</th></tr><tr><td>");
                //tabulate(out, atomics.iterator(),16);
                //out.println("</td><td>");
                //out.println("</td></tr></table>");
            }
            notPrinted.removeAll(printed);
            if (notPrinted.size() != 0) {
                tabulate(out, scriptName + " Excluded", notPrinted.iterator(), 24, "class='r'");
                out.println("<br>");
            }
        }

        Set otherMarks = new TreeSet(uca);
        UnicodeSet markSet = new UnicodeSet("[[:Me:][:Mn:]]");
        it.reset(markSet);
        while (it.next()) {
            int cp = it.codepoint;
            String source = UTF16.valueOf(cp);
            if (totalMarks.contains(source)) continue; // skip all that we have already
            otherMarks.add(source);
        }
        // NOTE(review): class 'b' has no rule in the <style> block written above.
        tabulate(out, "Marks that never combine", otherMarks.iterator(), 24, "class='b'");

        out.println("</body></html>");
    } finally {
        out.close();
    }
}
/**
 * Writes the strings supplied by an iterator as an HTML table, wrapping to a
 * new row after every {@code limit} cells. Each cell is rendered by
 * {@code showCell}.
 *
 * @param out       writer receiving the HTML
 * @param caption   table caption; omitted when null or empty
 * @param it2       iterator over the String values to show, one per cell
 * @param limit     maximum number of cells per row
 * @param classType attribute text passed through to {@code showCell} for every cell
 */
public static void tabulate(PrintWriter out, String caption, Iterator it2, int limit, String classType) {
    out.println("<table border='1' cellspacing='0'>");
    // <caption> has to be the first child of <table>, so write it before
    // the first row is opened rather than inside the <tr>.
    if (caption != null && caption.length() != 0) {
        out.println("<caption>" + caption + "</caption>");
    }
    out.println("<tr>");
    int count = 0;
    while (it2.hasNext()) {
        if (++count > limit) {
            // Current row is full: close it and start the next one.
            out.println("</tr><tr>");
            count = 1;
        }
        out.println(showCell((String)it2.next(), classType));
    }
    out.println("</tr></table>");
}
/**
 * Builds the HTML for a single table cell.
 *
 * A null {@code comp} yields an empty cell containing only a non-breaking
 * space. Otherwise the cell carries a title attribute (hex form of the
 * string plus its name), shows the string after passing it through the
 * {@code addCircle} transliterator, and repeats the hex form underneath in
 * a {@code <tt>} element.
 *
 * @param comp      the string to display, or null for an empty cell
 * @param classType attribute text placed inside the {@code <td>} tag; may be empty
 * @return the {@code <td>...</td>} markup
 */
public static String showCell(String comp, String classType) {
    StringBuffer cell = new StringBuffer("<td ");
    cell.append(classType);
    // Separate the caller's attribute text from whatever follows it.
    if (classType.length() != 0) {
        cell.append(" ");
    }
    if (comp == null) {
        cell.append(">&nbsp;</td>");
        return cell.toString();
    }
    cell.append("title='");
    cell.append(Utility.hex(comp));
    cell.append(" ");
    cell.append(Default.ucd.getName(comp));
    cell.append("'>");
    cell.append(addCircle.transliterate(comp));
    cell.append("<br><tt>");
    cell.append(Utility.hex(comp));
    cell.append("</tt></td>");
    return cell.toString();
}
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
* $Date: 2002/07/15 15:23:01 $
* $Revision: 1.26 $
* $Date: 2002/09/25 06:40:14 $
* $Revision: 1.27 $
*
*******************************************************************************
*/
@ -144,7 +144,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, false);
// new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024);
// log = new PrintWriter(new FileOutputStream("CaseFolding_data.js"));
log = Utility.openPrintWriter("CaseFolding_data.js", false, false);
log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS);
log.println("var CF = new Object();");
int count = 0;
while (true) {
@ -189,7 +189,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
//log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
log = Utility.openPrintWriter("Normalization_data.js", false, false);
log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS);
int count = 0;
@ -318,7 +318,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
}
}
PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", true, true);
PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
//if (!shortPrint) log.write('\uFEFF');
log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
log.println("# Generated: " + getNormalDate());
@ -702,7 +702,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
}*/
static void testCompatibilityCharacters() throws IOException {
log = Utility.openPrintWriter("UCA_CompatComparison.txt");
log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
int[] kenCes = new int[50];
int[] markCes = new int[50];
@ -1196,7 +1196,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
"UTF8"),
32*1024));
*/
PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", false, false);
PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS);
diLog.write('\uFEFF');
@ -1234,7 +1234,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
"UTF8"),
32*1024));
*/
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", false, false);
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.LATIN1_WINDOWS);
diLog.write('\uFEFF');
@ -1413,7 +1413,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
"UTF8"),
32*1024));
*/
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", false, false);
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.LATIN1_WINDOWS);
diLog.write('\uFEFF');
@ -1660,7 +1660,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
int[] lenArray = new int[1];
Set alreadyDone = new HashSet();
PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", false, false);
PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.LATIN1_WINDOWS);
while (true) {
String s = cc.next(ces, lenArray);
@ -1784,7 +1784,7 @@ F900..FAFF; CJK Compatibility Ideographs
if (shortPrint) filename += "_SHORT";
if (option == IN_XML) filename += ".xml"; else filename += ".txt";
log = Utility.openPrintWriter(filename, false, false);
log = Utility.openPrintWriter(filename, Utility.LATIN1_WINDOWS);
String[] commentText = {
"UCA Rules",
@ -3951,7 +3951,7 @@ static int swapCJK(int i) {
Default.setUCD();
//log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
log = Utility.openPrintWriter("CheckCollationValidity.html", false, false);
log = Utility.openPrintWriter("CheckCollationValidity.html", Utility.UTF8_WINDOWS);
log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
log.println("<title>UCA Validity Log</title>");
@ -4618,7 +4618,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
static PrintWriter writeHead(int counter, int end, String title, String other, String version, boolean show) throws IOException {
PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html");
PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html", Utility.UTF8_WINDOWS);
copyFile(out, "HTML-Part1.txt");
/*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2002/08/09 23:56:24 $
* $Revision: 1.22 $
* $Date: 2002/09/25 06:40:13 $
* $Revision: 1.23 $
*
*******************************************************************************
*/
@ -73,11 +73,15 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2002/08/04 21:38:45 $
* $Revision: 1.17 $
* $Date: 2002/09/25 06:40:13 $
* $Revision: 1.18 $
*
*******************************************************************************
*/
@ -964,6 +964,9 @@ to guarantee identifier closure.
public boolean hasComputableName(int codePoint) {
if (codePoint >= 0xF900 && codePoint <= 0xFA2D) return true;
if (codePoint >= 0x2800 && codePoint <= 0x28FF) return true;
if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) return true;
int rangeStart = mapToRepresentative(codePoint, major < 2);
switch (rangeStart) {
default:

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2002/08/04 21:38:44 $
* $Revision: 1.24 $
* $Date: 2002/09/25 06:40:14 $
* $Revision: 1.25 $
*
*******************************************************************************
*/
@ -18,11 +18,16 @@ import java.text.*;
import java.io.*;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.Replaceable;
import com.ibm.icu.text.ReplaceableString;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.text.UCD.*;
public final class Utility implements UCD_Types { // COMMON UTILITIES
static final boolean UTF8 = true; // TODO -- make argument
public static final char BOM = '\uFEFF';
public static String[] append(String[] array1, String[] array2) {
String[] temp = new String[array1.length + array2.length];
@ -335,6 +340,83 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return output.toString();
}
/**
 * Mutable pair of text offsets used as an output parameter by the
 * matcher-based search helpers in this class.
 */
public static final class Position {
    /** Offset at which the reported span begins. */
    public int start;
    /** Offset at which the reported span stops. */
    public int limit;
}
/**
 * Searches forward for the first offset at which the matcher matches.
 * Every offset from {@code offset} up to and including
 * {@code text.length()} is tried in turn.
 *
 * @param matcher a UnicodeMatcher, such as a UnicodeSet
 * @param text    the text to search
 * @param offset  the offset at which the search starts
 * @param output  receives the start and limit of the match; when nothing
 *                matches, both are set to {@code text.length()}
 * @return true iff a match was found
 */
public static boolean next(UnicodeMatcher matcher, Replaceable text, int offset,
        Position output) {
    final int[] cursor = new int[1]; // TODO replace later; extra object creation
    final int length = text.length();
    // don't worry about surrogates; matcher will handle
    int candidate = offset;
    while (candidate <= length) {
        cursor[0] = candidate;
        if (matcher.matches(text, cursor, length, false) == UnicodeMatcher.U_MATCH) {
            // a hit: the matcher has moved cursor[0] to the end of the match
            output.start = candidate;
            output.limit = cursor[0];
            return true;
        }
        ++candidate;
    }
    output.start = length;
    output.limit = length;
    return false;
}
/**
 * Searches backward for an offset at which the matcher matches.
 * Offsets are tried from {@code offset} down to 0, and each attempt passes
 * {@code offset} to the matcher as its limit.
 *
 * @param matcher a UnicodeMatcher, such as a UnicodeSet
 * @param text    the text to search
 * @param offset  the offset at which the backward search starts
 * @param output  receives the start and limit of the match; when nothing
 *                matches, both are set to 0
 * @return true iff a match was found
 */
public static boolean previous(UnicodeMatcher matcher, Replaceable text, int offset,
        Position output) {
    final int[] cursor = new int[1]; // TODO replace later; extra object creation
    final int floor = 0;
    // don't worry about surrogates; matcher will handle
    int candidate = offset;
    while (candidate >= floor) {
        cursor[0] = candidate;
        if (matcher.matches(text, cursor, offset, false) == UnicodeMatcher.U_MATCH) {
            // a hit: report where the attempt started and where the matcher left the cursor
            output.start = candidate;
            output.limit = cursor[0];
            return true;
        }
        --candidate;
    }
    output.start = floor;
    output.limit = floor;
    return false;
}
/**
 * Repeatedly searches the text for the divider, recording one Position per
 * search in {@code output}, and returns how many entries were filled.
 * Each search starts at the limit of the previous hit. The search that
 * finds nothing fills the last entry (with start and limit both equal to
 * the text length, as set by {@code next}) and ends the loop. A text with
 * no dividers therefore fills only {@code output[0]} and returns 1.
 * Null slots in {@code output} are replaced with new Position objects.
 *
 * NOTE(review): the recorded positions are those reported by {@code next},
 * i.e. the spans where the divider matched; confirm callers expect that
 * rather than the spans between dividers.
 *
 * @param divider a UnicodeMatcher, such as a UnicodeSet
 * @param text    the text to be divided
 * @param output  where the resulting positions go; must have room for
 *                every entry, since no bounds check is made here
 * @return the number of items put into output
 */
public static int split(UnicodeMatcher divider, Replaceable text, Position[] output) {
    int filled = 0;
    int from = 0;
    while (true) {
        Position slot = output[filled];
        if (slot == null) {
            slot = new Position();
            output[filled] = slot;
        }
        ++filled;
        if (!next(divider, text, from, slot)) {
            return filled;
        }
        // Resume just after the hit that was found.
        from = slot.limit;
    }
}
/**
* Splits a string containing divider into pieces, storing in output
* and returns the number of pieces.
@ -358,14 +440,14 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
public static String[] split(String s, char divider) {
String[] result = new String[100];
String[] result = new String[100]; // HACK
int count = split(s, divider, result);
return extract(result, 0, count);
}
public static String[] extract(String[] source, int start, int end) {
String[] result = new String[end-start];
System.arraycopy(source, start, result, 0, end - start);
public static String[] extract(String[] source, int start, int limit) {
String[] result = new String[limit-start];
System.arraycopy(source, start, result, 0, limit - start);
return result;
}
@ -564,7 +646,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
// Or if they are UTF8, use true, false
public static PrintWriter openPrintWriter(String filename, byte options) throws IOException {
File file = new File(getOutputName(filename));
System.out.println("Creating File: " + file);
Utility.fixDot();
System.out.println("Creating File: " + file.getCanonicalPath());
File parent = new File(file.getParent());
//System.out.println("Creating File: "+ parent);
parent.mkdirs();
@ -609,6 +692,28 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
}
/**
 * Prints the entries of a map as "key pairSeparator value" items joined by
 * {@code separator}, in the iteration order of the map's key set.
 *
 * When a Breaker is supplied, keys it filters out are skipped, and each
 * remaining key is rendered through {@code b.get(key, previousKey)}, where
 * previousKey is the last key that was printed (null for the first).
 *
 * @param pw            destination writer
 * @param c             the map to print
 * @param pairSeparator text placed between a key and its value
 * @param separator     text placed between consecutive entries
 * @param b             optional filter/formatter for keys; may be null
 */
public static void print(PrintWriter pw, Map c, String pairSeparator, String separator, Breaker b) {
    boolean needSeparator = false;
    Object previousKey = null;
    for (Iterator keys = c.keySet().iterator(); keys.hasNext();) {
        Object key = keys.next();
        Object value = c.get(key);
        if (b != null && !b.filter(key)) {
            continue;
        }
        // Separator goes before every printed entry except the first.
        if (needSeparator) {
            pw.print(separator);
        }
        needSeparator = true;
        if (b == null) {
            pw.print(key + pairSeparator + value);
        } else {
            pw.print(b.get(key, previousKey) + pairSeparator + value);
        }
        previousKey = key;
    }
}
public static void appendFile(String filename, boolean utf8, PrintWriter output) throws IOException {
appendFile(filename, utf8, output, null);
}
@ -870,19 +975,35 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
static PrintWriter showSetNamesPw;
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, UCD ucd) {
if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
showSetNames(showSetNamesPw, prefix, set, separateLines, false, ucd);
showSetNamesPw.flush();
showSetNames(prefix, set, separateLines, false, false, ucd);
}
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
showSetNames(prefix, set, separateLines, IDN, false, ucd);
}
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
showSetNames( pw, prefix, set, separateLines, IDN, false, ucd);
}
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, boolean withChar, UCD ucd) {
if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
showSetNames(showSetNamesPw, prefix, set, separateLines, IDN, withChar, ucd);
showSetNamesPw.flush();
}
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN,
boolean withChar, UCD ucd) {
int count = set.getRangeCount();
for (int i = 0; i < count; ++i) {
int start = set.getRangeStart(i);
int end = set.getRangeEnd(i);
if (separateLines || (IDN && isSeparateLineIDN(start,end,ucd))) {
for (int cp = start; cp <= end; ++cp) {
if (!IDN) pw.println(prefix + ucd.getCodeAndName(cp));
if (!IDN) pw.println(prefix + ucd.getCode(cp)
+ "\t# "
+ (withChar ? " (" + UTF16.valueOf(cp) + ") " : "")
+ ucd.getName(cp));
else {
pw.println(prefix + Utility.hex(cp,4) + "; " + ucd.getName(cp));
}
@ -891,7 +1012,10 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
if (!IDN) {
pw.println(prefix + ucd.getCode(start)
+ ((start != end) ? (".." + ucd.getCode(end)) : "")
+ "\t# " + ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
+ "\t# "
+ (withChar ? " (" + UTF16.valueOf(start)
+ ((start != end) ? (".." + UTF16.valueOf(end)) : "") + ") " : "")
+ ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
);
} else {