2001-09-19 23:33:52 +00:00
|
|
|
/**
|
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
|
|
|
* others. All Rights Reserved. *
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
2002-09-25 06:40:14 +00:00
|
|
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
2004-02-12 08:23:19 +00:00
|
|
|
* $Date: 2004/02/12 08:23:19 $
|
|
|
|
* $Revision: 1.20 $
|
2001-09-19 23:33:52 +00:00
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
package com.ibm.text.UCA;
|
|
|
|
|
|
|
|
import java.util.*;
|
|
|
|
|
|
|
|
import java.io.*;
|
|
|
|
import com.ibm.text.UCD.*;
|
|
|
|
import com.ibm.text.utility.*;
|
2002-03-15 01:57:01 +00:00
|
|
|
import com.ibm.icu.text.UTF16;
|
2002-09-25 06:40:14 +00:00
|
|
|
import com.ibm.icu.text.UnicodeSetIterator;
|
|
|
|
import com.ibm.icu.text.Transliterator;
|
|
|
|
import com.ibm.icu.text.UnicodeSet;
|
2002-10-03 22:58:17 +00:00
|
|
|
import java.text.NumberFormat;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
import java.text.SimpleDateFormat;
|
2001-09-19 23:33:52 +00:00
|
|
|
|
|
|
|
public class WriteCharts implements UCD_Types {
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
// Mode flag toggled by the chart generators in this class: collationChart() sets it
// to true, while the normalization, case, script and index charts reset it to false.
// NOTE(review): the code that reads this flag is outside this excerpt - confirm its effect.
static boolean HACK_KANA = false;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
static public void special() {
|
2004-02-07 01:01:17 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
for (int i = 0xE000; i < 0x10000; ++i) {
|
2004-02-07 01:01:17 +00:00
|
|
|
if (!Default.ucd().isRepresented(i)) continue;
|
|
|
|
if (!Default.nfkc().isNormalized(i)) continue;
|
|
|
|
System.out.println(Default.ucd().getCodeAndName(i));
|
2002-05-29 02:01:00 +00:00
|
|
|
}
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static public void collationChart(UCA uca) throws IOException {
|
2002-04-23 01:59:16 +00:00
|
|
|
Default.setUCD(uca.getUCDVersion());
|
|
|
|
HACK_KANA = true;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
uca.setAlternate(UCA.NON_IGNORABLE);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
//Normalizer nfd = new Normalizer(Normalizer.NFD);
|
|
|
|
//Normalizer nfc = new Normalizer(Normalizer.NFC);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
|
2001-10-26 23:33:48 +00:00
|
|
|
cc.enableSamples();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
Set set = new TreeSet();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
while (true) {
|
|
|
|
String x = cc.next();
|
|
|
|
if (x == null) break;
|
2002-04-23 01:59:16 +00:00
|
|
|
if (x.equals("\u2F00")) {
|
|
|
|
System.out.println("debug");
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
set.add(new Pair(uca.getSortKey(x), x));
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
PrintWriter output = null;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
Iterator it = set.iterator();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-26 23:33:48 +00:00
|
|
|
byte oldScript = -127;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-26 23:33:48 +00:00
|
|
|
int[] scriptCount = new int[128];
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
int counter = 0;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-25 20:35:42 +00:00
|
|
|
String lastSortKey = "\u0000";
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
int high = uca.getSortKey("a").charAt(0);
|
|
|
|
int variable = UCA.getPrimary(uca.getVariableHigh());
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
int columnCount = 0;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
String[] replacement = new String[] {"%%%", "Collation Charts"};
|
|
|
|
String folder = "charts\\uca\\";
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
|
|
|
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
|
|
|
Utility.copyTextFile("help.html", Utility.UTF8, folder + "help.html");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.appendFile("index_header.html", Utility.UTF8, indexFile, replacement);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-26 23:33:48 +00:00
|
|
|
/*
|
2001-09-19 23:33:52 +00:00
|
|
|
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
indexFile.println("<title>UCA Default Collation Table</title>");
|
|
|
|
indexFile.println("<base target='main'>");
|
2001-10-25 20:35:42 +00:00
|
|
|
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
2001-09-19 23:33:52 +00:00
|
|
|
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
|
|
|
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
2001-10-26 23:33:48 +00:00
|
|
|
*/
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
while (it.hasNext()) {
|
|
|
|
Utility.dot(counter);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
Pair p = (Pair) it.next();
|
|
|
|
String sortKey = (String) p.first;
|
|
|
|
String s = (String) p.second;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
int cp = UTF16.charAt(s,0);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
byte script = Default.ucd().getScript(cp);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
// get first non-zero primary
|
2002-04-23 01:59:16 +00:00
|
|
|
int currentPrimary = getFirstPrimary(sortKey);
|
|
|
|
int primary = currentPrimary >>> 16;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
if (sortKey.length() < 4) script = NULL_ORDER;
|
|
|
|
else if (primary == 0) script = IGNORABLE_ORDER;
|
2003-08-22 16:51:21 +00:00
|
|
|
else if (primary <= variable) script = VARIABLE_ORDER;
|
2001-09-19 23:33:52 +00:00
|
|
|
else if (primary < high) script = COMMON_SCRIPT;
|
2002-07-03 02:15:47 +00:00
|
|
|
else if (UCA.isImplicitLeadPrimary(primary)) {
|
|
|
|
if (primary < UCA_Types.UNSUPPORTED_CJK_AB_BASE) script = CJK;
|
|
|
|
else if (primary < UCA_Types.UNSUPPORTED_OTHER_BASE) script = CJK_AB;
|
|
|
|
else script = UNSUPPORTED;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-26 23:33:48 +00:00
|
|
|
if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
|
|
|
|
else if ((script == INHERITED_SCRIPT || script == COMMON_SCRIPT) && oldScript >= 0) script = oldScript;
|
|
|
|
|
2002-09-25 06:40:14 +00:00
|
|
|
if (script != oldScript
|
2001-10-26 23:33:48 +00:00
|
|
|
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
|
|
|
) {
|
2001-09-19 23:33:52 +00:00
|
|
|
closeFile(output);
|
|
|
|
output = null;
|
2001-10-26 23:33:48 +00:00
|
|
|
oldScript = script;
|
2001-09-19 23:33:52 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
if (output == null) {
|
|
|
|
++scriptCount[script+3];
|
|
|
|
if (scriptCount[script+3] > 1) {
|
2002-09-25 06:40:14 +00:00
|
|
|
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
|
2004-02-07 01:01:17 +00:00
|
|
|
getChunkName(script, LONG) + ", " + Default.ucd().getCodeAndName(s));
|
2001-09-19 23:33:52 +00:00
|
|
|
}
|
2002-04-23 22:50:15 +00:00
|
|
|
output = openFile(scriptCount[script+3], folder, script);
|
2001-09-19 23:33:52 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
boolean firstPrimaryEquals = currentPrimary == getFirstPrimary(lastSortKey);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-25 20:35:42 +00:00
|
|
|
int strength = uca.strengthDifference(sortKey, lastSortKey);
|
|
|
|
if (strength < 0) strength = -strength;
|
2001-09-19 23:33:52 +00:00
|
|
|
lastSortKey = sortKey;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-25 20:35:42 +00:00
|
|
|
// find out if this is an expansion: more than one primary weight
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-25 20:35:42 +00:00
|
|
|
int primaryCount = 0;
|
|
|
|
for (int i = 0; i < sortKey.length(); ++i) {
|
|
|
|
char w = sortKey.charAt(i);
|
|
|
|
if (w == 0) break;
|
2002-06-02 05:07:08 +00:00
|
|
|
if (UCA.isImplicitLeadPrimary(w)) {
|
2002-04-23 01:59:16 +00:00
|
|
|
++i; // skip next
|
|
|
|
}
|
2001-10-25 20:35:42 +00:00
|
|
|
++ primaryCount;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
String breaker = "";
|
2001-10-25 20:35:42 +00:00
|
|
|
if (columnCount > 10 || !firstPrimaryEquals) {
|
2001-09-19 23:33:52 +00:00
|
|
|
columnCount = 0;
|
2002-04-23 01:59:16 +00:00
|
|
|
if (!firstPrimaryEquals || script == UNSUPPORTED) breaker = "</tr><tr>";
|
|
|
|
else {
|
|
|
|
breaker = "</tr><tr><td></td>"; // indent 1 cell
|
|
|
|
++columnCount;
|
|
|
|
}
|
2001-09-19 23:33:52 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-10-25 20:35:42 +00:00
|
|
|
String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-06 18:32:05 +00:00
|
|
|
String outline = showCell2(sortKey, s, script, classname);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-06 18:32:05 +00:00
|
|
|
output.println(breaker + outline);
|
2001-09-19 23:33:52 +00:00
|
|
|
++columnCount;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
closeFile(output);
|
2002-04-23 22:50:15 +00:00
|
|
|
closeIndexFile(indexFile, "<br>UCA: " + uca.getDataVersion(), COLLATION);
|
2001-09-19 23:33:52 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-06 18:32:05 +00:00
|
|
|
private static String showCell2(
|
|
|
|
String sortKey,
|
|
|
|
String s,
|
|
|
|
byte script,
|
|
|
|
String classname) {
|
2004-02-07 01:01:17 +00:00
|
|
|
String name = Default.ucd().getName(s);
|
2004-02-06 18:32:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
if (s.equals("\u1eaf")) {
|
|
|
|
System.out.println("debug");
|
|
|
|
}
|
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
String comp = Default.nfc().normalize(s);
|
|
|
|
int cat = Default.ucd().getCategory(UTF16.charAt(comp,0));
|
2004-02-06 18:32:05 +00:00
|
|
|
if (cat == Mn || cat == Mc || cat == Me) {
|
|
|
|
comp = '\u25CC' + comp;
|
|
|
|
if (s.equals("\u0300")) {
|
2004-02-07 01:01:17 +00:00
|
|
|
System.out.println(Default.ucd().getCodeAndName(comp));
|
2004-02-06 18:32:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// TODO: merge with showCell
|
|
|
|
|
|
|
|
String outline = classname
|
|
|
|
+ " title='"
|
|
|
|
+ (script != UNSUPPORTED
|
|
|
|
? Utility.quoteXML(name, true) + ": "
|
|
|
|
: "")
|
|
|
|
+ UCA.toString(sortKey) + "'>"
|
|
|
|
+ Utility.quoteXML(comp, true)
|
|
|
|
+ "<br><tt>"
|
|
|
|
+ Utility.hex(s)
|
|
|
|
//+ "<br>" + script
|
|
|
|
+ "</tt></td>"
|
|
|
|
+ (script == UNSUPPORTED
|
|
|
|
? "<td class='name'><tt>" + Utility.quoteXML(name, true) + "</td>"
|
|
|
|
: "")
|
|
|
|
;
|
|
|
|
return outline;
|
|
|
|
}
|
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
static public void normalizationChart() throws IOException {
|
|
|
|
HACK_KANA = false;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
Set set = new TreeSet();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
for (int i = 0; i <= 0x10FFFF; ++i) {
|
2004-02-07 01:01:17 +00:00
|
|
|
if (!Default.ucd().isRepresented(i)) {
|
2002-04-24 02:38:53 +00:00
|
|
|
if (i < 0xAC00) continue;
|
|
|
|
if (i > 0xD7A3) continue;
|
|
|
|
if (i > 0xACFF && i < 0xD700) continue;
|
|
|
|
}
|
2004-02-07 01:01:17 +00:00
|
|
|
byte cat = Default.ucd().getCategory(i);
|
2002-04-23 01:59:16 +00:00
|
|
|
if (cat == Cs || cat == Co) continue;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
if (Default.nfkd().isNormalized(i)) continue;
|
|
|
|
String decomp = Default.nfkd().normalize(i);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
byte script = getBestScript(decomp);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
|
2004-02-07 01:01:17 +00:00
|
|
|
new Pair(Default.ucd().getCase(decomp, FULL, FOLD),
|
2002-04-23 01:59:16 +00:00
|
|
|
new Integer(i))));
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
PrintWriter output = null;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
Iterator it = set.iterator();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
int oldScript = -127;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
int counter = 0;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
String[] replacement = new String[] {"%%%", "Normalization Charts"};
|
|
|
|
String folder = "charts\\normalization\\";
|
|
|
|
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
|
|
|
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
|
|
|
Utility.copyTextFile("norm_help.html", Utility.UTF8, folder + "help.html");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.appendFile("index_header.html", Utility.UTF8, indexFile, replacement);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
/*
|
|
|
|
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
indexFile.println("<title>UCA Default Collation Table</title>");
|
|
|
|
indexFile.println("<base target='main'>");
|
|
|
|
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
|
|
|
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
|
|
|
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
|
|
|
*/
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
while (it.hasNext()) {
|
|
|
|
Utility.dot(counter);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
Pair p = (Pair) it.next();
|
|
|
|
int script = ((Integer) p.first).intValue();
|
|
|
|
int cp = ((Integer)((Pair) p.second).second).intValue();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
if (script != oldScript
|
2002-04-23 01:59:16 +00:00
|
|
|
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
|
|
|
) {
|
|
|
|
closeFile(output);
|
|
|
|
output = null;
|
|
|
|
oldScript = script;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
if (output == null) {
|
2002-04-23 22:50:15 +00:00
|
|
|
output = openFile(0, folder, script);
|
2002-04-23 01:59:16 +00:00
|
|
|
output.println("<tr><td class='z'>Code</td><td class='z'>C</td><td class='z'>D</td><td class='z'>KC</td><td class='z'>KD</td></tr>");
|
|
|
|
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
output.println("<tr>");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
String prefix;
|
|
|
|
String code = UTF16.valueOf(cp);
|
2004-02-07 01:01:17 +00:00
|
|
|
String c = Default.nfc().normalize(cp);
|
|
|
|
String d = Default.nfd().normalize(cp);
|
|
|
|
String kc = Default.nfkc().normalize(cp);
|
|
|
|
String kd = Default.nfkd().normalize(cp);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, code, "<td class='z' ", "", false);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = c.equals(code) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, c, prefix, "", c.equals(code));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = d.equals(c) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, d, prefix, "", d.equals(c));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = kc.equals(c) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, kc, prefix, "", kc.equals(c));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = (kd.equals(d) || kd.equals(kc)) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, kd, prefix, "", (kd.equals(d) || kd.equals(kc)));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
output.println("</tr>");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
closeFile(output);
|
2002-04-23 22:50:15 +00:00
|
|
|
closeIndexFile(indexFile, "", NORMALIZATION);
|
2002-04-23 01:59:16 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
static public void caseChart() throws IOException {
|
|
|
|
HACK_KANA = false;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
Set set = new TreeSet();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
for (int i = 0; i <= 0x10FFFF; ++i) {
|
2004-02-07 01:01:17 +00:00
|
|
|
if (!Default.ucd().isRepresented(i)) continue;
|
|
|
|
byte cat = Default.ucd().getCategory(i);
|
2002-04-23 01:59:16 +00:00
|
|
|
if (cat == Cs || cat == Co) continue;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
String code = UTF16.valueOf(i);
|
2004-02-07 01:01:17 +00:00
|
|
|
String lower = Default.ucd().getCase(i, FULL, LOWER);
|
|
|
|
String title = Default.ucd().getCase(i, FULL, TITLE);
|
|
|
|
String upper = Default.ucd().getCase(i, FULL, UPPER);
|
|
|
|
String fold = Default.ucd().getCase(i, FULL, FOLD);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
String decomp = Default.nfkd().normalize(i);
|
2002-04-23 22:50:15 +00:00
|
|
|
int script = 0;
|
|
|
|
if (lower.equals(code) && upper.equals(code) && fold.equals(code) && title.equals(code)) {
|
|
|
|
if (!containsCase(decomp)) continue;
|
|
|
|
script = NO_CASE_MAPPING;
|
2002-04-23 01:59:16 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
if (script == 0) script = getBestScript(decomp);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
|
2004-02-07 01:01:17 +00:00
|
|
|
new Pair(Default.ucd().getCase(decomp, FULL, FOLD),
|
2002-04-23 01:59:16 +00:00
|
|
|
new Integer(i))));
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
PrintWriter output = null;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
Iterator it = set.iterator();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
int oldScript = -127;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
int counter = 0;
|
2002-04-23 22:50:15 +00:00
|
|
|
String[] replacement = new String[] {"%%%", "Case Charts"};
|
|
|
|
String folder = "charts\\case\\";
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
|
|
|
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
|
|
|
Utility.copyTextFile("case_help.html", Utility.UTF8, folder + "help.html");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.appendFile("index_header.html", Utility.UTF8, indexFile, replacement);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
/*
|
|
|
|
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
indexFile.println("<title>UCA Default Collation Table</title>");
|
|
|
|
indexFile.println("<base target='main'>");
|
|
|
|
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
|
|
|
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
|
|
|
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
|
|
|
*/
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
int columnCount = 0;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
while (it.hasNext()) {
|
|
|
|
Utility.dot(counter);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
Pair p = (Pair) it.next();
|
|
|
|
int script = ((Integer) p.first).intValue();
|
|
|
|
int cp = ((Integer)((Pair) p.second).second).intValue();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
if (script != oldScript
|
2002-04-23 01:59:16 +00:00
|
|
|
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
|
|
|
) {
|
|
|
|
closeFile(output);
|
|
|
|
output = null;
|
|
|
|
oldScript = script;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
if (output == null) {
|
2002-04-23 22:50:15 +00:00
|
|
|
output = openFile(0, folder, script);
|
|
|
|
if (script == NO_CASE_MAPPING) output.println("<tr>");
|
|
|
|
else output.println("<tr><td class='z'>Code</td><td class='z'>Lower</td><td class='z'>Title</td>"
|
|
|
|
+"<td class='z'>Upper</td><td class='z'>Fold</td></tr>");
|
2002-04-23 01:59:16 +00:00
|
|
|
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
if (script == NO_CASE_MAPPING) {
|
|
|
|
if (columnCount > 10) {
|
|
|
|
output.println("</tr><tr>");
|
|
|
|
columnCount = 0;
|
|
|
|
}
|
|
|
|
showCell(output, UTF16.valueOf(cp), "<td ", "", false);
|
|
|
|
++columnCount;
|
|
|
|
continue;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
output.println("<tr>");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
String prefix;
|
|
|
|
String code = UTF16.valueOf(cp);
|
2004-02-07 01:01:17 +00:00
|
|
|
String lower = Default.ucd().getCase(cp, FULL, LOWER);
|
|
|
|
String title = Default.ucd().getCase(cp, FULL, TITLE);
|
|
|
|
String upper = Default.ucd().getCase(cp, FULL, UPPER);
|
|
|
|
String fold = Default.ucd().getCase(cp, FULL, FOLD);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, code, "<td class='z' ", "", false);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = lower.equals(code) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, lower, prefix, "", lower.equals(code));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = title.equals(upper) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, title, prefix, "", title.equals(upper));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
prefix = upper.equals(code) ? "<td class='g' " : "<td class='n' ";
|
2002-04-23 22:50:15 +00:00
|
|
|
showCell(output, upper, prefix, "", upper.equals(code));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
prefix = fold.equals(lower) ? "<td class='g' " : "<td class='n' ";
|
|
|
|
showCell(output, fold, prefix, "", fold.equals(lower));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
output.println("</tr>");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 01:59:16 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
closeFile(output);
|
|
|
|
closeIndexFile(indexFile, "", CASE);
|
|
|
|
}
|
2003-04-25 01:39:15 +00:00
|
|
|
|
|
|
|
static public void scriptChart() throws IOException {
|
|
|
|
HACK_KANA = false;
|
|
|
|
|
|
|
|
Set set = new TreeSet();
|
|
|
|
|
|
|
|
for (int i = 0; i <= 0x10FFFF; ++i) {
|
2004-02-07 01:01:17 +00:00
|
|
|
if (!Default.ucd().isRepresented(i)) continue;
|
|
|
|
byte cat = Default.ucd().getCategory(i);
|
2003-04-25 01:39:15 +00:00
|
|
|
if (cat == Cs || cat == Co || cat == Cn) continue;
|
|
|
|
|
|
|
|
String code = UTF16.valueOf(i);
|
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
String decomp = Default.nfkd().normalize(i);
|
2003-04-25 01:39:15 +00:00
|
|
|
int script = getBestScript(decomp);
|
|
|
|
|
|
|
|
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
|
|
|
|
new Pair(decomp,
|
|
|
|
new Integer(i))));
|
|
|
|
}
|
|
|
|
|
|
|
|
PrintWriter output = null;
|
|
|
|
|
|
|
|
Iterator it = set.iterator();
|
|
|
|
|
|
|
|
int oldScript = -127;
|
|
|
|
|
|
|
|
int counter = 0;
|
|
|
|
String[] replacement = new String[] {"%%%", "Script Charts"};
|
|
|
|
String folder = "charts\\script\\";
|
|
|
|
|
|
|
|
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
|
|
|
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
|
|
|
Utility.copyTextFile("script_help.html", Utility.UTF8, folder + "help.html");
|
|
|
|
|
|
|
|
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
|
|
|
Utility.appendFile("script_index_header.html", Utility.UTF8, indexFile, replacement);
|
|
|
|
|
|
|
|
/*
|
|
|
|
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
indexFile.println("<title>UCA Default Collation Table</title>");
|
|
|
|
indexFile.println("<base target='main'>");
|
|
|
|
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
|
|
|
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
|
|
|
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
|
|
|
*/
|
|
|
|
|
|
|
|
int columnCount = 0;
|
|
|
|
|
|
|
|
while (it.hasNext()) {
|
|
|
|
Utility.dot(counter);
|
|
|
|
|
|
|
|
Pair p = (Pair) it.next();
|
|
|
|
int script = ((Integer) p.first).intValue();
|
|
|
|
int cp = ((Integer)((Pair)p.second).second).intValue();
|
|
|
|
|
|
|
|
if (script != oldScript
|
|
|
|
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
|
|
|
) {
|
|
|
|
closeFile(output);
|
|
|
|
output = null;
|
|
|
|
oldScript = script;
|
|
|
|
columnCount = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (output == null) {
|
|
|
|
output = openFile(0, folder, script);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (columnCount > 10) {
|
|
|
|
output.println("</tr><tr>");
|
|
|
|
columnCount = 0;
|
|
|
|
}
|
|
|
|
showCell(output, UTF16.valueOf(cp), "<td ", "", false);
|
|
|
|
++columnCount;
|
|
|
|
}
|
|
|
|
|
|
|
|
closeFile(output);
|
|
|
|
closeIndexFile(indexFile, "", CASE);
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
static public void addMapChar(Map m, Set stoplist, String key, String ch) {
|
|
|
|
if (stoplist.contains(key)) return;
|
|
|
|
for (int i = 0; i < key.length(); ++i) {
|
|
|
|
char c = key.charAt(i);
|
|
|
|
if ('0' <= c && c <= '9') return;
|
|
|
|
}
|
|
|
|
Set result = (Set)m.get(key);
|
|
|
|
if (result == null) {
|
|
|
|
result = new TreeSet();
|
|
|
|
m.put(key, result);
|
|
|
|
}
|
|
|
|
result.add(ch);
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
static public void indexChart() throws IOException {
|
|
|
|
HACK_KANA = false;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
Map map = new TreeMap();
|
|
|
|
Set stoplist = new TreeSet();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
String[] stops = {"LETTER", "CHARACTER", "AND", "CAPITAL", "SMALL", "COMPATIBILITY", "WITH"};
|
|
|
|
stoplist.addAll(Arrays.asList(stops));
|
|
|
|
System.out.println("Stop-list: " + stoplist);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
for (int i = 0; i < LIMIT_SCRIPT; ++i) {
|
2004-02-07 01:01:17 +00:00
|
|
|
stoplist.add(Default.ucd().getScriptID_fromIndex((byte)i));
|
2002-05-29 02:01:00 +00:00
|
|
|
}
|
|
|
|
System.out.println("Stop-list: " + stoplist);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
for (int i = 0; i <= 0x10FFFF; ++i) {
|
2004-02-07 01:01:17 +00:00
|
|
|
if (!Default.ucd().isRepresented(i)) continue;
|
|
|
|
if (!Default.ucd().isAssigned(i)) continue;
|
2002-05-29 02:01:00 +00:00
|
|
|
if (0xAC00 <= i && i <= 0xD7A3) continue;
|
2004-02-07 01:01:17 +00:00
|
|
|
if (Default.ucd().hasComputableName(i)) continue;
|
2002-05-29 02:01:00 +00:00
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
String s = Default.ucd().getName(i);
|
2002-05-29 02:01:00 +00:00
|
|
|
if (s == null) continue;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
if (s.startsWith("<")) {
|
2004-02-07 01:01:17 +00:00
|
|
|
System.out.println("Weird character at " + Default.ucd().getCodeAndName(i));
|
2002-05-29 02:01:00 +00:00
|
|
|
}
|
|
|
|
String ch = UTF16.valueOf(i);
|
|
|
|
int last = -1;
|
|
|
|
int j;
|
|
|
|
for (j = 0; j < s.length(); ++j) {
|
|
|
|
char c = s.charAt(j);
|
|
|
|
if ('A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
|
|
|
|
if (last == -1) last = j;
|
|
|
|
} else {
|
|
|
|
if (last != -1) {
|
|
|
|
String word = s.substring(last, j);
|
|
|
|
addMapChar(map, stoplist, word, ch);
|
|
|
|
last = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (last != -1) {
|
|
|
|
String word = s.substring(last, j);
|
|
|
|
addMapChar(map, stoplist, word, ch);
|
|
|
|
}
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
PrintWriter output = null;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
Iterator it = map.keySet().iterator();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
int oldScript = -127;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
int counter = 0;
|
|
|
|
String[] replacement = new String[] {"%%%", "Name Charts"};
|
|
|
|
String folder = "charts\\name\\";
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
|
|
|
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
|
|
|
Utility.copyTextFile("name_help.html", Utility.UTF8, folder + "help.html");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
2002-10-05 02:16:17 +00:00
|
|
|
Utility.appendFile("index_header.html", Utility.UTF8, indexFile, replacement);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
int columnCount = 0;
|
|
|
|
char lastInitial = 0;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
while (it.hasNext()) {
|
|
|
|
Utility.dot(counter);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
String key = (String) it.next();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
Set chars = (Set) map.get(key);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
char initial = key.charAt(0);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
if (initial != lastInitial) {
|
|
|
|
closeFile(output);
|
|
|
|
output = null;
|
|
|
|
lastInitial = initial;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
if (output == null) {
|
|
|
|
output = openFile2(0, folder, String.valueOf(initial));
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
output.println("<tr><td class='h'>" + key + "</td>");
|
|
|
|
columnCount = 1;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
Iterator sublist = chars.iterator();
|
|
|
|
while (sublist.hasNext()) {
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
String ch = (String) sublist.next();
|
|
|
|
if (columnCount > 10) {
|
|
|
|
output.println("</tr><tr><td></td>");
|
|
|
|
columnCount = 1;
|
|
|
|
}
|
|
|
|
showCell(output, ch, "<td ", "", true);
|
|
|
|
++columnCount;
|
|
|
|
continue;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
output.println("</tr>");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
closeFile(output);
|
|
|
|
closeIndexFile(indexFile, "", CASE);
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2004-02-06 18:32:05 +00:00
|
|
|
static void showCell(PrintWriter output, String s,
|
|
|
|
String prefix, String extra, boolean skipName) {
|
|
|
|
if (s.equals("\u0300")) {
|
|
|
|
System.out.println();
|
|
|
|
}
|
2004-02-07 01:01:17 +00:00
|
|
|
String name = Default.ucd().getName(s);
|
|
|
|
String comp = Default.nfc().normalize(s);
|
|
|
|
int cat = Default.ucd().getCategory(UTF16.charAt(comp,0));
|
2004-02-06 18:32:05 +00:00
|
|
|
if (cat == Mn || cat == Mc || cat == Me) {
|
|
|
|
comp = '\u25CC' + comp;
|
|
|
|
if (s.equals("\u0300")) {
|
2004-02-07 01:01:17 +00:00
|
|
|
System.out.println(Default.ucd().getCodeAndName(comp));
|
2004-02-06 18:32:05 +00:00
|
|
|
}
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
String outline = prefix
|
2002-04-24 02:38:53 +00:00
|
|
|
+ (skipName ? "" : " title='" + Utility.quoteXML(name, true) + "'")
|
2002-04-23 22:50:15 +00:00
|
|
|
+ extra + ">"
|
2002-04-24 02:38:53 +00:00
|
|
|
+ Utility.quoteXML(comp, true)
|
2002-04-23 22:50:15 +00:00
|
|
|
+ "<br><tt>"
|
|
|
|
+ Utility.hex(s)
|
|
|
|
//+ "<br>" + script
|
|
|
|
+ "</tt></td>";
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
output.println(outline);
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static byte getBestScript(String s) {
|
|
|
|
int cp;
|
|
|
|
byte result = COMMON_SCRIPT;
|
|
|
|
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
|
|
|
cp = UTF16.charAt(s, i);
|
2004-02-07 01:01:17 +00:00
|
|
|
result = Default.ucd().getScript(cp);
|
2002-04-23 22:50:15 +00:00
|
|
|
if (result != COMMON_SCRIPT && result != INHERITED_SCRIPT) return result;
|
|
|
|
}
|
|
|
|
return COMMON_SCRIPT;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int getFirstPrimary(String sortKey) {
|
|
|
|
int result = sortKey.charAt(0);
|
2002-06-02 05:07:08 +00:00
|
|
|
if (UCA.isImplicitLeadPrimary(result)) {
|
2002-04-23 22:50:15 +00:00
|
|
|
return (result << 16) | sortKey.charAt(1);
|
|
|
|
}
|
|
|
|
return (result << 16);
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static final String[] CLASSNAME = {
|
2002-09-25 06:40:14 +00:00
|
|
|
"<td class='q'",
|
|
|
|
"<td class='q'",
|
|
|
|
"<td class='q'",
|
|
|
|
"<td class='t'",
|
|
|
|
"<td class='s'",
|
2002-04-23 22:50:15 +00:00
|
|
|
"<td class='p'"};
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static final String[] XCLASSNAME = {
|
2002-09-25 06:40:14 +00:00
|
|
|
"<td class='eq'",
|
|
|
|
"<td class='eq'",
|
|
|
|
"<td class='eq'",
|
|
|
|
"<td class='et'",
|
|
|
|
"<td class='es'",
|
2002-04-23 22:50:15 +00:00
|
|
|
"<td class='ep'"};
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
|
|
|
|
// Shared writer for the chart index page: openFile and openFile2 print one link
// line to it for every chart file they create.
static PrintWriter indexFile;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static PrintWriter openFile(int count, String directory, int script) throws IOException {
|
|
|
|
String scriptName = getChunkName(script, LONG);
|
|
|
|
String shortScriptName = getChunkName(script, SHORT);
|
|
|
|
String hover = scriptName.equals(shortScriptName) ? "" : "' title='" + shortScriptName;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
|
2002-09-25 06:40:14 +00:00
|
|
|
PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
|
2002-04-23 22:50:15 +00:00
|
|
|
Utility.fixDot();
|
|
|
|
System.out.println("Writing: " + scriptName);
|
|
|
|
indexFile.println(" <a href = '" + fileName + hover + "'>" + scriptName + "</a>");
|
|
|
|
String title = "UCA: " + scriptName;
|
|
|
|
output.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
output.println("<title>" + title + "</title>");
|
|
|
|
output.println("<link rel='stylesheet' href='charts.css' type='text/css'>");
|
|
|
|
output.println("</head><body><h2>" + scriptName + "</h2>");
|
|
|
|
output.println("<table>");
|
|
|
|
return output;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-05-29 02:01:00 +00:00
|
|
|
static PrintWriter openFile2(int count, String directory, String name) throws IOException {
|
|
|
|
String fileName = "chart_" + name + (count > 1 ? count + "" : "") + ".html";
|
2002-09-25 06:40:14 +00:00
|
|
|
PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
|
2002-05-29 02:01:00 +00:00
|
|
|
Utility.fixDot();
|
|
|
|
System.out.println("Writing: " + name);
|
|
|
|
indexFile.println(" <a href = '" + fileName + "'>" + name + "</a>");
|
|
|
|
String title = name;
|
|
|
|
output.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
output.println("<title>" + title + "</title>");
|
|
|
|
output.println("<link rel='stylesheet' href='charts.css' type='text/css'>");
|
|
|
|
output.println("</head><body>");
|
|
|
|
output.println("<table>");
|
|
|
|
return output;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
static final int
|
2002-04-23 22:50:15 +00:00
|
|
|
NULL_ORDER = -3,
|
|
|
|
IGNORABLE_ORDER = -2,
|
|
|
|
VARIABLE_ORDER = -1,
|
|
|
|
// scripts in here
|
2002-07-03 02:15:47 +00:00
|
|
|
CJK = 120,
|
|
|
|
CJK_AB = 121,
|
|
|
|
UNSUPPORTED = 122,
|
2002-04-23 22:50:15 +00:00
|
|
|
CAT_OFFSET = 128,
|
|
|
|
// categories in here
|
|
|
|
NO_CASE_MAPPING = 200;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static String getChunkName(int script, byte length) {
|
|
|
|
switch(script) {
|
|
|
|
case NO_CASE_MAPPING: return "NoCaseMapping";
|
|
|
|
case NULL_ORDER: return "Null";
|
|
|
|
case IGNORABLE_ORDER: return "Ignorable";
|
|
|
|
case VARIABLE_ORDER: return "Variable";
|
2002-07-03 02:15:47 +00:00
|
|
|
case CJK: return "CJK";
|
|
|
|
case CJK_AB: return "CJK-Extensions";
|
2002-04-23 22:50:15 +00:00
|
|
|
case UNSUPPORTED: return "Unsupported";
|
2002-09-25 06:40:14 +00:00
|
|
|
default:
|
2004-02-07 01:01:17 +00:00
|
|
|
if (script >= CAT_OFFSET) return Default.ucd().getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length);
|
2002-04-23 22:50:15 +00:00
|
|
|
else if (script == HIRAGANA_SCRIPT && HACK_KANA) return length == SHORT ? "Kata-Hira" : "Katakana-Hiragana";
|
2004-02-07 01:01:17 +00:00
|
|
|
else return Default.ucd().getCase(Default.ucd().getScriptID_fromIndex((byte)script, length), FULL, TITLE);
|
2002-04-23 22:50:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void closeFile(PrintWriter output) {
|
|
|
|
if (output == null) return;
|
|
|
|
output.println("</table></body></html>");
|
|
|
|
output.close();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Chart families; closeIndexFile uses these to decide which cross-links to emit.
static final byte COLLATION = 0;
static final byte NORMALIZATION = 1;
static final byte CASE = 2;
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static void closeIndexFile(PrintWriter indexFile, String extra, byte choice) {
|
2002-04-23 01:59:16 +00:00
|
|
|
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
|
2002-04-23 22:50:15 +00:00
|
|
|
df.setTimeZone(TimeZone.getTimeZone("GMT"));
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
indexFile.println("</p><hr width='50%'><p>");
|
|
|
|
boolean gotOne = false;
|
|
|
|
if (choice != COLLATION) {
|
|
|
|
indexFile.println("<a href='..\\uca\\index.html' target='_top'>Collation Charts</a>");
|
|
|
|
gotOne = true;
|
|
|
|
}
|
|
|
|
if (choice != NORMALIZATION) {
|
|
|
|
if (gotOne) indexFile.println("<br>");
|
|
|
|
indexFile.println("<a href='..\\normalization\\index.html' target='_top'>Normalization Charts</a>");
|
|
|
|
gotOne = true;
|
|
|
|
}
|
|
|
|
if (choice != CASE) {
|
|
|
|
if (gotOne) indexFile.println("<br>");
|
|
|
|
indexFile.println("<a href='..\\case\\index.html' target='_top'>Case Charts</a>");
|
|
|
|
gotOne = true;
|
|
|
|
}
|
|
|
|
indexFile.println("</p><hr width='50%'><p style='font-size: 70%'>");
|
2004-02-07 01:01:17 +00:00
|
|
|
indexFile.println("UCD: " + Default.ucd().getVersion() + extra);
|
|
|
|
indexFile.println("<br>" + Default.getDate() + " <a href='http://www.macchiato.com/' target='_top'>MED</a>");
|
2002-04-23 01:59:16 +00:00
|
|
|
indexFile.println("</p></body></html>");
|
|
|
|
indexFile.close();
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-04-23 22:50:15 +00:00
|
|
|
static boolean containsCase(String s) {
|
|
|
|
int cp;
|
|
|
|
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
|
|
|
cp = UTF16.charAt(s, i);
|
2002-09-25 06:40:14 +00:00
|
|
|
// contains Lu, Lo, Lt, or Lowercase or Uppercase
|
2004-02-07 01:01:17 +00:00
|
|
|
byte cat = Default.ucd().getCategory(cp);
|
2002-04-23 22:50:15 +00:00
|
|
|
if (cat == Lu || cat == Ll || cat == Lt) return true;
|
2004-02-07 01:01:17 +00:00
|
|
|
if (Default.ucd().getBinaryProperty(cp, Other_Lowercase)) return true;
|
|
|
|
if (Default.ucd().getBinaryProperty(cp, Other_Uppercase)) return true;
|
2002-04-23 22:50:15 +00:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
static final Transliterator addCircle = Transliterator.createFromRules(
|
|
|
|
"any-addCircle", "([[:Mn:][:Me:]]) > \u25CC $1", Transliterator.FORWARD);
|
|
|
|
|
|
|
|
public static void writeCompositionChart() throws IOException {
|
|
|
|
UCA uca = new UCA(null,"");
|
|
|
|
|
|
|
|
Set letters = new TreeSet();
|
|
|
|
Set marks = new TreeSet(uca);
|
|
|
|
Set totalMarks = new TreeSet(uca);
|
|
|
|
Map decomposes = new HashMap();
|
|
|
|
Set notPrinted = new TreeSet(new UTF16.StringComparator());
|
|
|
|
Set printed = new HashSet();
|
|
|
|
|
|
|
|
// UnicodeSet latin = new UnicodeSet("[:latin:]");
|
|
|
|
|
|
|
|
PrintWriter out = Utility.openPrintWriter("composition_chart.html", Utility.UTF8_WINDOWS);
|
|
|
|
try {
|
|
|
|
out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
out.println("<style>");
|
|
|
|
|
|
|
|
out.println("body { font-family: Arial Unicode MS }");
|
|
|
|
out.println("td { text-align: Center ; vertical-align: top; width: 1%; background-color: #EEEEEE }");
|
|
|
|
out.println("tt { font-size: 50% }");
|
|
|
|
out.println("table { width='1%' }");
|
|
|
|
out.println(".w { background-color: #FFFFFF }");
|
|
|
|
out.println(".h { background-color: #EEEEFF }");
|
|
|
|
out.println(".r { background-color: #FF0000 }");
|
|
|
|
out.println("</style>");
|
|
|
|
out.println("</head><body bgcolor='#FFFFFF'>");
|
|
|
|
out.println("<h1>Composites</h1>");
|
|
|
|
|
|
|
|
UnicodeSetIterator it = new UnicodeSetIterator();
|
|
|
|
|
|
|
|
for (byte script = 0; script < UCD_Types.LIMIT_SCRIPT; ++script) {
|
|
|
|
|
|
|
|
String scriptName = "";
|
|
|
|
try {
|
2004-02-07 01:01:17 +00:00
|
|
|
scriptName = Default.ucd().getScriptID_fromIndex(script);
|
2002-09-25 06:40:14 +00:00
|
|
|
Utility.fixDot();
|
|
|
|
System.out.println(scriptName);
|
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
|
System.out.println("Failed to create transliterator for: " + scriptName + "(" + script + ")");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
letters.clear();
|
|
|
|
letters.add(""); // header row
|
|
|
|
marks.clear();
|
|
|
|
notPrinted.clear();
|
|
|
|
printed.clear();
|
|
|
|
|
|
|
|
for (int cp = 0; cp < 0x10FFFF; ++cp) {
|
2004-02-07 01:01:17 +00:00
|
|
|
byte type = Default.ucd().getCategory(cp);
|
|
|
|
if (type == Default.ucd().UNASSIGNED || type == Default.ucd().PRIVATE_USE) continue; // skip chaff
|
2002-09-25 06:40:14 +00:00
|
|
|
Utility.dot(cp);
|
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
byte newScript = Default.ucd().getScript(cp);
|
2002-09-25 06:40:14 +00:00
|
|
|
if (newScript != script) continue;
|
|
|
|
|
|
|
|
String source = UTF16.valueOf(cp);
|
2004-02-07 01:01:17 +00:00
|
|
|
String decomp = Default.nfd().normalize(source);
|
2002-09-25 06:40:14 +00:00
|
|
|
if (decomp.equals(source)) continue;
|
|
|
|
|
|
|
|
// pick up all decompositions
|
|
|
|
int count = UTF16.getCharCount(UTF16.charAt(decomp, 0));
|
|
|
|
|
|
|
|
if (count == decomp.length()) {
|
|
|
|
notPrinted.add(source);
|
|
|
|
continue; // skip unless marks
|
|
|
|
}
|
|
|
|
|
|
|
|
if (UCD.isHangulSyllable(cp)) count = 2;
|
|
|
|
String first = decomp.substring(0, count);
|
|
|
|
String second = decomp.substring(count);
|
|
|
|
//if (!markSet.containsAll(second)) continue; // skip unless marks
|
|
|
|
|
|
|
|
letters.add(first);
|
|
|
|
marks.add(second);
|
|
|
|
Utility.addToSet(decomposes, decomp, source);
|
|
|
|
notPrinted.add(source);
|
|
|
|
if (source.equals("\u212b")) System.out.println("A-RING!");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (marks.size() != 0) {
|
|
|
|
|
|
|
|
totalMarks.addAll(marks);
|
|
|
|
|
|
|
|
|
|
|
|
out.println("<table border='1' cellspacing='0'>");
|
2003-04-25 01:39:15 +00:00
|
|
|
out.println("<caption>" + scriptName + "<br>(" + letters.size() + " ? " + marks.size() + ")</caption>");
|
2002-09-25 06:40:14 +00:00
|
|
|
|
|
|
|
Iterator it2 = letters.iterator();
|
|
|
|
while (it2.hasNext()) {
|
|
|
|
String let = (String)it2.next();
|
2004-02-07 01:01:17 +00:00
|
|
|
out.println("<tr>" + showCell(Default.nfc().normalize(let), "class='h'"));
|
2002-09-25 06:40:14 +00:00
|
|
|
Iterator it3 = marks.iterator();
|
|
|
|
while (it3.hasNext()) {
|
|
|
|
String mark = (String)it3.next();
|
|
|
|
String merge = let + mark;
|
|
|
|
if (let.length() != 0 && decomposes.get(merge) == null) {
|
|
|
|
out.println("<td> </td>");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
String comp;
|
|
|
|
try {
|
2004-02-07 01:01:17 +00:00
|
|
|
comp = Default.nfc().normalize(merge);
|
2002-09-25 06:40:14 +00:00
|
|
|
} catch (Exception e) {
|
|
|
|
System.out.println("Failed when trying to compose <" + Utility.hex(e) + ">");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// skip unless single char or header
|
|
|
|
/*if (let.length() != 0
|
|
|
|
&& (UTF16.countCodePoint(comp) != 1 || comp.equals(merge))) {
|
|
|
|
out.println("<td class='x'> </td>");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
Set decomps = (Set) decomposes.get(merge);
|
|
|
|
if (let.length() == 0) {
|
|
|
|
printed.add(comp);
|
|
|
|
out.println(showCell(comp, "class='h'"));
|
|
|
|
} else if (decomps.contains(comp)) {
|
|
|
|
printed.add(comp);
|
|
|
|
out.println(showCell(comp, "class='w'"));
|
|
|
|
} else {
|
|
|
|
comp = (String) new ArrayList(decomps).get(0);
|
|
|
|
printed.add(comp);
|
|
|
|
out.println(showCell(comp, "class='r'"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out.println("</tr>");
|
|
|
|
}
|
|
|
|
out.println("</table><br>");
|
|
|
|
|
|
|
|
//out.println("<table><tr><th>Other Letters</th><th>Other Marks</th></tr><tr><td>");
|
|
|
|
//tabulate(out, atomics.iterator(),16);
|
|
|
|
//out.println("</td><td>");
|
|
|
|
//out.println("</td></tr></table>");
|
|
|
|
|
|
|
|
}
|
|
|
|
notPrinted.removeAll(printed);
|
|
|
|
if (notPrinted.size() != 0) {
|
|
|
|
tabulate(out, scriptName + " Excluded", notPrinted.iterator(), 24, "class='r'");
|
|
|
|
out.println("<br>");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Set otherMarks = new TreeSet(uca);
|
|
|
|
UnicodeSet markSet = new UnicodeSet("[[:Me:][:Mn:]]");
|
|
|
|
it.reset(markSet);
|
|
|
|
while (it.next()) {
|
|
|
|
int cp = it.codepoint;
|
|
|
|
String source = UTF16.valueOf(cp);
|
|
|
|
if (totalMarks.contains(source)) continue; // skip all that we have already
|
|
|
|
otherMarks.add(source);
|
|
|
|
}
|
|
|
|
tabulate(out, "Marks that never combine", otherMarks.iterator(), 24, "class='b'");
|
|
|
|
|
|
|
|
out.println("</body></html>");
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
out.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void tabulate(PrintWriter out, String caption, Iterator it2, int limit, String classType) {
|
|
|
|
int count = 0;
|
|
|
|
out.println("<table border='1' cellspacing='0'><tr>");
|
|
|
|
if (caption != null && caption.length() != 0) {
|
|
|
|
out.println("<caption>" + caption + "</caption>");
|
|
|
|
}
|
|
|
|
while (it2.hasNext()) {
|
|
|
|
if (++count > limit) {
|
|
|
|
out.println("</tr><tr>");
|
|
|
|
count = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
out.println(showCell((String)it2.next(), classType));
|
|
|
|
}
|
|
|
|
out.println("</tr></table>");
|
|
|
|
}
|
|
|
|
|
|
|
|
public static String showCell(String comp, String classType) {
|
|
|
|
if (comp == null) {
|
|
|
|
return "<td "
|
|
|
|
+ classType + (classType.length() != 0 ? " " : "")
|
|
|
|
+ "> </td>";
|
|
|
|
}
|
|
|
|
return "<td "
|
|
|
|
+ classType + (classType.length() != 0 ? " " : "")
|
2004-02-07 01:01:17 +00:00
|
|
|
+ "title='" + Utility.hex(comp) + " " + Default.ucd().getName(comp) + "'>" + addCircle.transliterate(comp)
|
2002-09-25 06:40:14 +00:00
|
|
|
+ "<br><tt>" + Utility.hex(comp) + "</tt></td>";
|
|
|
|
}
|
|
|
|
|
2002-10-05 01:28:58 +00:00
|
|
|
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2002-10-03 22:58:17 +00:00
|
|
|
public static void writeAllocation() throws IOException {
|
2002-10-05 01:28:58 +00:00
|
|
|
String[] names = new String[300]; // HACK, 300 is plenty for now. Fix if it ever gets larger
|
|
|
|
int[] starts = new int[names.length];
|
|
|
|
int[] ends = new int[names.length];
|
|
|
|
|
2004-02-12 08:23:19 +00:00
|
|
|
Iterator blockIterator = Default.ucd().getBlockNames().iterator();
|
|
|
|
|
|
|
|
//UCD.BlockData blockData = new UCD.BlockData();
|
2002-10-05 01:28:58 +00:00
|
|
|
|
2002-10-03 22:58:17 +00:00
|
|
|
int counter = 0;
|
2004-02-12 08:23:19 +00:00
|
|
|
String currentName;
|
|
|
|
//int blockId = 0;
|
|
|
|
while (blockIterator.hasNext()) {
|
|
|
|
//while (Default.ucd().getBlockData(blockId++, blockData)) {
|
|
|
|
names[counter] = currentName = (String) blockIterator.next();
|
|
|
|
if (currentName.equals("No_Block")) continue;
|
|
|
|
UnicodeSet s = Default.ucd().getBlockSet(currentName, null);
|
|
|
|
if (s.getRangeCount() != 1) {
|
|
|
|
throw new IllegalArgumentException("Failure with block set: " + currentName);
|
|
|
|
}
|
|
|
|
starts[counter] = s.getRangeStart(0);
|
|
|
|
ends[counter] = s.getRangeEnd(0);
|
2002-10-05 01:28:58 +00:00
|
|
|
//System.out.println(names[counter] + ", " + values[counter]);
|
|
|
|
++counter;
|
|
|
|
|
|
|
|
// HACK
|
2004-02-12 08:23:19 +00:00
|
|
|
if (currentName.equals("Tags")) {
|
2002-10-05 01:28:58 +00:00
|
|
|
names[counter] = "<i>reserved default ignorable</i>";
|
|
|
|
starts[counter] = 0xE0080;
|
|
|
|
ends[counter] = 0xE0FFF;
|
|
|
|
++counter;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Graphic
|
|
|
|
Format
|
|
|
|
Control
|
|
|
|
Private Use
|
|
|
|
Surrogate
|
|
|
|
Noncharacter
|
|
|
|
Reserved (default ignorable)
|
|
|
|
Reserved (other)
|
|
|
|
*/
|
|
|
|
|
2002-10-05 02:16:17 +00:00
|
|
|
PrintWriter out = Utility.openPrintWriter("allocation.html", Utility.UTF8_WINDOWS);
|
2002-10-03 22:58:17 +00:00
|
|
|
try {
|
|
|
|
out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
out.println("<title>Unicode Allocation</title></head>");
|
|
|
|
out.println("<body bgcolor='#FFFFFF'><h1 align='center'><a href='#Notes'>Unicode Allocation</a></h1>");
|
|
|
|
|
|
|
|
|
2002-10-05 01:28:58 +00:00
|
|
|
for (int textOnly = 0; textOnly < 2; ++textOnly) {
|
|
|
|
out.println("<table border='1' cellspacing='0'>"); // width='100%'
|
|
|
|
if (textOnly == 0) {
|
2002-10-05 02:16:17 +00:00
|
|
|
out.println("<caption><b>Graphic Version</b></caption>");
|
2002-10-05 01:28:58 +00:00
|
|
|
out.println("<tr><th>Start</th><th align='left'>Block Name</th><th align='left'>Size</th></tr>");
|
|
|
|
} else {
|
2002-10-05 02:16:17 +00:00
|
|
|
out.println("<caption><b>Textual Version (decimal)</b></caption>");
|
2002-10-05 01:28:58 +00:00
|
|
|
out.println("<tr><th>Block Name</th><th>Start</th><th>Total</th><th>Assigned</th></tr>");
|
2002-10-03 22:58:17 +00:00
|
|
|
}
|
2002-10-05 01:28:58 +00:00
|
|
|
int lastEnd = -1;
|
|
|
|
for (int i = 0; i < counter; ++i) {
|
|
|
|
if (starts[i] != lastEnd + 1) {
|
|
|
|
drawAllocation(out, lastEnd + 1, "<i>reserved</i>", starts[i] - lastEnd + 1, 0, "#000000", "#000000", textOnly);
|
|
|
|
}
|
|
|
|
int total = ends[i] - starts[i] + 1;
|
|
|
|
int alloc = 0;
|
|
|
|
for (int j = starts[i]; j <= ends[i]; ++j) {
|
2004-02-07 01:01:17 +00:00
|
|
|
if (Default.ucd().isAllocated(j)) ++alloc;
|
2002-10-03 22:58:17 +00:00
|
|
|
}
|
2002-10-05 01:28:58 +00:00
|
|
|
//System.out.println(names[i] + "\t" + alloc + "\t" + total);
|
|
|
|
String color = names[i].indexOf("Surrogates") >= 0 ? "#FF0000"
|
|
|
|
: names[i].indexOf("Private") >= 0 ? "#0000FF"
|
|
|
|
: "#00FF00";
|
|
|
|
String colorReserved = names[i].indexOf("reserved default ignorable") >= 0 ? "#CCCCCC"
|
|
|
|
: "#000000";
|
|
|
|
drawAllocation(out, starts[i], names[i], total, alloc, color, colorReserved, textOnly);
|
|
|
|
lastEnd = ends[i];
|
2002-10-03 22:58:17 +00:00
|
|
|
}
|
2002-10-05 01:28:58 +00:00
|
|
|
out.println("</table><p> </p>");
|
2002-10-03 22:58:17 +00:00
|
|
|
}
|
2002-10-05 01:28:58 +00:00
|
|
|
out.println("<h2>Key</h2><p><a name='Notes'></a>This chart lists all the Unicode blocks and their starting code points. "
|
|
|
|
+ "The area of each bar is proportional to the total number of code points in each block. "
|
|
|
|
+ "The colors have the following significance:<br>"
|
|
|
|
+ "<table border='1' cellspacing='0' cellpadding='4'>"
|
|
|
|
+ "<tr><td>Green</td><td>Graphic, Control, Format, Noncharacter* code points</td></tr>"
|
|
|
|
+ "<tr><td>Red</td><td>Surrogate code points</td></tr>"
|
|
|
|
+ "<tr><td>Blue</td><td>Private Use code points</td></tr>"
|
|
|
|
+ "<tr><td>Gray</td><td>Reserved (default ignorable) code points</td></tr>"
|
|
|
|
+ "<tr><td>Black</td><td>Reserved (other) code points</td></tr>"
|
|
|
|
+ "</table><br>"
|
|
|
|
+ "* Control, Format, and Noncharacter are not distinguished from Graphic characters by color, since they are mixed into other blocks. "
|
2002-10-03 22:58:17 +00:00
|
|
|
+ "Tooltips on the bars show the total number of code points and the number assigned. "
|
|
|
|
+ "(Remember that assigned <i>code points</i> are not necessarily assigned <i>characters</i>.)"
|
|
|
|
+ "</p>");
|
|
|
|
out.println("</body></html>");
|
|
|
|
} finally {
|
|
|
|
out.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Side length that drawAllocation gives a bar whose block holds longestBlock code points.
static double longestBar = 1000;

// Block size that maps to longestBar.
// NOTE(review): 722402 is presumably the largest row size at the time of writing; confirm.
static int longestBlock = 722402;

// US-locale number formatter used for the bar tooltips; prints no fraction digits.
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);

static {
    nf.setMaximumFractionDigits(0);
}
|
|
|
|
|
2002-10-05 01:28:58 +00:00
|
|
|
static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc, String color, String colorReserved, int textOnly) {
|
|
|
|
if (textOnly == 0) {
|
|
|
|
int unalloc = total - alloc;
|
|
|
|
|
|
|
|
double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock));
|
|
|
|
double allocWidth = alloc * totalWidth / total;
|
|
|
|
double unallocWidth = totalWidth - allocWidth;
|
|
|
|
|
|
|
|
out.println("<tr><td align='right'><code>" + Utility.hex(start)
|
|
|
|
+ "</code></td><td>" + title
|
|
|
|
+ "</td><td title='total: " + nf.format(total) + ", assigned: " + nf.format(alloc)
|
|
|
|
+ "'><table border='0' cellspacing='0' cellpadding='0'><tr>");
|
|
|
|
|
|
|
|
if (alloc != 0) out.println("<td style='font-size:1;width:" + allocWidth + ";height:" + totalWidth
|
|
|
|
+ "' bgcolor='" + color + "'> </td>");
|
|
|
|
if (unalloc != 0) out.println("<td style='font-size:1;width:" + unallocWidth + ";height:" + totalWidth
|
|
|
|
+ "' bgcolor='" + colorReserved + "'> </td>");
|
|
|
|
out.println("</tr></table></td></tr>");
|
|
|
|
} else {
|
|
|
|
out.println("<tr><td>" + title + "</td><td align='right'>" + start + "</td><td align='right'>" + total + "</td><td align='right'>" + alloc + "</td></tr>");
|
|
|
|
}
|
2002-10-03 22:58:17 +00:00
|
|
|
}
|
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
static final IntStack p1 = new IntStack(30);
|
|
|
|
static final IntStack s1 = new IntStack(30);
|
|
|
|
static final IntStack t1 = new IntStack(30);
|
|
|
|
static final IntStack p2 = new IntStack(30);
|
|
|
|
static final IntStack s2 = new IntStack(30);
|
|
|
|
static final IntStack t2 = new IntStack(30);
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
static int getStrengthDifference(CEList ceList, CEList lastCEList) {
|
|
|
|
extractNonzeros(ceList, p1, s1, t1);
|
|
|
|
extractNonzeros(lastCEList, p2, s2, t2);
|
|
|
|
int temp = p1.compareTo(p2);
|
|
|
|
if (temp != 0) return 3;
|
|
|
|
temp = s1.compareTo(s2);
|
|
|
|
if (temp != 0) return 2;
|
|
|
|
temp = t1.compareTo(t2);
|
|
|
|
if (temp != 0) return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
static void extractNonzeros(CEList ceList, IntStack primaries, IntStack secondaries, IntStack tertiaries) {
|
|
|
|
primaries.clear();
|
|
|
|
secondaries.clear();
|
|
|
|
tertiaries.clear();
|
2002-09-25 06:40:14 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
for (int i = 0; i < ceList.length(); ++i) {
|
|
|
|
int ce = ceList.at(i);
|
|
|
|
int temp = UCA.getPrimary(ce);
|
|
|
|
if (temp != 0) primaries.push(temp);
|
|
|
|
temp = UCA.getSecondary(ce);
|
|
|
|
if (temp != 0) secondaries.push(temp);
|
|
|
|
temp = UCA.getTertiary(ce);
|
|
|
|
if (temp != 0) tertiaries.push(temp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*/
|