2001-08-31 00:30:17 +00:00
|
|
|
/**
|
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
|
|
|
* others. All Rights Reserved. *
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
2005-05-02 15:39:54 +00:00
|
|
|
* $Date: 2005/05/02 15:39:53 $
|
|
|
|
* $Revision: 1.22 $
|
2001-08-31 00:30:17 +00:00
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
*/
|
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
package com.ibm.text.UCD;
|
|
|
|
|
|
|
|
import java.util.*;
|
|
|
|
import java.io.*;
|
|
|
|
import java.text.DateFormat;
|
|
|
|
import java.text.SimpleDateFormat;
|
2004-02-06 18:32:05 +00:00
|
|
|
|
|
|
|
import com.ibm.icu.dev.test.util.BagFormatter;
|
2005-05-02 15:39:54 +00:00
|
|
|
import com.ibm.icu.dev.test.util.CollectionUtilities;
|
2004-02-06 18:32:05 +00:00
|
|
|
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
|
2005-02-24 02:59:34 +00:00
|
|
|
import com.ibm.icu.dev.test.util.UnicodeLabel;
|
2005-04-06 08:48:17 +00:00
|
|
|
import com.ibm.icu.dev.test.util.UnicodeMap;
|
2004-02-06 18:32:05 +00:00
|
|
|
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
2005-02-24 02:59:34 +00:00
|
|
|
import com.ibm.icu.impl.ICUData;
|
|
|
|
import com.ibm.icu.impl.ICUResourceBundle;
|
|
|
|
import com.ibm.icu.impl.UCharArrayIterator;
|
2003-07-07 15:58:57 +00:00
|
|
|
import com.ibm.icu.text.NumberFormat;
|
2005-02-24 02:59:34 +00:00
|
|
|
import com.ibm.icu.text.StringPrep;
|
|
|
|
import com.ibm.icu.text.StringPrepParseException;
|
2003-07-07 15:58:57 +00:00
|
|
|
import com.ibm.icu.util.Currency;
|
2005-02-24 02:59:34 +00:00
|
|
|
import com.ibm.icu.util.ULocale;
|
|
|
|
|
2003-07-07 15:58:57 +00:00
|
|
|
import java.math.BigDecimal;
|
|
|
|
|
|
|
|
import java.util.regex.*;
|
2001-08-30 20:50:18 +00:00
|
|
|
|
2003-05-02 21:46:33 +00:00
|
|
|
import com.ibm.icu.text.*;
|
2001-08-30 20:50:18 +00:00
|
|
|
import com.ibm.text.utility.*;
|
|
|
|
|
|
|
|
public class TestData implements UCD_Types {
|
2004-02-06 18:32:05 +00:00
|
|
|
|
|
|
|
// Property factory used by the property-diff diagnostics in main, which assigns it
// from ICUPropertyFactory.make() and then queries it through upf.getSet(...).
static UnicodeProperty.Factory upf;
|
|
|
|
|
2003-05-02 21:46:33 +00:00
|
|
|
public static void main (String[] args) throws IOException {
|
2005-02-24 02:59:34 +00:00
|
|
|
//checkChars(false);
|
|
|
|
new GenStringPrep().genStringPrep();
|
|
|
|
if (true) return;
|
2004-02-07 01:01:17 +00:00
|
|
|
|
|
|
|
System.out.println("main: " + Default.getDate());
|
2004-02-06 18:32:05 +00:00
|
|
|
upf = ICUPropertyFactory.make();
|
2004-02-07 01:01:17 +00:00
|
|
|
System.out.println("after factory: " + Default.getDate());
|
2004-02-06 18:32:05 +00:00
|
|
|
|
|
|
|
showPropDiff(
|
|
|
|
"gc=mn", null,
|
|
|
|
"script=inherited", null);
|
|
|
|
|
|
|
|
// upf.getProperty("gc")
|
|
|
|
//.getPropertySet(new ICUPropertyFactory.RegexMatcher("mn|me"),null)
|
2003-07-07 15:58:57 +00:00
|
|
|
|
2004-02-06 18:32:05 +00:00
|
|
|
showPropDiff(
|
|
|
|
"gc=mn|me", null,
|
|
|
|
"script=inherited", null);
|
|
|
|
|
2003-08-20 03:48:47 +00:00
|
|
|
if (true) return;
|
2004-02-06 18:32:05 +00:00
|
|
|
|
|
|
|
showPropDiff(
|
|
|
|
"General_Category=L", null,
|
|
|
|
"Script!=Inherited|Common",
|
2004-02-07 01:01:17 +00:00
|
|
|
upf.getSet("script=inherited")
|
|
|
|
.addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd()))
|
2004-02-06 18:32:05 +00:00
|
|
|
.complement()
|
|
|
|
);
|
|
|
|
|
2003-08-20 03:48:47 +00:00
|
|
|
|
2004-02-07 01:01:17 +00:00
|
|
|
UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd());
|
|
|
|
UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd());
|
2003-08-20 03:48:47 +00:00
|
|
|
UnicodeSet po = new UnicodeSet("[:po:]");
|
|
|
|
UnicodeSet empty = new UnicodeSet();
|
|
|
|
|
|
|
|
Utility.showSetDifferences(
|
|
|
|
"Sentence_Terminal", sterm,
|
|
|
|
"Empty", empty,
|
2004-02-07 01:01:17 +00:00
|
|
|
true, Default.ucd());
|
2003-08-20 03:48:47 +00:00
|
|
|
|
|
|
|
Utility.showSetDifferences(
|
|
|
|
"Sentence_Terminal", sterm,
|
|
|
|
"Terminal_Punctuation", term,
|
2004-02-07 01:01:17 +00:00
|
|
|
true, Default.ucd());
|
2003-08-20 03:48:47 +00:00
|
|
|
|
|
|
|
Utility.showSetDifferences(
|
|
|
|
"Terminal_Punctuation", term,
|
|
|
|
"Punctuation_Other", po,
|
2004-02-07 01:01:17 +00:00
|
|
|
true, Default.ucd());
|
2003-08-20 03:48:47 +00:00
|
|
|
|
|
|
|
if (true) return;
|
|
|
|
|
2003-07-07 15:58:57 +00:00
|
|
|
UnicodeSet us = getSetForName("LATIN LETTER.*P");
|
2004-02-07 01:01:17 +00:00
|
|
|
Utility.showSetNames("",us,false,Default.ucd());
|
2003-07-07 15:58:57 +00:00
|
|
|
|
|
|
|
us = getSetForName(".*VARIA(TION|NT).*");
|
2004-02-07 01:01:17 +00:00
|
|
|
Utility.showSetNames("",us,false,Default.ucd());
|
2003-07-07 15:58:57 +00:00
|
|
|
|
|
|
|
if (true) return;
|
|
|
|
|
|
|
|
/*showSet();
|
|
|
|
*/
|
|
|
|
String x = "[[[:s:][:p:]&[:ascii:]] | [\\u2190-\\u2BFF] | "
|
|
|
|
+ "[[:s:][:p:]"
|
|
|
|
// + "&[:decompositiontype=none:]"
|
|
|
|
// + "- [:id_continue:]"
|
|
|
|
+ "-[:sk:]"
|
|
|
|
+ "]]";
|
|
|
|
PrintWriter pw = Utility.openPrintWriter("Syntax.txt", Utility.UTF8_WINDOWS);
|
|
|
|
showSet(pw, x, false);
|
|
|
|
showSet(pw, "[[\\u2000-\\u205F]-" + x + "]", true);
|
|
|
|
showSet(pw, "[[:whitespace:]&[:decompositiontype=none:]]", false);
|
|
|
|
pw.close();
|
|
|
|
|
|
|
|
if (true) return;
|
|
|
|
|
|
|
|
testFormatHack();
|
|
|
|
if (true) return;
|
|
|
|
testConvertToBDD();
|
|
|
|
if (true) return;
|
|
|
|
|
|
|
|
System.out.println("Shift: " + SHIFT + ", Mask: " + Long.toHexString(MASK));
|
|
|
|
showNumber(-5);
|
|
|
|
showNumber(0);
|
|
|
|
showNumber(5);
|
|
|
|
showNumber(500);
|
|
|
|
showNumber(5000000);
|
|
|
|
if (true) return;
|
|
|
|
|
2003-05-02 21:46:33 +00:00
|
|
|
String script = args[0];
|
|
|
|
PrintWriter log = Utility.openPrintWriter("TranslitSkeleton_" + script + ".txt", Utility.UTF8_WINDOWS);
|
|
|
|
try {
|
|
|
|
UnicodeSet base = new UnicodeSet("[:" + script + ":]");
|
|
|
|
UnicodeSetIterator it = new UnicodeSetIterator(base);
|
|
|
|
while (it.next()) {
|
|
|
|
String s = UTF16.valueOf(it.codepoint);
|
2004-02-07 01:01:17 +00:00
|
|
|
String norm = Default.nfd().normalize(s);
|
|
|
|
if (s.equals(norm) && Default.nfkd().isNormalized(norm)) {
|
|
|
|
log.println("# " + s + " <> XXX # " + Default.ucd().getName(it.codepoint));
|
2003-05-02 21:46:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
log.close();
|
|
|
|
}
|
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
// NOTE(review): no use of this field appears in the visible part of the file — confirm it is still needed.
Matcher m;
|
2004-12-11 06:03:10 +00:00
|
|
|
|
2005-02-24 02:59:34 +00:00
|
|
|
static class GenStringPrep {
|
2005-05-02 15:39:54 +00:00
|
|
|
|
2005-02-24 02:59:34 +00:00
|
|
|
UnicodeSet[] coreChars = new UnicodeSet[100];
|
2005-03-26 05:40:05 +00:00
|
|
|
UnicodeSet decomposable = new UnicodeSet();
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeMap suspect = new UnicodeMap();
|
2005-03-30 17:19:32 +00:00
|
|
|
|
2005-03-26 05:40:05 +00:00
|
|
|
ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
|
|
|
|
//UnicodeSet id_continue = ups.getSet("ID_Continue=true");
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeSet xid_continue = ups.getSet("XID_Continue=true");
|
2005-05-02 15:39:54 +00:00
|
|
|
UnicodeSet wordChars = new UnicodeSet();
|
2005-04-06 08:48:17 +00:00
|
|
|
{
|
2005-05-02 15:39:54 +00:00
|
|
|
if (false) {
|
|
|
|
wordChars.addAll(ups.getSet("name=.*MODIFIER LETTER.*", new RegexMatcher()));
|
|
|
|
wordChars.retainAll(ups.getSet("gc=Sk"));
|
|
|
|
}
|
2005-04-06 08:48:17 +00:00
|
|
|
wordChars.addAll(new UnicodeSet("[\\u0027 \\u002D \\u002E \\u003A \\u00B7 \\u058A \\u05F3" +
|
2005-05-02 15:39:54 +00:00
|
|
|
" \\u05F4 \\u200C \\u200D \\u2010 \\u2019 \\u2027 \\u30A0 \\u04C0" +
|
|
|
|
" \\u055A \\u02B9 \\u02BA]"));
|
2005-04-06 08:48:17 +00:00
|
|
|
//wordChars.removeAll(xid_continue);
|
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
|
|
|
|
UnicodeSet patternProp = ups.getSet("Pattern_Syntax=true").removeAll(wordChars);
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeSet isNFKC = ups.getSet("NFKC_Quickcheck=NO").complement();
|
2005-03-30 17:19:32 +00:00
|
|
|
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeSet not_xid_continue = new UnicodeSet(xid_continue).complement().removeAll(wordChars);
|
2005-03-30 17:19:32 +00:00
|
|
|
|
2005-03-26 05:40:05 +00:00
|
|
|
//UnicodeSet[] decompChars = new UnicodeSet[100];
|
2005-02-24 02:59:34 +00:00
|
|
|
UCD ucd = Default.ucd();
|
|
|
|
|
2005-03-30 17:19:32 +00:00
|
|
|
Collator uca0 = Collator.getInstance(ULocale.ENGLISH);
|
2005-02-24 02:59:34 +00:00
|
|
|
{
|
2005-03-30 17:19:32 +00:00
|
|
|
uca0.setStrength(Collator.IDENTICAL);
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
GenerateHanTransliterator.MultiComparator uca
|
|
|
|
= new GenerateHanTransliterator.MultiComparator(new Comparator[] {
|
|
|
|
uca0, new UTF16.StringComparator()});
|
2005-02-24 02:59:34 +00:00
|
|
|
|
|
|
|
UnicodeSet bidiR = new UnicodeSet(
|
|
|
|
"[[:Bidi_Class=AL:][:Bidi_Class=R:]]");
|
|
|
|
|
|
|
|
UnicodeSet bidiL = new UnicodeSet("[:Bidi_Class=l:]");
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeSet hasNoUpper = new UnicodeSet();
|
|
|
|
UnicodeSet hasNoUpperMinus = new UnicodeSet();
|
2005-03-30 17:19:32 +00:00
|
|
|
BagFormatter bf = new BagFormatter();
|
|
|
|
UnicodeSet inIDN = new UnicodeSet();
|
2005-02-24 02:59:34 +00:00
|
|
|
|
|
|
|
void genStringPrep() throws IOException {
|
2005-05-02 15:39:54 +00:00
|
|
|
//showScriptToBlock();
|
2005-03-30 17:19:32 +00:00
|
|
|
bf.setShowLiteral(BagFormatter.toHTMLControl);
|
|
|
|
//bf.setValueSource(UnicodeLabel.NULL);
|
|
|
|
if (false) {
|
|
|
|
|
|
|
|
System.out.println("word chars: " + bf.showSetNames(wordChars));
|
|
|
|
System.out.println("pat: " + bf.showSetNames(patternProp));
|
|
|
|
System.out.println("xid: " + bf.showSetNames(not_xid_continue));
|
|
|
|
}
|
2005-02-24 02:59:34 +00:00
|
|
|
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
|
|
|
Utility.dot(cp);
|
2005-03-30 17:19:32 +00:00
|
|
|
int cat = Default.ucd().getCategory(cp);
|
|
|
|
if (cat == UCD.Cn || cat == UCD.Co || cat == UCD.Cs) continue;
|
2005-03-26 05:40:05 +00:00
|
|
|
if (!Default.nfd().isNormalized(cp)) decomposable.add(cp);
|
2005-03-30 17:19:32 +00:00
|
|
|
int idnaType = getIDNAType(cp);
|
|
|
|
idnaTypeSet[idnaType].add(cp);
|
2005-04-06 08:48:17 +00:00
|
|
|
String str = UTF16.valueOf(cp);
|
|
|
|
if (str.equals(ucd.getCase(str, FULL, UPPER))) hasNoUpper.add(cp);
|
2005-02-24 02:59:34 +00:00
|
|
|
int script = ucd.getScript(cp);
|
2005-03-26 05:40:05 +00:00
|
|
|
if (coreChars[script] == null)
|
|
|
|
coreChars[script] = new UnicodeSet();
|
|
|
|
coreChars[script].add(cp);
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
2005-04-06 08:48:17 +00:00
|
|
|
// fix characters with no uppercase
|
|
|
|
hasNoUpperMinus = new UnicodeSet(hasNoUpper).removeAll(wordChars);
|
|
|
|
System.out.println(bf.showSetNames(hasNoUpper));
|
2005-02-24 02:59:34 +00:00
|
|
|
|
|
|
|
Utility.fixDot();
|
2005-03-30 17:19:32 +00:00
|
|
|
PrintWriter htmlOut = BagFormatter.openUTF8Writer(GEN_DIR, "idn-chars.html");
|
2005-05-02 15:39:54 +00:00
|
|
|
PrintWriter htmlOut2 = BagFormatter.openUTF8Writer(GEN_DIR, "script-chars.html");
|
2005-03-30 17:19:32 +00:00
|
|
|
PrintWriter textOut = BagFormatter.openUTF8Writer(GEN_DIR, "idn-chars.txt");
|
|
|
|
textOut.println('\uFEFF');
|
|
|
|
textOut.println("For documentation, see idn-chars.html");
|
2005-05-02 15:39:54 +00:00
|
|
|
|
|
|
|
Utility.appendFile("./com/ibm/text/UCD/idn-charsHeader.html", Utility.UTF8_WINDOWS, htmlOut,
|
|
|
|
new String[] {"%date%", Default.getDate()});
|
2005-03-30 17:19:32 +00:00
|
|
|
/*
|
2005-02-24 02:59:34 +00:00
|
|
|
out
|
|
|
|
.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
|
|
out.println("<title>IDN Characters</title><style>");
|
|
|
|
out.println("<!--");
|
2005-03-26 05:40:05 +00:00
|
|
|
out.println(".script { font-size: 150%; background-color: #CCCCCC }");
|
|
|
|
out.println(".Atomic { background-color: #CCCCFF }");
|
|
|
|
out.println(".Atomic-no-uppercase { background-color: #CCFFCC }");
|
2005-03-30 17:19:32 +00:00
|
|
|
out.println(".Non-XID { background-color: #FFCCCC }");
|
2005-03-26 05:40:05 +00:00
|
|
|
out.println(".Decomposable { background-color: #FFFFCC }");
|
2005-03-30 17:19:32 +00:00
|
|
|
out.println(".Pattern_Syntax { background-color: #FFCCFF }");
|
|
|
|
|
2005-02-24 02:59:34 +00:00
|
|
|
out.println("th { text-align: left }");
|
|
|
|
out.println("-->");
|
|
|
|
out.println("</style></head><body><table>");
|
2005-03-30 17:19:32 +00:00
|
|
|
*/
|
2005-05-02 15:39:54 +00:00
|
|
|
htmlOut.println("<table border='1' cellpadding='2' cellspacing='0'>");
|
|
|
|
htmlOut2.println("<html><body><table border='1' cellpadding='2' cellspacing='0'>");
|
2005-02-24 02:59:34 +00:00
|
|
|
|
|
|
|
for (int scriptCode = 0; scriptCode < coreChars.length; ++scriptCode) {
|
|
|
|
if (scriptCode == COMMON_SCRIPT
|
|
|
|
|| scriptCode == INHERITED_SCRIPT)
|
|
|
|
continue;
|
2005-05-02 15:39:54 +00:00
|
|
|
showCodes(htmlOut, textOut, scriptCode, htmlOut2);
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
2005-05-02 15:39:54 +00:00
|
|
|
showCodes(htmlOut, textOut, COMMON_SCRIPT, htmlOut2);
|
|
|
|
showCodes(htmlOut, textOut, INHERITED_SCRIPT, htmlOut2);
|
2005-03-30 17:19:32 +00:00
|
|
|
htmlOut.println("</table></body></html>");
|
|
|
|
htmlOut.close();
|
2005-05-02 15:39:54 +00:00
|
|
|
htmlOut2.println("</table></body></html>");
|
|
|
|
htmlOut2.close();
|
2005-04-06 08:48:17 +00:00
|
|
|
bf.setMergeRanges(false);
|
|
|
|
|
|
|
|
textOut.println();
|
2005-05-02 15:39:54 +00:00
|
|
|
textOut.println("# *** ADDITIONAL WORD CHARACTERS ***");
|
|
|
|
textOut.println();
|
2005-04-06 08:48:17 +00:00
|
|
|
bf.setValueSource("word-chars");
|
|
|
|
bf.showSetNames(textOut, wordChars);
|
|
|
|
|
|
|
|
textOut.println();
|
2005-05-02 15:39:54 +00:00
|
|
|
textOut.println("# *** FOR REVIEW ***");
|
2005-04-06 08:48:17 +00:00
|
|
|
bf.setLabelSource(UnicodeLabel.NULL);
|
|
|
|
for (Iterator it = new TreeSet(suspect.getAvailableValues()).iterator(); it.hasNext();) {
|
|
|
|
textOut.println();
|
|
|
|
String value = (String)it.next();
|
|
|
|
bf.setValueSource(value);
|
|
|
|
bf.showSetNames(textOut, suspect.getSet(value));
|
|
|
|
}
|
|
|
|
textOut.close();
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
|
|
|
|
2005-05-02 15:39:54 +00:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
private void showScriptToBlock() {
|
|
|
|
UnicodeMap scripts = ToolUnicodePropertySource.make("").getProperty("script").getUnicodeMap();
|
|
|
|
UnicodeMap blocks = ToolUnicodePropertySource.make("").getProperty("block").getUnicodeMap();
|
|
|
|
UnicodeMap.Composer myCompose = new UnicodeMap.Composer() {
|
|
|
|
public Object compose(Object a, Object b) {
|
|
|
|
return a + "\t" + b;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
UnicodeMap sb = ((UnicodeMap)scripts.clone()).composeWith(blocks, myCompose);
|
|
|
|
for (Iterator it = sb.getAvailableValues(new TreeSet()).iterator(); it.hasNext();) {
|
|
|
|
System.out.println(it.next());
|
|
|
|
}
|
|
|
|
throw new IllegalArgumentException();
|
|
|
|
}
|
|
|
|
|
|
|
|
Map scriptToGif = CollectionUtilities.asMap(script_to_gif);
|
|
|
|
|
|
|
|
static String[][] script_to_gif = {
|
|
|
|
|
|
|
|
{"Common","common.gif"}, //Miscellaneous_Symbols
|
|
|
|
{"Inherited","combiningdiacritics.gif"}, //Combining_Diacritical_Marks
|
|
|
|
{"Arabic","arabic.gif"}, //Arabic
|
|
|
|
{"Armenian","armenian.gif"}, //Armenian
|
|
|
|
{"Bengali","bengali.gif"}, //Bengali
|
|
|
|
{"Bopomofo","bopomofo.gif"}, //Bopomofo
|
|
|
|
{"Braille","braillesymbols.gif"}, //Braille_Patterns
|
|
|
|
{"Buginese","buginese.gif"}, //Buginese
|
|
|
|
{"Buhid","buhid.gif"}, //Buhid
|
|
|
|
{"Canadian_Aboriginal","canadiansyllabics.gif"}, //Unified_Canadian_Aboriginal_Syllabics
|
|
|
|
{"Cherokee","cherokee.gif"}, //Cherokee
|
|
|
|
{"Coptic","coptic.gif"}, //Coptic
|
|
|
|
{"Cypriot","cypriot.gif"}, //Cypriot_Syllabary
|
|
|
|
{"Cyrillic","cyrillic.gif"}, //Cyrillic
|
|
|
|
{"Deseret","deseret.gif"}, //Deseret
|
|
|
|
{"Devanagari","devanagari.gif"}, //Devanagari
|
|
|
|
{"Ethiopic","ethiopic.gif"}, //Ethiopic
|
|
|
|
{"Georgian","georgian.gif"}, //Georgian
|
|
|
|
{"Glagolitic","glagolitic.gif"}, //Glagolitic
|
|
|
|
{"Gothic","gothic.gif"}, //Gothic
|
|
|
|
{"Greek","greek.gif"}, //Greek_and_Coptic
|
|
|
|
{"Gujarati","gujarati.gif"}, //Gujarati
|
|
|
|
{"Gurmukhi","gurmukhi.gif"}, //Gurmukhi
|
|
|
|
{"Han","cjkideographcompat.gif"}, //CJK_Compatibility_Ideographs
|
|
|
|
{"Han","kangxiradicals.gif"}, //Kangxi_Radicals
|
|
|
|
{"Hangul","hangulsyllables.gif"}, //Hangul_Syllables
|
|
|
|
{"Hanunoo","hanunoo.gif"}, //Hanunoo
|
|
|
|
{"Hebrew","hebrew.gif"}, //Hebrew
|
|
|
|
{"Hiragana","hiragana.gif"}, //Hiragana
|
|
|
|
{"Kannada","kannada.gif"}, //Kannada
|
|
|
|
{"Katakana","katakana.gif"}, //Katakana
|
|
|
|
{"Kharoshthi","kharoshthi.gif"}, //Kharoshthi
|
|
|
|
{"Khmer","khmer.gif"}, //Khmer
|
|
|
|
{"Lao","lao.gif"}, //Lao
|
|
|
|
{"Latin","latin.gif"}, //Basic_Latin
|
|
|
|
{"Limbu","limbu.gif"}, //Limbu
|
|
|
|
{"Linear_B","linearbsyllabary.gif"}, //Linear_B_Syllabary
|
|
|
|
{"Malayalam","malayalam.gif"}, //Malayalam
|
|
|
|
{"Mongolian","mongolian.gif"}, //Mongolian
|
|
|
|
{"Myanmar","myanmar.gif"}, //Myanmar
|
|
|
|
{"New_Tai_Lue","newtailu.gif"}, //New_Tai_Lue
|
|
|
|
{"Ogham","ogham.gif"}, //Ogham
|
|
|
|
{"Old_Italic","olditalic.gif"}, //Old_Italic
|
|
|
|
{"Old_Persian","oldpersiancuneiform.gif"}, //Old_Persian
|
|
|
|
{"Oriya","oriya.gif"}, //Oriya
|
|
|
|
{"Osmanya","osmanya.gif"}, //Osmanya
|
|
|
|
{"Runic","runic.gif"}, //Runic
|
|
|
|
{"Shavian","shavian.gif"}, //Shavian
|
|
|
|
{"Sinhala","sinhala.gif"}, //Sinhala
|
|
|
|
{"Syloti_Nagri","silotinagri.gif"}, //Syloti_Nagri
|
|
|
|
{"Syriac","syriac.gif"}, //Syriac
|
|
|
|
{"Tagalog","tagalog.gif"}, //Tagalog
|
|
|
|
{"Tagbanwa","tagbanwa.gif"}, //Tagbanwa
|
|
|
|
{"Tai_Le","taile.gif"}, //Tai_Le
|
|
|
|
{"Tamil","tamil.gif"}, //Tamil
|
|
|
|
{"Telugu","telugu.gif"}, //Telugu
|
|
|
|
{"Thaana","thaana.gif"}, //Thaana
|
|
|
|
{"Thai","thai.gif"}, //Thai
|
|
|
|
{"Tibetan","tibetan.gif"}, //Tibetan
|
|
|
|
{"Tifinagh","tifinagh.gif"}, //Tifinagh
|
|
|
|
{"Ugaritic","ugaritic.gif"}, //Ugaritic
|
|
|
|
{"Yi","yi.gif"}, //Yi_Syllables
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2005-03-30 17:19:32 +00:00
|
|
|
/** IDNA classification codes returned by getIDNAType; they double as indices into idnaTypeSet. */
static final int OK = 0;
static final int DELETED = 1;
static final int ILLEGAL = 2;
static final int REMAPPED = 3;
/** Number of classification codes, i.e. the length of idnaTypeSet. */
static final int IDNA_TYPE_LIMIT = 4;

/** One set of code points per IDNA classification code; every slot starts out empty. */
UnicodeSet[] idnaTypeSet = new UnicodeSet[IDNA_TYPE_LIMIT];
{
    int slot = 0;
    while (slot < idnaTypeSet.length) {
        idnaTypeSet[slot] = new UnicodeSet();
        ++slot;
    }
}
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
private int getIDNAType(int cp) {
|
|
|
|
inbuffer.setLength(0);
|
|
|
|
UTF16.append(inbuffer, cp);
|
|
|
|
try {
|
|
|
|
intermediate = IDNA.convertToASCII(inbuffer,
|
|
|
|
IDNA.DEFAULT); // USE_STD3_RULES
|
|
|
|
if (intermediate.length() == 0)
|
|
|
|
return DELETED;
|
|
|
|
outbuffer = IDNA.convertToUnicode(intermediate,
|
|
|
|
IDNA.USE_STD3_RULES);
|
|
|
|
} catch (StringPrepParseException e) {
|
|
|
|
return ILLEGAL;
|
|
|
|
} catch (Exception e) {
|
|
|
|
System.out.println("Failure at: " + Utility.hex(cp));
|
|
|
|
return ILLEGAL;
|
|
|
|
}
|
|
|
|
if (!TestData.equals(inbuffer, outbuffer))
|
|
|
|
return REMAPPED;
|
|
|
|
return OK;
|
|
|
|
}
|
|
|
|
StringBuffer inbuffer = new StringBuffer();
|
|
|
|
StringBuffer intermediate, outbuffer;
|
|
|
|
|
2005-02-24 02:59:34 +00:00
|
|
|
UnicodeSet lowercase = new UnicodeSet("[:Lowercase:]");
|
|
|
|
|
|
|
|
/**
|
2005-03-30 17:19:32 +00:00
|
|
|
* @param htmlOut
|
|
|
|
* @param textOut TODO
|
|
|
|
* @param scriptCode
|
2005-05-02 15:39:54 +00:00
|
|
|
* @param htmlOut2 TODO
|
2005-02-24 02:59:34 +00:00
|
|
|
* @param ucd
|
|
|
|
* @param coreChars
|
|
|
|
* @param decompChars
|
|
|
|
*/
|
2005-05-02 15:39:54 +00:00
|
|
|
private void showCodes(PrintWriter htmlOut, PrintWriter textOut, int scriptCode, PrintWriter htmlOut2) {
|
2005-03-26 05:40:05 +00:00
|
|
|
if (coreChars[scriptCode] == null) return;
|
|
|
|
String script = Default.ucd().getScriptID_fromIndex((byte) scriptCode);
|
2005-05-02 15:39:54 +00:00
|
|
|
script = Utility.getUnskeleton(script.toLowerCase(),true);
|
|
|
|
System.out.println(script);
|
|
|
|
|
2005-03-30 17:19:32 +00:00
|
|
|
htmlOut.println();
|
2005-05-02 15:39:54 +00:00
|
|
|
String scriptLine = "<tr><th class='script'><img src='images/" + ((String)scriptToGif.get(script)).toLowerCase()
|
|
|
|
+ "'> Script: " + script + "</th></tr>";
|
|
|
|
htmlOut.println(scriptLine);
|
|
|
|
htmlOut2.println(scriptLine);
|
2005-03-30 17:19:32 +00:00
|
|
|
textOut.println();
|
|
|
|
textOut.println("#*** Script: " + script + " ***");
|
2005-02-24 02:59:34 +00:00
|
|
|
UnicodeSet core = new UnicodeSet(coreChars[scriptCode]);
|
2005-03-30 17:19:32 +00:00
|
|
|
|
|
|
|
UnicodeSet deleted = extract(idnaTypeSet[DELETED], core);
|
|
|
|
UnicodeSet illegal = extract(idnaTypeSet[ILLEGAL], core);
|
|
|
|
UnicodeSet remapped = extract(idnaTypeSet[REMAPPED], core);
|
|
|
|
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeSet remappedIsNFKC = extract(isNFKC, remapped);
|
|
|
|
UnicodeSet remappedIsNFKCDecomp = extract(decomposable, remappedIsNFKC);
|
|
|
|
|
2005-03-30 17:19:32 +00:00
|
|
|
UnicodeSet decomp = extract(decomposable, core);
|
|
|
|
UnicodeSet pattern = extract(patternProp, core);
|
|
|
|
UnicodeSet non_id = extract(not_xid_continue, core);
|
|
|
|
|
2005-04-06 08:48:17 +00:00
|
|
|
UnicodeSet bicameralNoupper = new UnicodeSet();
|
|
|
|
if (!hasNoUpper.containsAll(core)) {
|
|
|
|
bicameralNoupper = extract(hasNoUpperMinus, core);
|
|
|
|
}
|
|
|
|
|
|
|
|
UnicodeSet foo = new UnicodeSet(bicameralNoupper).addAll(non_id);
|
|
|
|
for (UnicodeSetIterator it = new UnicodeSetIterator(foo); it.next(); ) {
|
|
|
|
String cat = Default.ucd().getCategoryID(it.codepoint);
|
|
|
|
String name = Default.ucd().getName(it.codepoint);
|
|
|
|
if (name.indexOf("MUSICAL SYMBOL") >= 0
|
|
|
|
|| name.indexOf("DINGBA") >= 0
|
|
|
|
|| name.indexOf("RADICAL ") >= 0
|
|
|
|
) cat = "XX";
|
|
|
|
suspect.put(it.codepoint, cat);
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
|
|
|
|
if (core.size() != 0) printlnSet(htmlOut, textOut, script, "Atomic", core, scriptCode);
|
2005-04-06 08:48:17 +00:00
|
|
|
if (bicameralNoupper.size() != 0) printlnSet(htmlOut, textOut, script, "Atomic-no-uppercase", bicameralNoupper, scriptCode);
|
2005-03-30 17:19:32 +00:00
|
|
|
if (pattern.size() != 0) printlnSet(htmlOut, textOut, script, "Pattern_Syntax", pattern, scriptCode);
|
|
|
|
if (non_id.size() != 0) printlnSet(htmlOut, textOut, script, "Non-XID", non_id, scriptCode);
|
2005-05-02 15:39:54 +00:00
|
|
|
if (decomp.size() != 0) printlnSet(htmlOut, textOut, script, "NFD-Decomposable", decomp, scriptCode);
|
2005-03-30 17:19:32 +00:00
|
|
|
|
2005-04-06 08:48:17 +00:00
|
|
|
if (remappedIsNFKC.size() != 0) printlnSet(htmlOut, textOut, script, "IDN-Remapped-Case-Atomic", remappedIsNFKC, scriptCode);
|
2005-05-02 15:39:54 +00:00
|
|
|
if (remappedIsNFKCDecomp.size() != 0) printlnSet(htmlOut, textOut, script, "IDN-Remapped-Case-NFD-Decomposable", remappedIsNFKCDecomp, scriptCode);
|
2005-04-06 08:48:17 +00:00
|
|
|
if (remapped.size() != 0) printlnSet(htmlOut, textOut, script, "IDN-Remapped-Compat", remapped, scriptCode);
|
2005-03-30 17:19:32 +00:00
|
|
|
if (deleted.size() != 0) printlnSet(htmlOut, textOut, script, "IDN-Deleted", deleted, scriptCode);
|
2005-05-02 15:39:54 +00:00
|
|
|
if (illegal.size() != 0) printlnSet(htmlOut, textOut, script, "IDN-Prohibited", illegal, scriptCode);
|
2005-03-30 17:19:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
private UnicodeSet extract(UnicodeSet other, UnicodeSet core) {
|
|
|
|
UnicodeSet decomp = new UnicodeSet(core).retainAll(other);
|
|
|
|
core.removeAll(decomp);
|
|
|
|
return decomp;
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2005-03-30 17:19:32 +00:00
|
|
|
* @param htmlOut
|
|
|
|
* @param textOut TODO
|
|
|
|
* @param script TODO
|
2005-02-24 02:59:34 +00:00
|
|
|
* @param unicodeset
|
|
|
|
* @param scriptCode
|
2005-03-30 17:19:32 +00:00
|
|
|
* @param uca
|
2005-02-24 02:59:34 +00:00
|
|
|
*/
|
2005-03-30 17:19:32 +00:00
|
|
|
private void printlnSet(PrintWriter htmlOut, PrintWriter textOut,
|
|
|
|
String script, String title, UnicodeSet unicodeset, int scriptCode) {
|
2005-02-24 02:59:34 +00:00
|
|
|
if (unicodeset == null)
|
|
|
|
return;
|
|
|
|
int size = unicodeset.size();
|
|
|
|
String dir = unicodeset.containsSome(bidiR)
|
|
|
|
&& unicodeset.containsNone(bidiL) ? " dir='rtl'" : "";
|
2005-05-02 15:39:54 +00:00
|
|
|
htmlOut.println("<tr><th class='" + title + "'><a href='#" +
|
|
|
|
title + "'>" + title + "</a> ("
|
2005-02-24 02:59:34 +00:00
|
|
|
+ nf.format(size) + ")</th></tr>");
|
2005-03-30 17:19:32 +00:00
|
|
|
htmlOut.print("<tr><td class='" + title + "'" + dir + ">");
|
2005-05-02 15:39:54 +00:00
|
|
|
// <a href="#Atomic">categorization</a>
|
2005-03-30 17:19:32 +00:00
|
|
|
textOut.println();
|
|
|
|
textOut.println("# " + title);
|
|
|
|
bf.setValueSource(script + " ; " + title);
|
2005-02-24 02:59:34 +00:00
|
|
|
UnicodeSetIterator usi = new UnicodeSetIterator();
|
|
|
|
if (scriptCode == HAN_SCRIPT || scriptCode == HANGUL_SCRIPT) {
|
|
|
|
usi.reset(unicodeset);
|
|
|
|
while (usi.nextRange()) {
|
|
|
|
if (usi.codepoint == usi.codepointEnd) {
|
2005-03-30 17:19:32 +00:00
|
|
|
htmlOut.print(formatCode(UTF16
|
2005-02-24 02:59:34 +00:00
|
|
|
.valueOf(usi.codepoint)));
|
|
|
|
} else {
|
2005-03-30 17:19:32 +00:00
|
|
|
htmlOut.print(formatCode(UTF16
|
2005-02-24 02:59:34 +00:00
|
|
|
.valueOf(usi.codepoint))
|
|
|
|
+ ".. "
|
|
|
|
+ formatCode(UTF16
|
|
|
|
.valueOf(usi.codepointEnd)));
|
|
|
|
}
|
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
bf.showSetNames(textOut, unicodeset);
|
2005-02-24 02:59:34 +00:00
|
|
|
} else {
|
|
|
|
Set reordered = new TreeSet(uca);
|
|
|
|
usi.reset(unicodeset);
|
|
|
|
while (usi.next()) {
|
2005-03-30 17:19:32 +00:00
|
|
|
String x = usi.getString();
|
|
|
|
boolean foo = reordered.add(x);
|
2005-02-24 02:59:34 +00:00
|
|
|
if (!foo)
|
|
|
|
throw new IllegalArgumentException("Collision with "
|
2005-03-30 17:19:32 +00:00
|
|
|
+ Default.ucd().getCodeAndName(x));
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
|
|
|
for (Iterator it = reordered.iterator(); it.hasNext();) {
|
2005-03-30 17:19:32 +00:00
|
|
|
Object key = it.next();
|
|
|
|
htmlOut.print(formatCode((String)key));
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
bf.showSetNames(textOut, reordered);
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
2005-03-30 17:19:32 +00:00
|
|
|
htmlOut.println("</td></tr>");
|
2005-02-24 02:59:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param string
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
private String formatCode(String string) {
|
|
|
|
int cat = ucd.getCategory(UTF16.charAt(string,0));
|
|
|
|
return "<span title='" + ucd.getCodeAndName(string) + "'>"
|
|
|
|
+ (cat == Me || cat == Mn ? "\u00A0" : "") //\u25cc
|
2005-03-30 17:19:32 +00:00
|
|
|
+ BagFormatter.toHTMLControl.transliterate(string)
|
2005-02-24 02:59:34 +00:00
|
|
|
+ " </span>";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param inbuffer
|
|
|
|
* @param outbuffer
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
public static boolean equals(StringBuffer inbuffer, StringBuffer outbuffer) {
|
|
|
|
if (inbuffer.length() != outbuffer.length()) return false;
|
|
|
|
for (int i = inbuffer.length() - 1; i >= 0; --i) {
|
|
|
|
if (inbuffer.charAt(i) != outbuffer.charAt(i)) return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static void checkChars(boolean mergeRanges) {
|
|
|
|
UCD ucd = Default.ucd();
|
|
|
|
ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
|
|
|
|
UnicodeSet isUpper = ups.getSet("Uppercase=true");
|
|
|
|
UnicodeSet isLower = ups.getSet("Lowercase=true");
|
|
|
|
UnicodeSet isTitle = ups.getSet("gc=Lt");
|
|
|
|
UnicodeSet otherAlphabetic = ups.getSet("Alphabetic=true").addAll(ups.getSet("gc=Sk"));
|
|
|
|
// create the following
|
|
|
|
UnicodeSet hasFold = new UnicodeSet();
|
|
|
|
UnicodeSet hasUpper = new UnicodeSet();
|
|
|
|
UnicodeSet hasLower = new UnicodeSet();
|
|
|
|
UnicodeSet hasTitle = new UnicodeSet();
|
|
|
|
UnicodeSet compat = new UnicodeSet();
|
|
|
|
UnicodeSet bicameralsScripts = new UnicodeSet();
|
|
|
|
|
|
|
|
UCD u40 = UCD.make("4.0.0");
|
|
|
|
BitSet scripts = new BitSet();
|
|
|
|
for (int i = 0; i <= 0x10FFFF; ++i) {
|
|
|
|
int gc = ucd.getCategory(i);
|
|
|
|
if (gc == Cn || gc == PRIVATE_USE) continue;
|
|
|
|
String str = UTF16.valueOf(i);
|
|
|
|
if (!str.equals(ucd.getCase(str, FULL, FOLD))) hasFold.add(i);
|
|
|
|
if (!str.equals(ucd.getCase(str, FULL, UPPER))) hasUpper.add(i);
|
|
|
|
if (!str.equals(ucd.getCase(str, FULL, LOWER))) {
|
|
|
|
hasLower.add(i);
|
|
|
|
scripts.set(ucd.getScript(i));
|
|
|
|
}
|
|
|
|
if (!str.equals(ucd.getCase(str, FULL, TITLE))) hasTitle.add(i);
|
|
|
|
if (!str.equals(Default.nfkd().normalize(str))) compat.add(i);
|
|
|
|
//System.out.println(ucd.getCodeAndName(i) + "\t" + (u40.isAllocated(i) ? "already in 4.0" : "new in 4.1"));
|
|
|
|
}
|
|
|
|
BagFormatter bf = new BagFormatter();
|
|
|
|
bf.setMergeRanges(mergeRanges);
|
|
|
|
bf.setUnicodePropertyFactory(ups);
|
|
|
|
printItems(bf, compat, "isUpper or isTitle without hasLower",
|
|
|
|
new UnicodeSet(isUpper).addAll(isTitle).removeAll(hasLower));
|
|
|
|
printItems(bf, compat, "hasLower, but not isUpper or isTitle",
|
|
|
|
new UnicodeSet(hasLower).removeAll(isTitle).removeAll(isUpper));
|
|
|
|
printItems(bf, compat, "isLower without hasUpper",
|
|
|
|
new UnicodeSet(isLower).addAll(isTitle).removeAll(hasUpper));
|
|
|
|
printItems(bf, compat, "hasUpper, but not isLower or isTitle",
|
|
|
|
new UnicodeSet(hasUpper).removeAll(isTitle).removeAll(isLower));
|
|
|
|
|
|
|
|
UnicodeSet scriptSet = new UnicodeSet();
|
|
|
|
UnicodeProperty scriptProp = ups.getProperty("Script");
|
|
|
|
for (int i = 0; i < scripts.size(); ++i) {
|
|
|
|
if (!scripts.get(i)) continue;
|
|
|
|
if (i == COMMON_SCRIPT) continue;
|
|
|
|
String scriptName = ucd.getScriptID_fromIndex((byte)i);
|
|
|
|
System.out.println(scriptName);
|
|
|
|
scriptSet.addAll(scriptProp.getSet(scriptName));
|
|
|
|
}
|
|
|
|
UnicodeSet allCased = new UnicodeSet().addAll(isUpper).addAll(isLower).addAll(isTitle);
|
|
|
|
printItems(bf, compat, "(Bicameral) isAlpha or Symbol Modifier, but not isCased",
|
|
|
|
new UnicodeSet(scriptSet).retainAll(otherAlphabetic).removeAll(allCased));
|
|
|
|
printItems(bf, compat, "(Bicameral) isCased, but not isAlpha or Symbol Modifier",
|
|
|
|
new UnicodeSet(scriptSet).retainAll(allCased).removeAll(otherAlphabetic));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param bf
|
|
|
|
* @param compat
|
|
|
|
* @param temp
|
|
|
|
*/
|
|
|
|
private static void printItems(BagFormatter bf, UnicodeSet compat, String title, UnicodeSet temp) {
|
|
|
|
System.out.println();
|
|
|
|
System.out.println(title + " -- (non compat)");
|
|
|
|
UnicodeSet temp2 = new UnicodeSet(temp).removeAll(compat);
|
|
|
|
System.out.println(bf.showSetNames(temp2));
|
|
|
|
System.out.println();
|
|
|
|
temp2 = new UnicodeSet(temp).retainAll(compat);
|
|
|
|
System.out.println(title + " -- (compat)");
|
|
|
|
System.out.println(bf.showSetNames(temp2));
|
|
|
|
}
|
|
|
|
|
2004-12-11 06:03:10 +00:00
|
|
|
// Report writer shared by checkShaping, which opens and closes it, and checkProperty, which prints to it.
static PrintWriter log;
|
|
|
|
|
|
|
|
public static void checkShaping() throws IOException {
|
|
|
|
log = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "checklog.txt");
|
|
|
|
checkProperty("Joining_Type", "Non_Joining", "Joining_Type", "Transparent");
|
|
|
|
checkProperty("Joining_Group", "No_Joining_Group", "Joining_Type", "Transparent");
|
|
|
|
checkProperty("Line_Break", "Unknown", "Line_Break", "Combining_Mark");
|
|
|
|
checkProperty("East_Asian_Width", null, "Line_Break", "Combining_Mark");
|
|
|
|
checkProperty("Bidi_Class", null, "Line_Break", "Combining_Mark");
|
|
|
|
checkProperty("Script", null, "Script", new String[]{"Common", "Inherited"});
|
|
|
|
checkProperty("General_Category", null, "General_Category", new String[]{"Spacing_Mark",
|
|
|
|
"Enclosing_Mark", "Nonspacing_Mark"});
|
|
|
|
log.close();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param propertyName
|
|
|
|
* @param exclusion
|
|
|
|
* @param ignorePropertyName TODO
|
|
|
|
* @param ignoreValue
|
|
|
|
*/
|
|
|
|
private static void checkProperty(String propertyName, String exclusion, String ignorePropertyName, Object ignoreValueList) {
|
|
|
|
log.println();
|
|
|
|
log.println(propertyName + " Check");
|
|
|
|
log.println();
|
|
|
|
Set ignoreValueSet = new HashSet();
|
|
|
|
if (ignoreValueList instanceof String) ignoreValueSet.add(ignoreValueList);
|
|
|
|
else ignoreValueSet.addAll(Arrays.asList((Object[])ignoreValueList));
|
|
|
|
|
|
|
|
ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("4.0.1");
|
|
|
|
UnicodeProperty up = ups.getProperty(propertyName);
|
|
|
|
UnicodeProperty ignProp = ups.getProperty(ignorePropertyName);
|
|
|
|
UnicodeProperty name = ups.getProperty("Name");
|
|
|
|
UnicodeSet significant = (exclusion != null ? up.getSet(exclusion) : new UnicodeSet()).complement();
|
|
|
|
UnicodeSetIterator it = new UnicodeSetIterator(significant);
|
|
|
|
Normalizer n = new Normalizer(Normalizer.NFD, "4.0.1");
|
|
|
|
int counter = 0;
|
|
|
|
while (it.next()) {
|
|
|
|
String baseValue = up.getValue(it.codepoint);
|
|
|
|
String nfd = n.normalize(it.codepoint);
|
|
|
|
if (n.isNormalized(it.codepoint)) continue;
|
|
|
|
//if (nfd.equals(it.getString())) continue;
|
|
|
|
int cp;
|
|
|
|
for (int i = 0; i < nfd.length(); i += UTF16.getCharCount(cp)) {
|
|
|
|
cp = UTF16.charAt(nfd, i);
|
|
|
|
boolean shown = false;
|
|
|
|
String newValue = up.getValue(cp);
|
|
|
|
String possIgnValue = ignProp.getValue(cp);
|
|
|
|
if (ignoreValueSet.contains(possIgnValue)) {
|
|
|
|
//log.println("--- " + newValue + "\t" + Utility.hex(cp) + " " + name.getValue(cp));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
//log.println("*** " + newValue + "\t" + Utility.hex(cp) + " " + name.getValue(cp));
|
|
|
|
|
|
|
|
if (!baseValue.equals(newValue)) {
|
|
|
|
if (!shown) log.println((++counter) + "\tCONFLICT\t" + baseValue + "\t" + Utility.hex(it.codepoint) + " " + name.getValue(it.codepoint));
|
|
|
|
log.println("\tNFD(" + Utility.hex(it.codepoint) + ") contains:\t" + newValue + "\t" + Utility.hex(cp) + " " + name.getValue(cp));
|
|
|
|
shown = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2003-08-20 03:48:47 +00:00
|
|
|
|
2004-12-11 06:03:10 +00:00
|
|
|
public static class RegexMatcher implements UnicodeProperty.Matcher {
|
2004-02-18 03:09:02 +00:00
|
|
|
private Matcher matcher;
|
|
|
|
|
|
|
|
public UnicodeProperty.Matcher set(String pattern) {
|
|
|
|
matcher = Pattern.compile(pattern).matcher("");
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
public boolean matches(String value) {
|
|
|
|
matcher.reset(value);
|
|
|
|
return matcher.matches();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-02-06 18:32:05 +00:00
|
|
|
static BagFormatter bf = new BagFormatter();
|
2004-02-18 03:09:02 +00:00
|
|
|
static UnicodeProperty.Matcher matcher = new RegexMatcher();
|
2004-02-06 18:32:05 +00:00
|
|
|
|
|
|
|
private static void showPropDiff(String p1, UnicodeSet s1, String p2, UnicodeSet s2) {
|
|
|
|
System.out.println("Property Listing");
|
|
|
|
if (s1 == null) {
|
|
|
|
s1 = upf.getSet(p1, matcher, null);
|
|
|
|
}
|
|
|
|
if (s2 == null) {
|
|
|
|
s2 = upf.getSet(p2, matcher, null);
|
|
|
|
}
|
|
|
|
bf.showSetDifferences(bf.CONSOLE,p1,s1,p2,s2);
|
|
|
|
}
|
2003-08-20 03:48:47 +00:00
|
|
|
|
2003-07-07 15:58:57 +00:00
|
|
|
static private UnicodeSet getSetForName(String regexPattern) {
|
|
|
|
UnicodeSet result = new UnicodeSet();
|
|
|
|
Pattern p = Pattern.compile(regexPattern);
|
|
|
|
Matcher m = p.matcher("");
|
|
|
|
for (int i = 0; i < 0x10FFFF; ++i) {
|
|
|
|
Utility.dot(i);
|
2004-02-07 01:01:17 +00:00
|
|
|
if (!Default.ucd().isAssigned(i)) continue;
|
|
|
|
byte cat = Default.ucd().getCategory(i);
|
2003-07-07 15:58:57 +00:00
|
|
|
if (cat == PRIVATE_USE) continue;
|
2004-02-07 01:01:17 +00:00
|
|
|
m.reset(Default.ucd().getName(i));
|
2003-07-07 15:58:57 +00:00
|
|
|
if (m.matches()) {
|
|
|
|
result.add(i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static void showSet(PrintWriter pw, String x, boolean separateLines) {
|
|
|
|
pw.println("****************************");
|
|
|
|
System.out.println(x);
|
|
|
|
UnicodeSet ss = new UnicodeSet(x);
|
|
|
|
pw.println(x);
|
2004-02-07 01:01:17 +00:00
|
|
|
Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd());
|
2003-07-07 15:58:57 +00:00
|
|
|
pw.println("****************************");
|
|
|
|
}
|
|
|
|
|
|
|
|
static int SHIFT = 6;
|
|
|
|
static int MASK = (1<<6) - 1;
|
|
|
|
static int OTHER = 0xFF & ~MASK;
|
|
|
|
|
|
|
|
static void showNumber(float x) {
|
|
|
|
System.out.println("Number: " + x);
|
|
|
|
//long bits = Double.doubleToLongBits(x);
|
|
|
|
long bits = (Float.floatToIntBits(x) + 0L) << 32;
|
|
|
|
System.out.println("IEEE: " + Long.toBinaryString(bits));
|
|
|
|
System.out.print("Broken: ");
|
|
|
|
long lastShift = 64-SHIFT;
|
|
|
|
for (long shift = 64-SHIFT; shift > 0; shift -= SHIFT) {
|
|
|
|
long temp = bits >>> shift;
|
|
|
|
temp &= MASK;
|
|
|
|
if (temp != 0) lastShift = shift;
|
|
|
|
temp |= OTHER;
|
|
|
|
String piece = Long.toBinaryString(temp);
|
|
|
|
System.out.print(" " + piece);
|
|
|
|
}
|
|
|
|
System.out.println();
|
|
|
|
System.out.print("Bytes: 1B");
|
|
|
|
for (long shift = 64-SHIFT; shift >= lastShift; shift -= SHIFT) {
|
|
|
|
long temp = bits >>> shift;
|
|
|
|
temp &= MASK;
|
|
|
|
temp |= OTHER;
|
|
|
|
if (shift == lastShift) {
|
|
|
|
temp &= ~0x80;
|
|
|
|
}
|
|
|
|
String piece = Long.toHexString(temp).toUpperCase();
|
|
|
|
System.out.print(" " + piece);
|
|
|
|
}
|
|
|
|
System.out.println();
|
|
|
|
}
|
|
|
|
|
|
|
|
static int findFirstNonZero(String digits) {
|
|
|
|
for (int i = 0; i < digits.length(); ++i) {
|
|
|
|
if (digits.charAt(i) != '0') return i;
|
|
|
|
}
|
|
|
|
return digits.length();
|
|
|
|
}
|
|
|
|
|
|
|
|
static String remove(String s, int start, int limit) {
|
|
|
|
return s.substring(0, start) + s.substring(limit);
|
|
|
|
}
|
|
|
|
|
|
|
|
static String hexByte(int i) {
|
|
|
|
String result = Integer.toHexString(i).toUpperCase();
|
|
|
|
if (result.length() == 1) result = '0' + result;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// dumb implementation
|
|
|
|
static String convertToBCD(String digits) {
|
|
|
|
|
|
|
|
// fix negatives, remove leading zeros, get decimal
|
|
|
|
|
|
|
|
int[] pairs = new int[120];
|
|
|
|
boolean negative = false;
|
|
|
|
boolean removedNegative = false;
|
|
|
|
boolean removedDecimal = false;
|
|
|
|
int leadZeros = 0;
|
|
|
|
int trailZeros = 0;
|
|
|
|
|
|
|
|
if (digits.charAt(0) == '-') {
|
|
|
|
negative = true;
|
|
|
|
removedNegative = true;
|
|
|
|
digits = remove(digits, 0, 1);
|
|
|
|
}
|
|
|
|
while (digits.length() > 0 && digits.charAt(0) == '0') {
|
|
|
|
digits = remove(digits, 0, 1);
|
|
|
|
leadZeros++;
|
|
|
|
}
|
|
|
|
int decimalOffset = digits.indexOf('.');
|
|
|
|
if (decimalOffset < 0) {
|
|
|
|
decimalOffset = digits.length();
|
|
|
|
} else {
|
|
|
|
digits = digits = remove(digits, decimalOffset, decimalOffset+1);
|
|
|
|
removedDecimal = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// remove trailing zeros
|
|
|
|
while (digits.length() > 0 && digits.charAt(digits.length() - 1) == '0') {
|
|
|
|
digits = remove(digits, digits.length() - 1, digits.length());
|
|
|
|
trailZeros++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// make the digits even (in non-fraction part)
|
|
|
|
if (((decimalOffset) & 1) != 0) {
|
|
|
|
digits = '0' + digits; // make even
|
|
|
|
++decimalOffset;
|
|
|
|
leadZeros--;
|
|
|
|
}
|
|
|
|
if (((digits.length()) & 1) != 0) {
|
|
|
|
digits = digits + '0'; // make even
|
|
|
|
trailZeros--;
|
|
|
|
}
|
|
|
|
|
|
|
|
// handle 0
|
|
|
|
if (digits.length() == 0) {
|
|
|
|
negative = false;
|
|
|
|
digits = "00";
|
|
|
|
leadZeros -= 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// store exponent
|
|
|
|
int exp = decimalOffset/2;
|
|
|
|
if (!negative) exp |= 0x80;
|
|
|
|
else exp = (~exp) & 0x7F;
|
|
|
|
String result = hexByte(exp);
|
|
|
|
for (int i = 0; i < digits.length(); i += 2) {
|
|
|
|
int base100 = ((digits.charAt(i) - '0')*10 + (digits.charAt(i+1) - '0')) << 1;
|
|
|
|
if (i < digits.length() - 2) base100 |= 0x1; // mark all but last
|
|
|
|
if (negative) base100 = (~base100) & 0xFF;
|
|
|
|
result += "." + hexByte(base100);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
// add a secondary weight
|
|
|
|
// assume we don't care about more than too many leads/trails
|
|
|
|
leadZeros += 2; // make non-negative; might have padded by 2, for 0
|
|
|
|
trailZeros += 2; // make non-negative; might have padded by 1
|
|
|
|
if (leadZeros > 7) leadZeros = 7;
|
|
|
|
if (trailZeros > 7) trailZeros = 7;
|
|
|
|
int secondary = (removedNegative ? 0 : 0x80) // only for zero
|
|
|
|
| (leadZeros << 4)
|
|
|
|
| (removedDecimal ? 0 : 0x08)
|
|
|
|
| (trailZeros);
|
|
|
|
result += ";" + hexByte(secondary);
|
|
|
|
*/
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int stamp = 0;
|
|
|
|
static void add(Map m, String s) {
|
|
|
|
add2(m, s);
|
|
|
|
add2(m, "0" + s);
|
|
|
|
if (s.indexOf('.') >= 0) {
|
|
|
|
add2(m, s + "0");
|
|
|
|
add2(m, "0" + s + "0");
|
|
|
|
} else {
|
|
|
|
add2(m, s + ".");
|
|
|
|
add2(m, "0" + s + ".");
|
|
|
|
add2(m, s + ".0");
|
|
|
|
add2(m, "0" + s + ".0");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add2(Map m, String s) {
|
|
|
|
add3(m,s);
|
|
|
|
if (s.indexOf('-') < 0) add3(m, "-" + s);
|
|
|
|
}
|
|
|
|
|
|
|
|
private static void add3(Map m, String s) {
|
|
|
|
String base = convertToBCD(s);
|
|
|
|
base += "|" + Math.random() + stamp++; // just something for uniqueness
|
|
|
|
m.put(base, s);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** When true, testConvertToBDD prints every entry, not only the failures. */
static boolean SHOW_ALL = true;

/** English number format with grouping separators turned off; used by cleanToString. */
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH);
static {
    nf.setGroupingUsed(false);
}

/**
 * Formats a double using the shared English, non-grouping number format.
 *
 * @param d the value to format
 * @return the formatted text
 */
static String cleanToString(double d) {
    final String formatted = nf.format(d);
    return formatted;
}
|
|
|
|
|
|
|
|
static void testConvertToBDD() {
|
|
|
|
System.out.println("Starting Test");
|
|
|
|
double[] testList = {0, 0.00000001, 0.001, 5, 10, 50, 100, 1000, 100000000};
|
|
|
|
Map m = new TreeMap();
|
|
|
|
|
|
|
|
for (int i = 0; i < testList.length; ++i) {
|
|
|
|
double d = testList[i];
|
|
|
|
add(m, cleanToString(d));
|
|
|
|
add(m, cleanToString(d + 0.1));
|
|
|
|
add(m, cleanToString(d + 1));
|
|
|
|
add(m, cleanToString(d + 1.1));
|
|
|
|
if (d > 0.1) add(m, cleanToString(d - 0.1));
|
|
|
|
if (d > 1.0) add(m, cleanToString(d - 1.0));
|
|
|
|
if (d > 1.1) add(m, cleanToString(d - 1.1));
|
|
|
|
}
|
|
|
|
Iterator it = m.keySet().iterator();
|
|
|
|
String lastKey = "";
|
|
|
|
String lastValue = "";
|
|
|
|
boolean lastPrinted = false;
|
|
|
|
double lastNumber = Double.NEGATIVE_INFINITY;
|
|
|
|
int errorCount = 0;
|
|
|
|
while (it.hasNext()) {
|
|
|
|
String key = (String) it.next();
|
|
|
|
String value = (String) m.get(key);
|
|
|
|
key = key.substring(0, key.indexOf('|')); // remove stamp
|
|
|
|
double number = Double.parseDouble(value);
|
|
|
|
if (lastNumber > number) {
|
|
|
|
if (!lastPrinted) System.out.println("\t" + lastValue + "\t" + lastKey);
|
|
|
|
System.out.println("Fail:\t" + value + "\t" + key);
|
|
|
|
lastPrinted = true;
|
|
|
|
errorCount++;
|
|
|
|
} else if (SHOW_ALL) {
|
|
|
|
System.out.println("\t" + value + "\t" + key);
|
|
|
|
lastPrinted = true;
|
|
|
|
}
|
|
|
|
lastNumber = number;
|
|
|
|
lastKey = key;
|
|
|
|
lastValue = value;
|
|
|
|
}
|
|
|
|
System.out.println("Done Test, " + errorCount + " Errors");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void testFormatHack() {
|
|
|
|
String[] testCurrencies = {"USD","GBP","JPY","EUR"};
|
|
|
|
Locale[] testLocales = NumberFormat.getAvailableLocales();
|
|
|
|
for (int i = 0; i < testLocales.length; ++i) {
|
|
|
|
// since none of this should vary by country, we'll just do by language
|
|
|
|
if (!testLocales[i].getCountry().equals("")) continue;
|
|
|
|
System.out.println(testLocales[i].getDisplayName());
|
|
|
|
for (int j = 0; j < testCurrencies.length; ++j) {
|
|
|
|
NumberFormat nf = getCurrencyFormat(
|
|
|
|
Currency.getInstance(testCurrencies[j]), testLocales[i], true);
|
|
|
|
String newVersion = nf.format(1234.567);
|
|
|
|
System.out.print("\t" + newVersion);
|
|
|
|
nf = getCurrencyFormat(
|
|
|
|
Currency.getInstance(testCurrencies[j]), testLocales[i], false);
|
|
|
|
String oldVersion = nf.format(1234.567);
|
|
|
|
if (!oldVersion.equals(newVersion)) {
|
|
|
|
System.out.print(" (" + oldVersion + ")");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
System.out.println();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static NumberFormat getCurrencyFormat(Currency currency, Locale displayLocale, boolean ICU26) {
|
|
|
|
// code for ICU 2.6
|
|
|
|
if (ICU26) {
|
|
|
|
NumberFormat result = NumberFormat.getCurrencyInstance();
|
|
|
|
result.setCurrency(currency);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ugly work-around for 2.4
|
|
|
|
DecimalFormat result = (DecimalFormat)NumberFormat.getCurrencyInstance(displayLocale);
|
|
|
|
HackCurrencyInfo hack = (HackCurrencyInfo)(hackData.get(currency.getCurrencyCode()));
|
|
|
|
result.setMinimumFractionDigits(hack.decimals);
|
|
|
|
result.setMaximumFractionDigits(hack.decimals);
|
|
|
|
result.setRoundingIncrement(hack.rounding);
|
|
|
|
DecimalFormatSymbols symbols = result.getDecimalFormatSymbols();
|
|
|
|
symbols.setCurrencySymbol(hack.symbol);
|
|
|
|
result.setDecimalFormatSymbols(symbols);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Hand-maintained currency formatting data, keyed by currency code
 * (String) with HackCurrencyInfo values. Read by getCurrencyFormat.
 */
static Map hackData = new HashMap();

/** Formatting parameters for one currency. */
static class HackCurrencyInfo {
    /** Number of fraction digits, used as both minimum and maximum. */
    int decimals;
    /** Rounding increment applied to formatted amounts. */
    double rounding;
    /** Currency symbol to display. */
    String symbol;

    HackCurrencyInfo(int decimals, double rounding, String symbol) {
        this.symbol = symbol;
        this.rounding = rounding;
        this.decimals = decimals;
    }
}

static {
    final double hundredth = 0.01;
    hackData.put("USD", new HackCurrencyInfo(2, hundredth, "$"));
    hackData.put("GBP", new HackCurrencyInfo(2, hundredth, "\u00a3"));
    hackData.put("JPY", new HackCurrencyInfo(0, 1, "\u00a5"));
    hackData.put("EUR", new HackCurrencyInfo(2, hundredth, "\u20AC"));
}
|
2001-12-13 23:36:29 +00:00
|
|
|
/*
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
System.out.println("START");
|
|
|
|
ucd = UCD.make();
|
|
|
|
System.out.println("Loaded UCD " + ucd.getVersion() + " " + (new Date(ucd.getDate())));
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
|
|
|
|
checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-10-25 20:33:46 +00:00
|
|
|
long mask = 0;
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
if (false) {
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedBidiClass-3.1.1d1.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
|
2001-09-01 00:06:48 +00:00
|
|
|
mask = Utility.setBits(0, DerivedProperty.FC_NFKC_Closure, DerivedProperty.ExpandsOnNFKC);
|
|
|
|
mask = Utility.clearBit(mask, DerivedProperty.FullCompInclusion);
|
2001-08-30 20:50:18 +00:00
|
|
|
generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-3.1.0d1.txt");
|
|
|
|
|
|
|
|
generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedEastAsianWidth-3.1.0d1.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
2001-08-30 20:50:18 +00:00
|
|
|
"DerivedGeneralCategory-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedCombiningClass-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedDecompositionType-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedNumericType-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedEastAsianWidth-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedJoiningType-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedJoiningGroup-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedBinaryProperties-3.1.0d1.txt");
|
|
|
|
generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedNumericValues-3.1.0d1.txt");
|
|
|
|
|
2001-09-01 00:06:48 +00:00
|
|
|
mask = Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf);
|
2001-08-30 20:50:18 +00:00
|
|
|
generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-3.1.0d1.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
|
|
|
"DerivedLineBreak-3.1.0d1.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM, KEEP_SPECIAL, HEADER_SCRIPTS, "Scripts-3.1.0d4.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + Noncharacter_Code_Point + 1,
|
|
|
|
KEEP_SPECIAL, HEADER_EXTEND, "PropList-3.1.0d5.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
writeNormalizerTestSuite("NormalizationTest-3.1.0d1.txt");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2001-09-01 00:06:48 +00:00
|
|
|
//generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
|
2001-08-30 20:50:18 +00:00
|
|
|
// HEADER_DERIVED, "DerivedPropData2-3.1.0d1.txt");
|
|
|
|
//generateVerticalSlice(SCRIPT, SCRIPT+1, KEEP_SPECIAL, "ScriptCommon-3.1.0d1.txt");
|
|
|
|
//listStrings("LowerCase-3.1.0d1.txt", 0,0);
|
|
|
|
//generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedPropData1-3.1.0d1.txt");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
// AGE stuff
|
|
|
|
//UCD ucd = UCD.make();
|
|
|
|
//System.out.println(ucd.getAgeID(0x61));
|
|
|
|
//System.out.println(ucd.getAgeID(0x2FA1D));
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
//generateCompExclusions();
|
|
|
|
System.out.println("END");
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static Normalizer nfkc = new Normalizer(Normalizer.NFKC);
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
public static void checkHoffman(String test) {
|
|
|
|
String result = nfkc.normalize(test);
|
|
|
|
System.out.println(Utility.hex(test) + " => " + Utility.hex(result));
|
|
|
|
System.out.println();
|
|
|
|
show(test, 0);
|
|
|
|
System.out.println();
|
|
|
|
show(result, 0);
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
public static void show(String s, int indent) {
|
|
|
|
int cp;
|
|
|
|
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
|
|
|
|
cp = UTF32.char32At(s, i);
|
|
|
|
String cc = " " + ucd.getCombiningClass(cp);
|
|
|
|
cc = Utility.repeat(" ", 4 - cc.length()) + cc;
|
|
|
|
System.out.println(Utility.repeat(" ", indent) + ucd.getCode(cp) + cc + " " + ucd.getName(cp));
|
|
|
|
String decomp = nfkc.normalize(cp);
|
|
|
|
if (!decomp.equals(UTF32.valueOf32(cp))) {
|
|
|
|
show(decomp, indent + 4);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
//Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
public static String fixFile(String s) {
|
|
|
|
int len = s.length();
|
|
|
|
if (!s.endsWith(".txt")) return s;
|
|
|
|
if (s.charAt(len-6) != 'd') return s;
|
|
|
|
char c = s.charAt(len-5);
|
|
|
|
if (c < '0' || '9' < c) return s;
|
|
|
|
System.out.println("Fixing File Name");
|
|
|
|
return s.substring(0,len-6) + s.substring(len-4);
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
|
2001-08-30 20:50:18 +00:00
|
|
|
output.println("# " + fixFile(fileName));
|
|
|
|
output.println("#");
|
|
|
|
if (headerChoice == HEADER_SCRIPTS) {
|
|
|
|
output.println("# For documentation, see UTR #24: Script Names");
|
|
|
|
output.println("# http://www.unicode.org/unicode/reports/tr24/");
|
|
|
|
} else if (headerChoice == HEADER_EXTEND) {
|
|
|
|
output.println("# Unicode Character Database: Extended Properties");
|
|
|
|
output.println("# For documentation, see PropList.html");
|
|
|
|
} else {
|
|
|
|
output.println("# Unicode Character Database: Derived Property Data");
|
|
|
|
output.println("# Generated algorithmically from the Unicode Character Database");
|
|
|
|
output.println("# For documentation, see DerivedProperties.html");
|
|
|
|
}
|
|
|
|
output.println("# Date: " + myDateFormat.format(new Date()) + " [MD]");
|
|
|
|
output.println("# Note: Unassigned and Noncharacter codepoints are omitted,");
|
|
|
|
output.println("# except when listing Noncharacter or Cn.");
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-10-25 20:33:46 +00:00
|
|
|
public static void generateDerived (long bitMask, int headerChoice, String fileName) throws IOException {
|
2001-09-19 23:33:52 +00:00
|
|
|
ucd = UCD.make("3.1.0");
|
|
|
|
PrintWriter output = Utility.openPrintWriter(fileName);
|
2001-08-30 20:50:18 +00:00
|
|
|
doHeader(fileName, output, headerChoice);
|
|
|
|
for (int i = 0; i < 32; ++i) {
|
|
|
|
if ((bitMask & (1<<i)) == 0) continue;
|
2001-12-05 02:41:23 +00:00
|
|
|
if (i >= DERIVED_PROPERTY_LIMIT) break;
|
2001-08-30 20:50:18 +00:00
|
|
|
System.out.print('.');
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println();
|
|
|
|
new DerivedPropertyLister(ucd, i, output).print();
|
|
|
|
}
|
|
|
|
output.close();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
/*
|
|
|
|
public static void listStrings(String file, int type, int subtype) throws IOException {
|
2001-09-19 23:33:52 +00:00
|
|
|
ucd = UCD.make("3.1.0");
|
|
|
|
UCD ucd30 = UCD.make("3.0.0");
|
|
|
|
PrintWriter output = new PrintStream(new FileOutputStream(GEN_DIR + file));
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
for (int i = 0; i < 0x10FFFF; ++i) {
|
|
|
|
if ((i & 0xFFF) == 0) System.out.println("# " + i);
|
|
|
|
if (!ucd.isRepresented(i)) continue;
|
|
|
|
if (ucd30.isRepresented(i)) continue;
|
|
|
|
String string = "";
|
|
|
|
switch(type) {
|
|
|
|
case 0: string = ucd.getSimpleLowercase(i);
|
|
|
|
}
|
|
|
|
if (UTF32.length32(string) == 1 && UTF32.char32At(string,0) == i) continue;
|
|
|
|
output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + ucd.getName(i));
|
|
|
|
}
|
|
|
|
output.close();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
public static void generateCompExclusions() throws IOException {
|
2001-09-19 23:33:52 +00:00
|
|
|
PrintWriter output = Utility.openPrintWriter("CompositionExclusionsDelta.txt");
|
2001-08-30 20:50:18 +00:00
|
|
|
new CompLister(output).print();
|
|
|
|
output.close();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static class CompLister extends PropertyLister {
|
|
|
|
UCD oldUCD;
|
|
|
|
int oldLength = 0;
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
public CompLister(PrintWriter output) {
|
2001-08-30 20:50:18 +00:00
|
|
|
this.output = output;
|
2001-09-19 23:33:52 +00:00
|
|
|
ucdData = UCD.make("3.1.0");
|
|
|
|
oldUCD = UCD.make("3.0.0");
|
2001-08-30 20:50:18 +00:00
|
|
|
showOnConsole = true;
|
|
|
|
}
|
2001-12-06 00:05:53 +00:00
|
|
|
public String valueName(int cp) {
|
2001-08-30 20:50:18 +00:00
|
|
|
return UTF32.length32(ucdData.getDecompositionMapping(cp)) + "";
|
|
|
|
}
|
|
|
|
public byte status(int cp) {
|
2001-08-31 00:30:17 +00:00
|
|
|
if (ucdData.getDecompositionType(cp) == CANONICAL
|
2001-08-30 20:50:18 +00:00
|
|
|
&& oldUCD.getDecompositionType(cp) != CANONICAL) {
|
|
|
|
int temp = oldLength;
|
|
|
|
oldLength = UTF32.length32(ucdData.getDecompositionMapping(cp));
|
|
|
|
if (temp != oldLength) return BREAK;
|
|
|
|
return INCLUDE;
|
|
|
|
}
|
|
|
|
return EXCLUDE;
|
|
|
|
}
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
public static void generateVerticalSlice(int startEnum, int endEnum, byte skipSpecial, int headerChoice, String file) throws IOException {
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
//System.out.println(ucd.toString(0x1E0A));
|
|
|
|
/*
|
|
|
|
System.out.println(ucd.getData(0xFFFF));
|
|
|
|
System.out.println(ucd.getData(0x100000));
|
|
|
|
System.out.println(ucd.getData(0x100000-1));
|
|
|
|
System.out.println(ucd.getData(0x100000-2));
|
|
|
|
System.out.println(ucd.getData(0x100000-3));
|
|
|
|
if (true) return;
|
|
|
|
String test2 = ucd.getName(0x2A6D6);
|
2001-12-13 23:36:29 +00:00
|
|
|
//* /
|
2001-08-31 00:30:17 +00:00
|
|
|
|
|
|
|
|
2001-09-19 23:33:52 +00:00
|
|
|
PrintWriter output = Utility.openPrintWriter(file);
|
2001-08-30 20:50:18 +00:00
|
|
|
doHeader(file, output, headerChoice);
|
2001-10-25 20:33:46 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
int last = -1;
|
|
|
|
for (int i = startEnum; i < endEnum; ++i) {
|
2001-12-05 02:41:23 +00:00
|
|
|
UnicodeProperty up = UnifiedBinaryProperty.make(i, ucd);
|
|
|
|
if (up == null) continue;
|
|
|
|
|
2001-08-31 00:30:17 +00:00
|
|
|
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|
2001-08-30 20:50:18 +00:00
|
|
|
|| i == (CATEGORY | UNUSED_CATEGORY)
|
|
|
|
|| i == (BINARY_PROPERTIES | Non_break)
|
|
|
|
|| i == (JOINING_TYPE | JT_U)
|
|
|
|
|| i == (SCRIPT | UNUSED_SCRIPT)
|
|
|
|
|| i == (JOINING_GROUP | NO_SHAPING)
|
|
|
|
) continue; // skip zero case
|
|
|
|
if (skipSpecial == SKIP_SPECIAL
|
|
|
|
&& i >= (BINARY_PROPERTIES | CompositionExclusion)
|
|
|
|
&& i < (AGE + NEXT_ENUM)) continue;
|
|
|
|
if ((last & 0xFF00) != (i & 0xFF00) && (i <= BINARY_PROPERTIES || i >= SCRIPT)) {
|
|
|
|
output.println();
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println("# " + UCD_Names.UNIFIED_PROPERTIES[i>>8]);
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println();
|
|
|
|
System.out.println();
|
|
|
|
System.out.println(UCD_Names.UNIFIED_PROPERTIES[i>>8]);
|
|
|
|
last = i;
|
|
|
|
} else {
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println();
|
|
|
|
}
|
|
|
|
System.out.print(".");
|
2001-08-31 00:30:17 +00:00
|
|
|
new MyPropertyLister(ucd, i, output).print();
|
2001-08-30 20:50:18 +00:00
|
|
|
}
|
|
|
|
if (endEnum == LIMIT_ENUM) {
|
|
|
|
output.println();
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println("# Numeric Values (from UnicodeData.txt, field 6/7/8)");
|
|
|
|
output.println("# ================================================");
|
|
|
|
output.println();
|
|
|
|
System.out.println();
|
|
|
|
System.out.println("@NUMERIC VALUES");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
Set floatSet = new TreeSet();
|
|
|
|
for (int i = 0; i < 0x10FFFF; ++i) {
|
|
|
|
float nv = ucd.getNumericValue(i);
|
|
|
|
if (Float.isNaN(nv)) continue;
|
|
|
|
floatSet.add(new Float(nv));
|
|
|
|
}
|
|
|
|
Iterator it = floatSet.iterator();
|
|
|
|
while(it.hasNext()) {
|
|
|
|
new MyFloatLister(ucd, ((Float)it.next()).floatValue(), output).print();
|
|
|
|
output.println();
|
|
|
|
System.out.print(".");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
output.close();
|
|
|
|
System.out.println();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static UCD ucd;
|
|
|
|
|
|
|
|
static public Normalizer formC, formD, formKC, formKD;
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static public void writeNormalizerTestSuite(String fileName) throws IOException {
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
PrintWriter log = new PrintWriter(
|
|
|
|
new BufferedWriter(
|
|
|
|
new OutputStreamWriter(
|
|
|
|
new FileOutputStream(GEN_DIR + fileName),
|
|
|
|
"UTF8"),
|
|
|
|
32*1024));
|
|
|
|
formC = new Normalizer(Normalizer.NFC);
|
|
|
|
formD = new Normalizer(Normalizer.NFD);
|
|
|
|
formKC = new Normalizer(Normalizer.NFKC);
|
|
|
|
formKD = new Normalizer(Normalizer.NFKD);
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
log.println("# " + fixFile(fileName));
|
|
|
|
log.println("#");
|
|
|
|
log.println("# Normalization Test Suite");
|
|
|
|
log.println("# Date: " + myDateFormat.format(new Date()) + " [MD]");
|
|
|
|
log.println("# Format:");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# Columns (c1, c2,...) are separated by semicolons");
|
|
|
|
log.println("# Comments are indicated with hash marks");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# CONFORMANCE:");
|
|
|
|
log.println("# 1. The following invariants must be true for all conformant implementations");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# NFC");
|
|
|
|
log.println("# c2 == NFC(c1) == NFC(c2) == NFC(c3)");
|
|
|
|
log.println("# c4 == NFC(c4) == NFC(c5)");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# NFD");
|
|
|
|
log.println("# c3 == NFD(c1) == NFD(c2) == NFD(c3)");
|
|
|
|
log.println("# c5 == NFD(c4) == NFD(c5");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# NFKC");
|
|
|
|
log.println("# c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# NFKD");
|
|
|
|
log.println("# c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# 2. For every assigned Unicode 3.1.0 code point X that is not specifically");
|
|
|
|
log.println("# listed in Part 1, the following invariants must be true for all conformant");
|
|
|
|
log.println("# implementations:");
|
|
|
|
log.println("#");
|
|
|
|
log.println("# X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
System.out.println("Writing Part 1");
|
|
|
|
|
|
|
|
log.println("#");
|
|
|
|
log.println("@Part0 # Specific cases");
|
|
|
|
log.println("#");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
for (int j = 0; j < testSuiteCases.length; ++j) {
|
|
|
|
writeLine(testSuiteCases[j], log, false);
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
System.out.println("Writing Part 2");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
log.println("#");
|
|
|
|
log.println("@Part1 # Character by character test");
|
|
|
|
log.println("# All characters not explicitly occurring in c1 of Part 1 have identical NFC, D, KC, KD forms.");
|
|
|
|
log.println("#");
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
|
|
|
Utility.dot(ch);
|
|
|
|
if (!ucd.isAssigned(ch)) continue;
|
|
|
|
if (ucd.isPUA(ch)) continue;
|
|
|
|
String cc = UTF32.valueOf32(ch);
|
|
|
|
writeLine(cc,log, true);
|
|
|
|
}
|
|
|
|
Utility.fixDot();
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
System.out.println("Finding Examples");
|
|
|
|
|
|
|
|
String[] example = new String[256];
|
|
|
|
|
|
|
|
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
|
|
|
Utility.dot(ch);
|
|
|
|
if (!ucd.isAssigned(ch)) continue;
|
|
|
|
if (ucd.isPUA(ch)) continue;
|
|
|
|
int cc = ucd.getCombiningClass(ch);
|
|
|
|
if (example[cc] == null) example[cc] = UTF32.valueOf32(ch);
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
Utility.fixDot();
|
|
|
|
System.out.println("Writing Part 3");
|
|
|
|
|
|
|
|
log.println("#");
|
|
|
|
log.println("@Part2 # Canonical Order Test");
|
|
|
|
log.println("#");
|
|
|
|
|
|
|
|
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
|
|
|
Utility.dot(ch);
|
|
|
|
if (!ucd.isAssigned(ch)) continue;
|
|
|
|
if (ucd.isPUA(ch)) continue;
|
|
|
|
short c = ucd.getCombiningClass(ch);
|
|
|
|
if (c == 0) continue;
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
// add character with higher class, same class, lower class
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
String sample = "";
|
|
|
|
for (int i = c+1; i < example.length; ++i) {
|
|
|
|
if (example[i] == null) continue;
|
|
|
|
sample += example[i];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
sample += example[c];
|
|
|
|
for (int i = c-1; i > 0; --i) {
|
|
|
|
if (example[i] == null) continue;
|
|
|
|
sample += example[i];
|
|
|
|
break;
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
writeLine("a" + sample + UTF32.valueOf32(ch) + "b", log, false);
|
|
|
|
writeLine("a" + UTF32.valueOf32(ch) + sample + "b", log, false);
|
|
|
|
}
|
|
|
|
Utility.fixDot();
|
|
|
|
log.println("#");
|
|
|
|
log.println("# END OF FILE");
|
|
|
|
log.close();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static void writeLine(String cc, PrintWriter log, boolean check) {
|
|
|
|
String c = formC.normalize(cc);
|
|
|
|
String d = formD.normalize(cc);
|
|
|
|
String kc = formKC.normalize(cc);
|
|
|
|
String kd = formKD.normalize(cc);
|
|
|
|
if (check & cc.equals(c) && cc.equals(d) && cc.equals(kc) && cc.equals(kd)) return;
|
|
|
|
log.println(
|
|
|
|
Utility.hex(cc," ") + ";" + Utility.hex(c," ") + ";" + Utility.hex(d," ") + ";"
|
|
|
|
+ Utility.hex(kc," ") + ";" + Utility.hex(kd," ")
|
2001-08-31 00:30:17 +00:00
|
|
|
+ "; # ("
|
2001-08-30 20:50:18 +00:00
|
|
|
+ comma(cc) + "; " + comma(c) + "; " + comma(d) + "; " + comma(kc) + "; " + comma(kd) + "; "
|
|
|
|
+ ") " + ucd.getName(cc));
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
// Shared scratch buffer used by comma(String): that method clears it on entry,
// fills it, and returns its contents as a String.
static StringBuffer commaResult = new StringBuffer();
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
// not recursive!!!
|
|
|
|
static final String comma(String s) {
|
|
|
|
commaResult.setLength(0);
|
|
|
|
int cp;
|
|
|
|
for (int i = 0; i < s.length(); i += UTF32.count16(i)) {
|
|
|
|
cp = UTF32.char32At(s, i);
|
|
|
|
if (ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
|
|
|
|
UTF32.append32(commaResult, cp);
|
|
|
|
}
|
|
|
|
return commaResult.toString();
|
|
|
|
}
|
2001-08-31 00:30:17 +00:00
|
|
|
|
2001-08-30 20:50:18 +00:00
|
|
|
static final String[] testSuiteCases = {
|
|
|
|
"\u1E0A",
|
|
|
|
"\u1E0C",
|
|
|
|
"\u1E0A\u0323",
|
|
|
|
"\u1E0C\u0307",
|
|
|
|
"D\u0307\u0323",
|
|
|
|
"D\u0323\u0307",
|
|
|
|
"\u1E0A\u031B",
|
|
|
|
"\u1E0C\u031B",
|
|
|
|
"\u1E0A\u031B\u0323",
|
|
|
|
"\u1E0C\u031B\u0307",
|
|
|
|
"D\u031B\u0307\u0323",
|
|
|
|
"D\u031B\u0323\u0307",
|
|
|
|
"\u00C8",
|
|
|
|
"\u0112",
|
|
|
|
"E\u0300",
|
|
|
|
"E\u0304",
|
|
|
|
"\u1E14",
|
|
|
|
"\u0112\u0300",
|
|
|
|
"\u1E14\u0304",
|
|
|
|
"E\u0304\u0300",
|
|
|
|
"E\u0300\u0304",
|
|
|
|
};
|
2001-12-13 23:36:29 +00:00
|
|
|
//*/
|
2001-08-30 20:50:18 +00:00
|
|
|
}
|