/** ******************************************************************************* * Copyright (C) 1996-2001, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/testParser.java,v $ * $Date: 2003/04/25 01:39:15 $ * $Revision: 1.3 $ * ******************************************************************************* */ package com.ibm.text.utility; /** Simple Test program for XMLParse */ import java.io.*; import java.util.*; public class testParser implements XMLParseTypes { public static final String BASE_DIR = "C:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\UNIDATA 3.0.1\\"; public static final boolean VERBOSE = false; private static final String testFile = BASE_DIR + "UCD-Main.xml"; // "test.xml"; // BASE_DIR + "UCD-Main.xml"; public static void main (String[] args) throws Exception { //test1(); //test2(); test3(); } public static void test1() throws Exception { XMLParse xml = new XMLParse(testFile, new char[1000]); for (int i = 0; i < 100000; ++i) { byte kind = xml.next(); if (kind == DONE) break; String value = xml.getValue(); int quoteFlags = QUOTE_IEBUG | QUOTE_NON_ASCII | (kind != TEXT ? QUOTE_TABCRLF : 0); String qValue = XMLParse.quote(value, quoteFlags); if (VERBOSE) System.out.println(kindNames[kind] + ", \"" + value + "\", \"" + qValue + "\""); else { switch (kind) { case ELEMENT_TAG: System.out.print('<' + qValue); break; case ELEMENT_TAG_SLASH: System.out.print(""); break; case END_ELEMENT_COMMENT: System.out.print(">"); break; case END_ELEMENT_SLASH: System.out.print("/>"); break; case END_ELEMENT_QUESTION: System.out.print("?>"); break; case ATTRIBUTE_TAG: System.out.print(" " + qValue + "="); break; case ATTRIBUTE_VALUE: System.out.print("\"" + qValue + "\""); break; case TEXT: System.out.print(qValue); break; default: throw new Exception("Unknown KIND"); } } } } static final int NORMAL_QUOTE = QUOTE_NON_ASCII | QUOTE_IEBUG | QUOTE_TABCRLF; static void test2() throws Exception { PrintWriter log = Utility.openPrintWriter("UCD-Extract.html", Utility.UTF8_WINDOWS); //int fieldCount = 4; //int width = 100/fieldCount; //int first = width + 100 - width*fieldCount; try { log.println(""); log.println(""); log.println("Extract from UCD"); log.println(""); String tableHead = "" + "" + "" + "" + "" + ""; log.println(tableHead); XMLParse xml = new XMLParse(BASE_DIR + "UCD-Main.xml", new char[1000]); boolean recordingChar = false; int topByte = 0; int printByte = 0; Map data = new TreeMap(); String lastTag = ""; for (int line = 0; ; ++line) { byte kind = xml.next(); if (kind == DONE) break; String value = xml.getValue(); switch (kind) { case ELEMENT_TAG: recordingChar = value.equals("e"); break; case ATTRIBUTE_TAG: if (!recordingChar) break; lastTag = value; break; case ATTRIBUTE_VALUE: if (!recordingChar) break; data.put(lastTag, value); break; case END_ELEMENT: case END_ELEMENT_SLASH: if (!recordingChar) break; recordingChar = false; // get data String ch = (String)data.get("c"); ch = fixHack(ch); String name = (String)data.get("n"); if (name == null) name = ""; String props = (String)data.get("xs"); if (props == null) props = "\u00A0"; String gc = (String)data.get("gc"); if (gc == null) gc = "Lo"; // split tables int code = UTF32.char32At(ch, 0); if ((topByte & ~0x1F) != (code & ~0x1F)) { log.println("
CodeCharGCPropsName

"); log.println(tableHead); topByte = code; if ((printByte & ~0xFF) != (code & ~0xFF)) { System.out.println("Printing table for " + XMLParse.hex(topByte,2)); printByte = code; } } // draw line log.println("" + XMLParse.hex(code,4) + "" + XMLParse.quote(ch,NORMAL_QUOTE) + "" + XMLParse.quote(gc,NORMAL_QUOTE) + "" + XMLParse.quote(props,NORMAL_QUOTE) + "" + XMLParse.quote(name,NORMAL_QUOTE) + ""); // clear storage data.clear(); break; } } log.println(""); } finally { log.close(); } } static void test3() throws Exception { PrintWriter log = new PrintWriter(new BufferedWriter( new OutputStreamWriter( new FileOutputStream(BASE_DIR + "CaseFoldingDraft3.txt"), "UTF8"), 32*1024)); try { collect(log, "Other_Math"); collect (log, "Other_Alphabetic"); collect (log, "Other_Composite"); //int fieldCount = 4; //int width = 100/fieldCount; //int first = width + 100 - width*fieldCount; } finally { log.close(); } } static final void collect(PrintWriter log, String prop) throws Exception { XMLParse xml = new XMLParse(BASE_DIR + "UCD-Main.xml", new char[1000]); //boolean recordingChar = false; //int topByte = 0; //int printByte = 0; //Map data = new TreeMap(); String lastTag = ""; String lastChar = ""; String lastName = ""; String lastCat = ""; int startChar = -1; int endChar = -2; String startName = ""; String startCat = ""; for (int line = 0; ; ++line) { if ((line % 10000) == 0) System.err.println("Item " + line); byte kind = xml.next(); if (kind == DONE) break; String value = xml.getValue(); switch (kind) { case ATTRIBUTE_TAG: lastTag = value; break; case ATTRIBUTE_VALUE: if (lastTag.equals("c")) lastChar = value; else if (lastTag.equals("n")) lastName = value; else if (lastTag.equals("gc")) lastCat = value; else if (lastTag.equals("xs") && value.indexOf(prop) >= 0) { lastChar = fixHack(lastChar); int ch = UTF32.char32At(lastChar,0); if (ch == endChar + 1) endChar = ch; else { //FDD0; FDEF; Noncharacter_Code_Point; # XX; 32; if (endChar >= 0) log.println(Utility.hex(startChar, 4) + "; " + (endChar == startChar ? " " : Utility.hex(endChar, 4)) + "; " + prop + "; # " + startCat + "; " + (endChar-startChar+1) + "; " + startName + (endChar == startChar ? "" : "...")); startChar = endChar = ch; startName = lastName; startCat = lastCat; } } break; } } if (endChar >= 0) log.println(Utility.hex(startChar, 4) + "; " + (endChar == startChar ? " " : Utility.hex(endChar, 4)) + "; " + prop + "; # " + startCat + "; " + (endChar-startChar+1) + "; " + startName + (endChar == startChar ? "" : "...")); } static void test4() throws Exception { PrintWriter log = new PrintWriter(new BufferedWriter( new OutputStreamWriter( new FileOutputStream(BASE_DIR + "CaseFoldingDraft3.txt"), "UTF8"), 32*1024)); //int fieldCount = 4; //int width = 100/fieldCount; //int first = width + 100 - width*fieldCount; try { XMLParse xml = new XMLParse(BASE_DIR + "UCD-Main.xml", new char[1000]); boolean recordingChar = false; //int topByte = 0; //int printByte = 0; Map data = new TreeMap(); String lastTag = ""; for (int line = 0; ; ++line) { if ((line % 10000) == 0) System.err.println("Item " + line); byte kind = xml.next(); if (kind == DONE) break; String value = xml.getValue(); switch (kind) { case ELEMENT_TAG: recordingChar = value.equals("e"); break; case ATTRIBUTE_TAG: if (!recordingChar) break; lastTag = value; break; case ATTRIBUTE_VALUE: if (!recordingChar) break; data.put(lastTag, value); break; case END_ELEMENT: case END_ELEMENT_SLASH: if (!recordingChar) break; recordingChar = false; // get data String ch = (String)data.get("c"); ch = fixHack(ch); String name = (String)data.get("n"); if (name == null) name = ""; String lc = (String)data.get("lc"); if (lc == null) lc = ch; String fc = (String)data.get("fc"); if (fc == null) fc = (String)data.get("sl"); if (fc == null) fc = lc; if (fc.equals(ch)) continue; if (fc.length() == 1) { log.println(Utility.hex(ch, " ") + "; C; " + Utility.hex(fc, " ") + "; # " + name); } else { log.println(Utility.hex(ch, " ") + "; F; " + Utility.hex(fc, " ") + "; # " + name); if (!lc.equals(ch)) { log.println(Utility.hex(ch, " ") + "; S; " + Utility.hex(lc, " ") + "; # " + name); } } // clear storage data.clear(); break; } } } finally { log.close(); } } static final String fixHack(String s) { StringBuffer result = new StringBuffer(); char last = '\u0000'; int position = -1; for (int i = 0; i < s.length(); ++i) { char c = s.charAt(i); if (position > 0) { if (c == ';') { int x = Integer.parseInt(s.substring(position,i),16); result.append(UTF32.valueOf32(x)); position = -1; } } else { if (last == '#' && c == 'x') { result.setLength(result.length()-1); // remove '#' position = i+1; } else { result.append(c); } } last = c; } if (result != null) return result.toString(); return s; } }