/** ******************************************************************************* * Copyright (C) 1996-2001, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ * $Date: 2002/09/25 06:40:13 $ * $Revision: 1.10 $ * ******************************************************************************* */ package com.ibm.text.UCA; import java.util.*; import java.io.*; import com.ibm.text.UCD.*; import com.ibm.text.utility.*; import com.ibm.icu.text.UTF16; public class GenOverlap implements UCD_Types { static Map completes = new TreeMap(); static Map back = new HashMap(); static Map initials = new HashMap(); static int[] ces = new int[50]; static UCA collator; static UCD ucd; static Normalizer nfd; static Normalizer nfkd; public static void validateUCA(UCA collatorIn) throws Exception { collator = collatorIn; ucd = UCD.make(); nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion()); nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion()); for (int cp = 0x0; cp <= 0x10FFFF; ++cp) { Utility.dot(cp); if (!ucd.isRepresented(cp)) continue; byte decompType = ucd.getDecompositionType(cp); if (decompType >= UCD.COMPATIBILITY) { String decomp = nfkd.normalize(cp); CEList celistDecomp = getCEList(cp, decomp, true, decompType); CEList celistNormal = getCEList(UTF16.valueOf(cp), false); if (!celistNormal.equals(celistDecomp)) { Utility.fixDot(); System.out.println(); System.out.println(ucd.getCodeAndName(cp)); System.out.println(celistNormal); System.out.println(celistDecomp); } } } } public static void test(UCA collatorIn) throws Exception { collator = collatorIn; CEList.main(null); System.out.println("# Overlap"); System.out.println("# Generated " + new Date()); ucd = UCD.make(); nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion()); nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion()); UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd); // store data for faster lookup System.out.println("# Gathering Data"); int counter = 0; int[] lenArray = new int[1]; while (true) { Utility.dot(counter++); String s = cc.next(ces, lenArray); if (s == null) break; int len = lenArray[0]; CEList currCEList = new CEList(ces, 0, len); addString(s, currCEList); } /* for (int cp = 0x10000; cp <= 0x10FFFF; ++cp) { if (!ucd.isRepresented(cp)) continue; byte decompType = ucd.getDecompositionType(cp); if (decompType >= UCD.COMPATIBILITY) { String decomp = nfkd.normalize(cp); CEList celist = getCEList(cp, decomp, true, decompType); addString(decomp, celist); System.out.println("Adding: " + ucd.getCodeAndName(cp) + "\t" + celist); } } */ Utility.fixDot(); System.out.println("# Completes Count: " + completes.size()); System.out.println("# Initials Count: " + initials.size()); System.out.println("# Writing Overlaps"); // simpleList(); fullCheck(); } public static void addString(String s, CEList currCEList) { back.put(s, currCEList); completes.put(currCEList, s); for (int i = 1; i < currCEList.length(); ++i) { CEList start = currCEList.start(i); Set bag = (Set) initials.get(start); if (bag == null) { bag = new TreeSet(); initials.put(start, bag); } bag.add(s); } } static void simpleList() { Iterator it = completes.keySet().iterator(); int counter = 0; int foundCount = 0; while (it.hasNext()) { Utility.dot(counter++); // see if the ces for the current element are the start of something else CEList key = (CEList) it.next(); String val = (String) completes.get(key); Set probe = (Set) initials.get(key); if (probe != null) { Utility.fixDot(); foundCount++; System.out.println("Possible Overlap: "); System.out.println(" " + ucd.getCodeAndName(val)); System.out.println("\t" + key); Iterator it2 = probe.iterator(); int count2 = 0; while (it2.hasNext()) { String match = (String) it2.next(); CEList ceList = (CEList) back.get(match); System.out.println((count2++) + ". " + ucd.getCodeAndName(match)); System.out.println("\t" + ceList); } } } System.out.println("# Found Count: " + foundCount); } static boolean PROGRESS = false; static void fullCheck() throws IOException { PrintWriter log = Utility.openPrintWriter("Overlap.html", Utility.UTF8_WINDOWS); PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt", Utility.UTF8_WINDOWS); Iterator it = completes.keySet().iterator(); int counter = 0; int foundCount = 0; String [] goalChars = new String[1]; String [] matchChars = new String[1]; // CEList show = getCEList("\u2034"); Utility.writeHtmlHeader(log, "Overlaps"); log.print("
WARNING:" + getCEList(matchChars[0]) + " | |||||
" + val + " | "); log.println("" + goalChars[0] + " | "); log.println("" + matchChars[0] + " | "); log.println("" + ucd.getCodeAndName(goalChars[0]) + " | "); log.println("" + ucd.getCodeAndName(matchChars[0]) + " | "); log.println("" + getCEList(goalChars[0]) + " |