scuffed-code/tools/unicodetools/com/ibm/text/UCD/Compare14652.java

/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.3 $
*
*******************************************************************************
*/

package com.ibm.text.UCD;

import java.util.*;
import java.io.*;

import com.ibm.text.utility.*;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

// quick and dirty function for grabbing contents of ISO 14652 file

public class Compare14652 implements UCD_Types {
    
    // Not referenced anywhere else in this class.
    // NOTE(review): presumably a leftover toggle -- confirm no other class in
    // the package reads it before removing.
    static final boolean oldVersion = false;
    
    /**
     * Looks up the set of code points for one property value.
     *
     * @param prop      property-group constant (for example CATEGORY or DERIVED)
     * @param propValue value constant within that group
     * @return the set obtained from UnifiedBinaryProperty for the combined id
     */
    public static UnicodeSet getSet(int prop, byte propValue) {
        // The group constant and the value are OR-ed into a single property id.
        final int unifiedId = prop | propValue;
        final UnifiedBinaryProperty property = UnifiedBinaryProperty.make(unifiedId);
        return property.getSet();
    }
    
    // Building-block sets used as the Unicode-property "guess" for each
    // POSIX / ISO 14652 character class. They are initialized in textual
    // order and later entries read earlier ones, so the order of this
    // declaration list matters.
    // NOTE(review): several entries call addAll/removeAll directly on the
    // set returned by getSet(). If getSet() can hand out a shared instance,
    // that instance is modified here -- confirm it returns a fresh set.
    static UnicodeSet
        titleSet = getSet(CATEGORY, Lt),
        // gc=M: Mc + Me + Mn
        combiningSet = getSet(CATEGORY, Mc)
            .addAll(getSet(CATEGORY, Me))
            .addAll(getSet(CATEGORY, Mn)),
        // gc=Z: Zs + Zl + Zp
        zSet = getSet(CATEGORY, Zs)
            .addAll(getSet(CATEGORY, Zl))
            .addAll(getSet(CATEGORY, Zp)),
        // gc=P: Pd + Ps + Pe + Pc + Po + Pi + Pf
        pSet = getSet(CATEGORY, Pd)
            .addAll(getSet(CATEGORY, Ps))
            .addAll(getSet(CATEGORY, Pe))
            .addAll(getSet(CATEGORY, Pc))
            .addAll(getSet(CATEGORY, Po))
            .addAll(getSet(CATEGORY, Pi))
            .addAll(getSet(CATEGORY, Pf)),
        // gc=S: Sm + Sc + Sk + So
        sSet = getSet(CATEGORY, Sm)
            .addAll(getSet(CATEGORY, Sc))
            .addAll(getSet(CATEGORY, Sk))
            .addAll(getSet(CATEGORY, So)),
        noSet = getSet(CATEGORY, No),
        csSet = getSet(CATEGORY, Cs),
        cfSet = getSet(CATEGORY, Cf),
        cnSet = getSet(CATEGORY, Cn),
        circled = getSet(DECOMPOSITION_TYPE, COMPAT_CIRCLE),
        whitespaceSet = getSet(BINARY_PROPERTIES, White_space),
        // "alpha" guess: Alphabetic plus all combining marks
        alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),
        // "lower"/"upper" guesses: case property plus titlecase letters,
        // minus characters whose decomposition type is circle
        lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet).removeAll(circled),
        upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet).removeAll(circled),
        digitSet = getSet(CATEGORY, Nd),
        // "xdigit" guess: a-f / A-F in ASCII and fullwidth forms, plus gc=Nd
        xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),
        // Falls back to gc=Z when the White_space set is empty. Note this is
        // the same object as zSet or whitespaceSet, not a copy.
        spaceSet = whitespaceSet.size() == 0 ? zSet : whitespaceSet,
        controlSet = getSet(CATEGORY, Cc),
        // "punct" guess: gc=P plus gc=S (built on a copy of pSet)
        punctSet = new UnicodeSet(pSet).addAll(sSet),
        // "graph" guess: every code point except Cc, Cs, Cn and Z
        graphSet = new UnicodeSet(0,0x10ffff)
            .removeAll(controlSet)
            //.removeAll(getSet(CATEGORY, Cf))
            .removeAll(csSet)
            .removeAll(cnSet)
            .removeAll(zSet),
            // Removed above: Cc, Cs, Cn, Z. The Cf removal is commented out,
            // so Cf characters stay in graphSet.
        // "blank" guess: a copy of spaceSet without U+000A..U+000D, U+0085,
        // Zl and Zp
        blankSet = new UnicodeSet(spaceSet).removeAll(new UnicodeSet("[\\u000A-\\u000D\\u0085]"))
            .removeAll(getSet(CATEGORY, Zl))
            .removeAll(getSet(CATEGORY, Zp));
    

    /**
     * One character class read from the ISO 14652 data, paired with the
     * Unicode-property expression (the "guess") it is compared against.
     */
    static class Prop {
        /** Class name; main() replaces it with the "ISO_14652_"-prefixed form after construction. */
        String name;
        /** Code points listed for this class in the data file. */
        UnicodeSet contents = new UnicodeSet();
        /** Human-readable description of the guess; "???" when no guess is known. */
        String guess = "???";
        /** Code points of the guess; shares the static set instance rather than copying it. */
        UnicodeSet guessContents = new UnicodeSet();

        /** Label for the space guess, matching the fallback used when spaceSet is built. */
        String wsname = whitespaceSet.size() == 0 ? "gc=Z" : "Whitespace";

        /**
         * Creates a property and selects the guess that belongs to the given name.
         * Names without a known guess keep "???" and an empty guess set.
         *
         * Names seen in the data file: upper, lower, alpha, digit, outdigit,
         * space, cntrl, punct, graph, xdigit, blank, toupper, tolower.
         *
         * @param name class name to match against the known guesses
         */
        Prop(String name) {
            this.name = name;
            if (name.equals("alpha")) {
                guess = "Alphabetic + gc=M";
                guessContents = alphaSet;
            } else if (name.equals("lower")) {
                guess = "Lowercase + gc=Lt - dt=circle";
                guessContents = lowerSet;
            } else if (name.equals("upper")) {
                guess = "Uppercase + gc=Lt - dt=circle";
                guessContents = upperSet;
            } else if (name.equals("digit")) {
                guess = "gc=Nd";
                guessContents = digitSet;
            } else if (name.equals("xdigit")) {
                guess = "gc=Nd+a..f (upper/lower,normal/fullwidth)";
                guessContents = xdigitSet;
            } else if (name.equals("space")) {
                guess = wsname;
                guessContents = spaceSet;
                //Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);
            } else if (name.equals("cntrl")) {
                guess = "gc=Cc";
                guessContents = controlSet;
            } else if (name.equals("punct")) {
                guess = "gc=P,S";
                guessContents = punctSet;
            } else if (name.equals("graph")) {
                guess = "All - gc=Cc, Cs, Cn, or Z";
                guessContents = graphSet;
            } else if (name.equals("blank")) {
                guess = wsname + " - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";
                guessContents = blankSet;
            } else if (name.equals("ISO_14652_class \"combining\"")) {
                // NOTE(review): main() passes the raw line without the
                // "ISO_14652_" prefix, so this comparison looks unreachable
                // from there -- confirm the intended name before changing it.
                guess = "gc=M";
                guessContents = combiningSet;
            }
        }

        /**
         * Writes the differences between the listed contents and the guess.
         * Nothing is written for the section header, the case mappings,
         * outdigit, or any "class" entry.
         *
         * @param pw destination for the report
         */
        void show(PrintWriter pw) {
            if (name.equals("ISO_14652_LC_CTYPE")) return;
            if (name.equals("ISO_14652_toupper")) return;
            if (name.equals("ISO_14652_tolower")) return;
            // The original tested outdigit twice in a row; once is enough.
            if (name.equals("ISO_14652_outdigit")) return;
            if (name.startsWith("ISO_14652_class")) return;

            pw.println();
            pw.println("**************************************************");
            pw.println(name);
            pw.println("**************************************************");
            Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd());
            //pw.println(props[i].contents);
        }
    }
    
    // Properties parsed from the data file, in file order. Only the first
    // propCount entries are populated. The capacity of 100 is fixed and
    // main() stores into props without a bounds check.
    static Prop[] props = new Prop[100];
    static int propCount = 0;
    
    /**
     * Reads the ISO 14652 LC_CTYPE data file, writes the differences between
     * each listed class and its Unicode-property guess, and then checks the
     * POSIX inclusion and disjointness requirements for both the listed sets
     * and the guesses. Output goes to "Diff14652_" + version + ".txt".
     *
     * The input path is hard-coded; the arguments are not used.
     *
     * @param args ignored
     * @throws IOException if the data file cannot be read or closed
     * @throws IllegalArgumentException if code point data precedes the first
     *         property name, or a code point is malformed
     * @throws IllegalStateException if a class needed for the POSIX checks is
     *         missing from the data file
     */
    public static void main(String[] args) throws IOException {

        String version = Default.ucd().getVersion();
        PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS);
        try {
            log.write('\uFEFF');
            log.print("Version: " + version);

            if (false) {
                UnicodeSet ID = getSet(DERIVED, ID_Start).addAll(getSet(DERIVED, ID_Continue_NO_Cf));
                UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf));
                UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc));

                Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd());
                Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd());
            }

            BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
            try {
                while (true) {
                    String line = br.readLine();
                    if (line == null) break;
                    line = line.trim();
                    if (line.length() == 0) continue;
                    // A trailing '/' marks a continued line; drop it.
                    if (line.charAt(line.length() - 1) == '/') {
                        line = line.substring(0, line.length() - 1);
                    }
                    line = line.trim();
                    if (line.length() == 0) continue;

                    char ch = line.charAt(0);
                    if (ch == '%') continue;
                    if (ch == '(') continue;
                    if (ch == '<') {
                        // Data lines belong to the most recent property header.
                        if (propCount == 0) {
                            throw new IllegalArgumentException(
                                "Code point data before any property name: " + line);
                        }
                        addItems(line, props[propCount-1].contents);
                    } else {
                        // new property
                        System.out.println(line);
                        if (line.equals("width")) break;
                        props[propCount] = new Prop(line);
                        props[propCount].name = "ISO_14652_" + line;
                        props[propCount].contents = new UnicodeSet();
                        propCount++;
                    }
                }
            } finally {
                // The reader was previously never closed.
                br.close();
            }

            for (int i = 0; i < propCount; ++i) props[i].show(log);

            log.println();
            log.println("**************************************************");
            log.println("Checking POSIX requirements for inclusion and disjointness.");
            log.println("**************************************************");
            log.println();
/*
alpha, digit, punct, cntrl are all disjoint
space, cntrl, blank are pairwise disjoint with any of alpha, digit, xdigit
alpha includes upper, lower
graph includes alpha, digit, punct
print includes graph
xdigit includes digit
*/
            Prop
                alpha = requireProp("ISO_14652_alpha"),
                upper = requireProp("ISO_14652_upper"),
                lower = requireProp("ISO_14652_lower"),
                graph = requireProp("ISO_14652_graph"),
                //print = getProp("ISO_14652_print"),
                punct = requireProp("ISO_14652_punct"),
                digit = requireProp("ISO_14652_digit"),
                xdigit = requireProp("ISO_14652_xdigit"),
                space = requireProp("ISO_14652_space"),
                blank = requireProp("ISO_14652_blank"),
                cntrl = requireProp("ISO_14652_cntrl");

            checkDisjoint(log, new Prop[] {alpha, digit, punct, cntrl});

            // Every member of l1 must be disjoint with every member of l2.
            // These are two different lists, so the inner loop starts at 0;
            // starting at i + 1 skipped most of the required pairs.
            Prop [] l1 = new Prop[] {space, cntrl, blank};
            Prop [] l2 = new Prop[] {alpha, digit, xdigit};
            for (int i = 0; i < l1.length; ++i) {
                for (int j = 0; j < l2.length; ++j) {
                    checkDisjoint(log, l1[i], l2[j]);
                }
            }
            checkIncludes(log, alpha, upper);
            checkIncludes(log, alpha, lower);
            checkIncludes(log, graph, alpha);
            checkIncludes(log, graph, digit);
            checkIncludes(log, graph, punct);
            //checkIncludes(log, print, graph);
            checkIncludes(log, xdigit, digit);


            // possibly alpha, digit, punct, cntrl, space cover the !(Cn,Cs)

            // Code points outside Cn and Cs that no listed class covers.
            UnicodeSet trRemainder = new UnicodeSet(cnSet)
                .complement()
                .removeAll(csSet)
                .removeAll(digit.contents)
                .removeAll(punct.contents)
                .removeAll(alpha.contents)
                .removeAll(cntrl.contents)
                .removeAll(space.contents);
            Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd());

            // The same remainder, computed from the property guesses.
            UnicodeSet propRemainder = new UnicodeSet(cnSet)
                .complement()
                .removeAll(csSet)
                //.removeAll(noSet)
                //.removeAll(cfSet)
                .removeAll(digit.guessContents)
                .removeAll(punct.guessContents)
                .removeAll(alpha.guessContents)
                .removeAll(cntrl.guessContents)
                .removeAll(space.guessContents);
            Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd());

        } finally {
            log.close();
        }
    }

    /**
     * Looks up a parsed property and fails with a descriptive message when it
     * is absent, instead of letting a null reach the checks.
     */
    private static Prop requireProp(String name) {
        Prop prop = getProp(name);
        if (prop == null) {
            throw new IllegalStateException("Property not found in ISO 14652 data: " + name);
        }
        return prop;
    }
    
    /**
     * Checks every unordered pair of properties in the list for disjointness.
     *
     * @param log  destination for failure reports
     * @param list properties that must be pairwise disjoint
     */
    static void checkDisjoint(PrintWriter log, Prop[] list) {
        final int count = list.length;
        int first = 0;
        while (first < count) {
            // Pair the current entry with each entry after it.
            for (int second = first + 1; second < count; second++) {
                checkDisjoint(log, list[first], list[second]);
            }
            first++;
        }
    }
    
    /**
     * Checks two properties for disjointness twice: first on the contents
     * listed in the data file, then on the property guesses.
     *
     * @param log   destination for failure reports
     * @param prop1 first property
     * @param prop2 second property
     */
    static void checkDisjoint(PrintWriter log, Prop prop1, Prop prop2) {
        // As listed in the data file.
        final UnicodeSet listed1 = prop1.contents;
        final UnicodeSet listed2 = prop2.contents;
        checkDisjoint(log, prop1.name, listed1, prop2.name, listed2);

        // As derived from Unicode properties.
        final UnicodeSet guessed1 = prop1.guessContents;
        final UnicodeSet guessed2 = prop2.guessContents;
        checkDisjoint(log, prop1.guess, guessed1, prop2.guess, guessed2);
    }
    
    /**
     * Reports a failure, followed by the shared code points, when the two
     * sets overlap. Writes nothing when they are disjoint.
     *
     * @param log   destination for the report
     * @param name  label for the first set
     * @param set   first set
     * @param name2 label for the second set
     * @param set2  second set
     */
    static void checkDisjoint(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
        final boolean overlaps = set.containsSome(set2);
        if (!overlaps) {
            return;
        }
        log.println();
        log.println("Fails test: " + name + " disjoint-with " + name2);
        // The intersection is built on a copy so neither input is modified.
        final UnicodeSet shared = new UnicodeSet(set);
        shared.retainAll(set2);
        Utility.showSetNames(log, "", shared, false, false, Default.ucd());
    }
    
    /**
     * Checks that the first property includes the second, twice: first on the
     * contents listed in the data file, then on the property guesses.
     *
     * @param log   destination for failure reports
     * @param prop1 property expected to be the superset
     * @param prop2 property expected to be the subset
     */
    static void checkIncludes(PrintWriter log, Prop prop1, Prop prop2) {
        // As listed in the data file.
        final UnicodeSet listedOuter = prop1.contents;
        final UnicodeSet listedInner = prop2.contents;
        checkIncludes(log, prop1.name, listedOuter, prop2.name, listedInner);

        // As derived from Unicode properties.
        final UnicodeSet guessedOuter = prop1.guessContents;
        final UnicodeSet guessedInner = prop2.guessContents;
        checkIncludes(log, prop1.guess, guessedOuter, prop2.guess, guessedInner);
    }
    
    /**
     * Reports a failure, followed by the missing code points, when the first
     * set does not contain all of the second. Writes nothing otherwise.
     *
     * @param log   destination for the report
     * @param name  label for the set expected to be the superset
     * @param set   set expected to be the superset
     * @param name2 label for the set expected to be the subset
     * @param set2  set expected to be the subset
     */
    static void checkIncludes(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
        if (!set.containsAll(set2)) {
            log.println();
            // A space follows the colon so the line matches the
            // "Fails test: " prefix written by checkDisjoint.
            log.println("Fails test: " + name + " includes " + name2);
            // Members of set2 that set lacks; built on a copy of set2.
            UnicodeSet diff = new UnicodeSet(set2).removeAll(set);
            Utility.showSetNames(log, "", diff, false, false, Default.ucd());
        }
    }

    // Scratch buffer that addItems fills through Utility.split on each call.
    static String[] pieces = new String[100];

    /**
     * Adds the code points named on one data line to the given set.
     * The line is a ';'-separated list of single code points and ".." ranges,
     * for example:
     * <U1F48>..<U1F4D>;<U1F59>;<U1F5B>;<U1F5D>;<U1F5F>;<U1F68>..<U1F6F>;/
     * Empty items and the literal item "<0>" are skipped.
     *
     * @param line     data line to parse
     * @param contents set that receives the code points
     */
    static void addItems(String line, UnicodeSet contents) {
        final int count = Utility.split(line, ';', pieces);
        for (int index = 0; index < count; ++index) {
            final String item = pieces[index].trim();
            if (item.length() == 0 || item.equals("<0>")) {
                continue;
            }
            final int dots = item.indexOf("..");
            final int first;
            final int last;
            if (dots < 0) {
                // Single code point.
                first = parse(item);
                last = first;
            } else {
                // Range: the parts on either side of "..".
                first = parse(item.substring(0, dots));
                last = parse(item.substring(dots + 2));
            }
            contents.add(first, last);
        }
    }
    
    /**
     * Parses a code point written as "<U" + hex digits + ">", for example
     * "<U1F48>".
     *
     * @param piece the text to parse
     * @return the code point value, in the range 0..0x10FFFF
     * @throws IllegalArgumentException if the text lacks the "<U" prefix or
     *         ">" suffix, carries a sign, is not hexadecimal (reported as a
     *         NumberFormatException), or is outside 0..0x10FFFF
     */
    static int parse(String piece) {
        if (!piece.startsWith("<U") || !piece.endsWith(">")) {
            throw new IllegalArgumentException("Bogus code point: " + piece);
        }
        final String hex = piece.substring(2, piece.length() - 1);
        // Integer.parseInt accepts a leading sign, which is never valid here.
        final boolean signed = hex.startsWith("+") || hex.startsWith("-");
        final int codePoint = signed ? -1 : Integer.parseInt(hex, 16);
        if (codePoint < 0 || codePoint > 0x10FFFF) {
            throw new IllegalArgumentException("Bogus code point: " + piece);
        }
        return codePoint;
    }
    
    /**
     * Finds the first parsed property whose name equals the given name.
     *
     * @param name full property name, including the "ISO_14652_" prefix that
     *             main() puts on every parsed entry
     * @return the matching property, or null when none matches
     */
    static Prop getProp(String name) {
        Prop found = null;
        // Scan in file order and stop at the first hit.
        for (int index = 0; found == null && index < propCount; ++index) {
            final Prop candidate = props[index];
            if (candidate.name.equals(name)) {
                found = candidate;
            }
        }
        return found;
    }
    
    // oddities: 
        // extra space after ';' <U0300>..<U036F>; <U20D0>..<U20FF>; <UFE20>..<UFE2F>;/
        // <0>?? <0>;<U0BE7>..<U0BEF>;/
        // <U202C>; <U202D>;<U202E>; <UFEFF> : 0;/
       // % "print" is by default "graph", and the <space> character
       // print is odd, since it includes space but not other spaces.
       // alnum not defined.

}
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`/**`
			`*******************************************************************************`
			`* Copyright (C) 1996-2001, International Business Machines Corporation and *`
			`* others. All Rights Reserved. *`
			`*******************************************************************************`
			`*`
			`* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $`
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`* $Date: 2004/02/07 01:01:16 $`
			`* $Revision: 1.3 $`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`*`
			`*******************************************************************************`
			`*/`

			`package com.ibm.text.UCD;`

			`import java.util.*;`
			`import java.io.*;`

			`import com.ibm.text.utility.*;`
			`import com.ibm.icu.text.UTF16;`
			`import com.ibm.icu.text.UnicodeSet;`

			`// quick and dirty function for grabbing contents of ISO 14652 file`

			`public class Compare14652 implements UCD_Types {`

ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`static final boolean oldVersion = false;`

ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`public static UnicodeSet getSet(int prop, byte propValue) {`
			`return UnifiedBinaryProperty.make(prop \| propValue).getSet();`
			`}`

			`static UnicodeSet`
			`titleSet = getSet(CATEGORY, Lt),`
			`combiningSet = getSet(CATEGORY, Mc)`
			`.addAll(getSet(CATEGORY, Me))`
			`.addAll(getSet(CATEGORY, Mn)),`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`zSet = getSet(CATEGORY, Zs)`
			`.addAll(getSet(CATEGORY, Zl))`
			`.addAll(getSet(CATEGORY, Zp)),`
			`pSet = getSet(CATEGORY, Pd)`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`.addAll(getSet(CATEGORY, Ps))`
			`.addAll(getSet(CATEGORY, Pe))`
			`.addAll(getSet(CATEGORY, Pc))`
			`.addAll(getSet(CATEGORY, Po))`
			`.addAll(getSet(CATEGORY, Pi))`
			`.addAll(getSet(CATEGORY, Pf)),`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`sSet = getSet(CATEGORY, Sm)`
			`.addAll(getSet(CATEGORY, Sc))`
			`.addAll(getSet(CATEGORY, Sk))`
			`.addAll(getSet(CATEGORY, So)),`
			`noSet = getSet(CATEGORY, No),`
			`csSet = getSet(CATEGORY, Cs),`
			`cfSet = getSet(CATEGORY, Cf),`
			`cnSet = getSet(CATEGORY, Cn),`
			`circled = getSet(DECOMPOSITION_TYPE, COMPAT_CIRCLE),`
			`whitespaceSet = getSet(BINARY_PROPERTIES, White_space),`
			`alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),`
			`lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet).removeAll(circled),`
			`upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet).removeAll(circled),`
			`digitSet = getSet(CATEGORY, Nd),`
			`xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),`
			`spaceSet = whitespaceSet.size() == 0 ? zSet : whitespaceSet,`
			`controlSet = getSet(CATEGORY, Cc),`
			`punctSet = new UnicodeSet(pSet).addAll(sSet),`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`graphSet = new UnicodeSet(0,0x10ffff)`
			`.removeAll(controlSet)`
			`//.removeAll(getSet(CATEGORY, Cf))`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`.removeAll(csSet)`
			`.removeAll(cnSet)`
			`.removeAll(zSet),`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`// Cc, Cf, Cs, Cn, Z`
			`blankSet = new UnicodeSet(spaceSet).removeAll(new UnicodeSet("[\\u000A-\\u000D\\u0085]"))`
			`.removeAll(getSet(CATEGORY, Zl))`
			`.removeAll(getSet(CATEGORY, Zp));`


			`static class Prop {`
			`String name;`
			`UnicodeSet contents = new UnicodeSet();`
			`String guess = "???";`
			`UnicodeSet guessContents = new UnicodeSet();`

ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`String wsname = whitespaceSet.size() == 0 ? "gc=Z" : "Whitespace";`

ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`Prop(String name) {`
			`this.name = name;`
			`if (name.equals("alpha")) {`
			`guess = "Alphabetic + gc=M";`
			`guessContents = alphaSet;`
			`} else if (name.equals("lower")) {`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`guess = "Lowercase + gc=Lt - dt=circle";`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`guessContents = lowerSet;`
			`} else if (name.equals("upper")) {`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`guess = "Uppercase + gc=Lt - dt=circle";`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`guessContents = upperSet;`
			`} else if (name.equals("digit")) {`
			`guess = "gc=Nd";`
			`guessContents = digitSet;`
			`} else if (name.equals("xdigit")) {`
			`guess = "gc=Nd+a..f (upper/lower,normal/fullwidth)";`
			`guessContents = xdigitSet;`
			`} else if (name.equals("space")) {`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`guess = wsname;`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`guessContents = spaceSet;`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`//Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`} else if (name.equals("cntrl")) {`
			`guess = "gc=Cc";`
			`guessContents = controlSet;`
			`} else if (name.equals("punct")) {`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`guess = "gc=P,S";`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`guessContents = punctSet;`
			`} else if (name.equals("graph")) {`
			`guess = "All - gc=Cc, Cs, Cn, or Z";`
			`guessContents = graphSet;`
			`} else if (name.equals("blank")) {`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`guess = wsname + " - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`guessContents = blankSet;`
			`} else if (name.equals("ISO_14652_class \"combining\"")) {`
			`guess = "gc=M";`
			`guessContents = combiningSet;`
			`}`


			`/*upper`
			`lower`
			`alpha`
			`digit`
			`outdigit`
			`space`
			`cntrl`
			`punct`
			`graph`
			`xdigit`
			`blank`
			`toupper`
			`tolower`
			`*/`
			`}`

			`void show(PrintWriter pw) {`
			`if (name.equals("ISO_14652_LC_CTYPE")) return;`
			`if (name.equals("ISO_14652_toupper")) return;`
			`if (name.equals("ISO_14652_tolower")) return;`
			`if (name.equals("ISO_14652_outdigit")) return;`
			`if (name.equals("ISO_14652_outdigit")) return;`
			`if (name.startsWith("ISO_14652_class")) return;`

			`pw.println();`
			`pw.println("**************************************************");`
			`pw.println(name);`
			`pw.println("**************************************************");`
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd());`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`//pw.println(props[i].contents);`
			`}`
			`}`

			`static Prop[] props = new Prop[100];`
			`static int propCount = 0;`

			`public static void main(String[] args) throws IOException {`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`String version = Default.ucd().getVersion();`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS);`
			`try {`
			`log.write('\uFEFF');`
			`log.print("Version: " + version);`

			`if (false) {`
			`UnicodeSet ID = getSet(DERIVED, ID_Start).addAll(getSet(DERIVED, ID_Continue_NO_Cf));`
			`UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf));`
			`UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc));`

misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd());`
			`Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd());`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`}`

ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);`
			`while (true) {`
			`String line = br.readLine();`
			`if (line == null) break;`
			`line = line.trim();`
			`if (line.length() == 0) continue;`
			`if (line.charAt(line.length() - 1) == '/') {`
			`line = line.substring(0, line.length() - 1);`
			`}`
			`line = line.trim();`
			`if (line.length() == 0) continue;`

			`char ch = line.charAt(0);`
			`if (ch == '%') continue;`
			`if (ch == '(') continue;`
			`if (ch == '<') {`
			`addItems(line, props[propCount-1].contents);`
			`} else {`
			`// new property`
			`System.out.println(line);`
			`if (line.equals("width")) break;`
			`props[propCount] = new Prop(line);`
			`props[propCount].name = "ISO_14652_" + line;`
			`props[propCount].contents = new UnicodeSet();`
			`propCount++;`
			`}`
ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`}`

			`for (int i = 0; i < propCount; ++i) props[i].show(log);`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00
			`log.println();`
			`log.println("**************************************************");`
			`log.println("Checking POSIX requirements for inclusion and disjointness.");`
			`log.println("**************************************************");`
			`log.println();`
			`/*`
			`alpha, digit, punct, cntrl are all disjoint`
			`space, cntrl, blank are pairwise disjoint with any of alpha, digit, xdigit`
			`alpha includes upper, lower`
			`graph includes alpha, digit, punct`
			`print includes graph`
			`xdigit includes digit`
			`*/`
			`Prop`
			`alpha = getProp("ISO_14652_alpha"),`
			`upper = getProp("ISO_14652_upper"),`
			`lower = getProp("ISO_14652_lower"),`
			`graph = getProp("ISO_14652_graph"),`
			`//print = getProp("ISO_14652_print"),`
			`punct = getProp("ISO_14652_punct"),`
			`digit = getProp("ISO_14652_digit"),`
			`xdigit = getProp("ISO_14652_xdigit"),`
			`space = getProp("ISO_14652_space"),`
			`blank = getProp("ISO_14652_blank"),`
			`cntrl = getProp("ISO_14652_cntrl");`

			`checkDisjoint(log, new Prop[] {alpha, digit, punct, cntrl});`

			`Prop [] l1 = new Prop[] {space, cntrl, blank};`
			`Prop [] l2 = new Prop[] {alpha, digit, xdigit};`
			`for (int i = 0; i < l1.length; ++i) {`
			`for (int j = i + 1; j < l2.length; ++j) {`
			`checkDisjoint(log, l1[i], l2[j]);`
			`}`
			`}`
			`checkIncludes(log, alpha, upper);`
			`checkIncludes(log, alpha, lower);`
			`checkIncludes(log, graph, alpha);`
			`checkIncludes(log, graph, digit);`
			`checkIncludes(log, graph, punct);`
			`//checkIncludes(log, print, graph);`
			`checkIncludes(log, xdigit, digit);`


			`// possibly alpha, digit, punct, cntrl, space cover the !(Cn,Cs)`

			`UnicodeSet trRemainder = new UnicodeSet(cnSet)`
			`.complement()`
			`.removeAll(csSet)`
			`.removeAll(digit.contents)`
			`.removeAll(punct.contents)`
			`.removeAll(alpha.contents)`
			`.removeAll(cntrl.contents)`
			`.removeAll(space.contents);`
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd());`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00
			`UnicodeSet propRemainder = new UnicodeSet(cnSet)`
			`.complement()`
			`.removeAll(csSet)`
			`//.removeAll(noSet)`
			`//.removeAll(cfSet)`
			`.removeAll(digit.guessContents)`
			`.removeAll(punct.guessContents)`
			`.removeAll(alpha.guessContents)`
			`.removeAll(cntrl.guessContents)`
			`.removeAll(space.guessContents);`
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd());`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00
			`/*`
			`checkDisjoint(new Prop[] {alpha, digit, punct, cntrl});`
			`UnicodeSet remainder = cnSet.complement();`
			`UnicodeSet guessRemainder = new UnicodeSet(remainder);`
			`for (int i = 0; i < list.length; ++i) {`
			`for (int j = i + 1; j < list.length; ++j) {`
			`compare(log, list[i].name, list[i].contents, list[j].name, list[j].contents);`
			`compare(log, list[i].guess, list[i].guessContents, list[j].guess, list[j].guessContents);`
			`}`
			`remainder.removeAll(list[i].contents);`
			`guessRemainder.removeAll(list[i].guessContents);`
			`}`
			`if (remainder.size() != 0) {`
			`log.println();`
			`log.println("Incomplete (TR): " + remainder);`
			`}`
			`if (guessRemainder.size() != 0) {`
			`log.println();`
			`log.println("Incomplete (Prop): " + guessRemainder);`
			`}`
			`*/`

ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`} finally {`
			`log.close();`
			`}`
			`}`

ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`static void checkDisjoint(PrintWriter log, Prop[] list) {`
			`for (int i = 0; i < list.length; ++i) {`
			`for (int j = i + 1; j < list.length; ++j) {`
			`checkDisjoint(log, list[i], list[j]);`
			`}`
			`}`
			`}`

			`static void checkDisjoint(PrintWriter log, Prop prop1, Prop prop2) {`
			`checkDisjoint(log, prop1.name, prop1.contents, prop2.name, prop2.contents);`
			`checkDisjoint(log, prop1.guess, prop1.guessContents, prop2.guess, prop2.guessContents);`
			`}`

			`static void checkDisjoint(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {`
			`if (set.containsSome(set2)) {`
			`log.println();`
			`log.println("Fails test: " + name + " disjoint-with " + name2);`
			`UnicodeSet diff = new UnicodeSet(set).retainAll(set2);`
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`Utility.showSetNames(log, "", diff, false, false, Default.ucd());`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`}`
			`}`

			`static void checkIncludes(PrintWriter log, Prop prop1, Prop prop2) {`
			`checkIncludes(log, prop1.name, prop1.contents, prop2.name, prop2.contents);`
			`checkIncludes(log, prop1.guess, prop1.guessContents, prop2.guess, prop2.guessContents);`
			`}`

			`static void checkIncludes(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {`
			`if (!set.containsAll(set2)) {`
			`log.println();`
			`log.println("Fails test:" + name + " includes " + name2);`
			`UnicodeSet diff = new UnicodeSet(set2).removeAll(set);`
misc fixes to UnicodeProperty, etc. X-SVN-Rev: 14468 2004-02-07 01:01:17 +00:00			`Utility.showSetNames(log, "", diff, false, false, Default.ucd());`
ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`}`
			`}`

ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`static String[] pieces = new String[100];`

			`// example: <U1F48>..<U1F4D>;<U1F59>;<U1F5B>;<U1F5D>;<U1F5F>;<U1F68>..<U1F6F>;/`
			`static void addItems(String line, UnicodeSet contents) {`
			`int len = Utility.split(line, ';', pieces);`
			`for (int i = 0; i < len; ++i) {`
			`String piece = pieces[i].trim();`
			`if (piece.length() == 0) continue;`
			`if (piece.equals("<0>")) continue;`
			`int start, end;`
			`int rangePoint = piece.indexOf("..");`
			`if (rangePoint >= 0) {`
			`start = parse(piece.substring(0,rangePoint));`
			`end = parse(piece.substring(rangePoint+2));`
			`} else {`
			`start = end = parse(piece);`
			`}`
			`contents.add(start, end);`
			`}`
			`}`

			`static int parse(String piece) {`
			`if (!piece.startsWith("<U") \|\| !piece.endsWith(">")) {`
			`throw new IllegalArgumentException("Bogus code point: " + piece);`
			`}`
			`return Integer.parseInt(piece.substring(2,piece.length()-1), 16);`
			`}`

ICU-0 fixes to work with Eclipse X-SVN-Rev: 11702 2003-04-25 01:39:15 +00:00			`static Prop getProp(String name) {`
			`//System.out.println("Searching for: " + name);`
			`for (int i = 0; i < propCount; ++i) {`
			`//System.out.println("Checking: " + props[i].name);`
			`if (props[i].name.equals(name)) {`
			`return props[i];`
			`}`
			`}`
			`//System.out.println("Missed");`
			`return null;`
			`}`

ICU-0 for comparing POSIX properties X-SVN-Rev: 11639 2003-04-23 19:01:21 +00:00			`// oddities:`
			`// extra space after ';' <U0300>..<U036F>; <U20D0>..<U20FF>; <UFE20>..<UFE2F>;/`
			`// <0>?? <0>;<U0BE7>..<U0BEF>;/`
			`// <U202C>; <U202D>;<U202E>; <UFEFF> : 0;/`
			`// % "print" is by default "graph", and the <space> character`
			`// print is odd, since it includes space but not other spaces.`
			`// alnum not defined.`

			`}`