ICU-1749 preliminary work on adding strings. (also fixed import in TrieIterator so it works with VisualCafe.)

X-SVN-Rev: 7891
2002-03-06 19:28:32 +00:00 · 2002-03-06 19:28:32 +00:00 · dbaade006e
commit dbaade006e
parent badaf1c4ac
3 changed files with 535 additions and 53 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java,v $ 
- * $Date: 2002/02/25 22:43:57 $ 
- * $Revision: 1.22 $
+ * $Date: 2002/03/06 19:28:32 $ 
+ * $Revision: 1.23 $
 *
 *****************************************************************************************
 */
@ -445,9 +445,192 @@ public class UnicodeSetTest extends TestFmwk {
            logln("bitsToSet(setToBits(c)): " + c);
        } else {
            errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
+        } 
+    }
+    
+    public void TestChaining() {
+        Object[][] testList = {
+            {I_EQUALS, UnicodeSet.fromEach("abc"),      new UnicodeSet("[a-c]")},
+            {I_EQUALS, UnicodeSet.fromMultiple("abc"),  new UnicodeSet("[{abc}]")},
+            {I_EQUALS, new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'), 
+                new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
+        };
+        
+        for (int i = 0; i < testList.length; ++i) {
+            expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
+        }        
+    } 
+    
+    static final Integer 
+       I_ANY = new Integer(UnicodeSet.ANY),
+       I_CONTAINS = new Integer(UnicodeSet.CONTAINS),
+       I_DISJOINT = new Integer(UnicodeSet.DISJOINT),
+       I_NO_B = new Integer(UnicodeSet.NO_B),
+       I_ISCONTAINED = new Integer(UnicodeSet.ISCONTAINED),
+       I_EQUALS = new Integer(UnicodeSet.EQUALS),
+       I_NO_A = new Integer(UnicodeSet.NO_A),
+       I_NONE = new Integer(UnicodeSet.NONE);
+    
+    
+    public void TestSetRelation() {
+
+        String[] choices = {"a", "b", "c"};
+        int limit = 1 << choices.length;
+        
+        SortedSet iset = new TreeSet();
+        SortedSet jset = new TreeSet();
+        
+        for (int i = 0; i < limit; ++i) {
+            pick(i, choices, iset);
+            for (int j = 0; j < limit; ++j) {
+                pick(j, choices, jset);
+                checkSetRelation(iset, jset, "(" + i + ")");
+            }
        }
    }
    
+    public void TestSetSpeed() {
+        TestSetSpeed2(100);
+        TestSetSpeed2(1000);
+        TestSetSpeed2(10000);
+    }
+    
+    public void TestSetSpeed2(int size) {
+        
+        SortedSet iset = new TreeSet();
+        SortedSet jset = new TreeSet();
+        
+        for (int i = 0; i < size*2; i += 2) { // only even values
+            iset.add(new Integer(i));
+            jset.add(new Integer(i));
+        }
+        
+        int iterations = 1000000 / size;
+        
+        logln("Timing comparison of Java vs Utility");
+        logln("For about " + size + " objects that are almost all the same.");
+        
+        CheckSpeed(iset, jset, "when a = b", iterations);
+        
+        iset.add(new Integer(size + 1));    // add odd value in middle
+        
+        CheckSpeed(iset, jset, "when a contains b", iterations);        
+        CheckSpeed(jset, iset, "when b contains a", iterations);
+        
+        jset.add(new Integer(size - 1));    // add different odd value in middle
+        
+        CheckSpeed(jset, iset, "when a, b are disjoint", iterations);        
+    }
+    
+    void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
+        CheckSpeed2(iset, jset, message, iterations);
+        CheckSpeed3(iset, jset, message, iterations);
+    }
+    
+    void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
+        boolean x;
+        boolean y;
+        
+        // make sure code is loaded:
+        x = iset.containsAll(jset);
+        y = UnicodeSet.hasRelation(iset, UnicodeSet.CONTAINS, jset);
+        if (x != y) errln("FAIL contains comparison");
+        
+        double start = System.currentTimeMillis();
+        for (int i = 0; i < iterations; ++i) {
+            x |= iset.containsAll(jset);
+        }
+        double middle = System.currentTimeMillis();
+        for (int i = 0; i < iterations; ++i) {
+            y |= UnicodeSet.hasRelation(iset, UnicodeSet.CONTAINS, jset);
+        }
+        double end = System.currentTimeMillis();
+        
+        double jtime = (middle - start)/iterations;
+        double utime = (end - middle)/iterations;
+        
+        java.text.NumberFormat nf = java.text.NumberFormat.getPercentInstance();
+        logln("Test contains: " + message + ": Java: " + jtime
+            + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
+    }
+    
+    void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
+        boolean x;
+        boolean y;
+        
+        // make sure code is loaded:
+        x = iset.equals(jset);
+        y = UnicodeSet.hasRelation(iset, UnicodeSet.EQUALS, jset);
+        if (x != y) errln("FAIL equality comparison");
+
+        
+        double start = System.currentTimeMillis();
+        for (int i = 0; i < iterations; ++i) {
+            x |= iset.equals(jset);
+        }
+        double middle = System.currentTimeMillis();
+        for (int i = 0; i < iterations; ++i) {
+            y |= UnicodeSet.hasRelation(iset, UnicodeSet.EQUALS, jset);
+        }
+        double end = System.currentTimeMillis();
+        
+        double jtime = (middle - start)/iterations;
+        double utime = (end - middle)/iterations;
+        
+        java.text.NumberFormat nf = java.text.NumberFormat.getPercentInstance();
+        logln("Test equals:   " + message + ": Java: " + jtime
+            + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
+    }
+    
+    void pick(int bits, Object[] examples, SortedSet output) {
+        output.clear();
+        for (int k = 0; k < 32; ++k) {
+            if (((1<<k) & bits) != 0) output.add(examples[k]);
+        }
+    }
+    
+    public static final String[] RELATION_NAME = {
+        "both-are-null",
+        "a-is-null", 
+        "equals", 
+        "is-contained-in",
+        "b-is-null",
+        "is-disjoint_with",
+        "contains", 
+        "any", };
+        
+    boolean dumbHasRelation(Collection A, int filter, Collection B) {
+        Collection a_b = new TreeSet(A);
+        a_b.removeAll(B);
+        if (a_b.size() > 0 && (filter & UnicodeSet.A_NOT_B) == 0) return false; 
+        
+        Collection b_a = new TreeSet(B);
+        b_a.removeAll(A);
+        if (b_a.size() > 0 && (filter & UnicodeSet.B_NOT_A) == 0) return false; 
+
+        Collection ab = new TreeSet(A);
+        ab.retainAll(B);
+        if (ab.size() > 0 && (filter & UnicodeSet.A_AND_B) == 0) return false; 
+        
+        return true;
+    }    
+    
+    void checkSetRelation(SortedSet a, SortedSet b, String message) {
+        for (int i = 0; i < 8; ++i) {
+            
+            boolean hasRelation = UnicodeSet.hasRelation(a, i, b);
+            boolean dumbHasRelation = dumbHasRelation(a, i, b);
+            
+            logln(message + " " + hasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
+            
+            if (hasRelation != dumbHasRelation) {
+                errln("FAIL: " + 
+                    message + " " + dumbHasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
+            }
+        }
+        logln("");
+    }
+    
   /**
    * Test the [:Latin:] syntax.
    */
@ -668,7 +851,63 @@ public class UnicodeSetTest extends TestFmwk {
        }
        return pairs.toString();
    }
-
+    
+    /**
+     * Test function. Make sure that the sets have the right relation
+     */
+     
+    void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
+        byte relation = ((Byte) relationObj).byteValue();
+        UnicodeSet set1 = (UnicodeSet) set1Obj;
+        UnicodeSet set2 = (UnicodeSet) set2Obj;
+        
+        boolean contains = set1.containsAll(set2);
+        boolean isContained = set2.containsAll(set1);
+        boolean disjoint = set1.containsNone(set2);
+        boolean equals = set1.equals(set2);
+        
+        UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
+        UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
+        UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
+        
+        // test basic properties
+        
+        if (contains != (intersection.size() == set2.size())) {
+            errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
+        }
+          
+        if (contains != (intersection.equals(set2))) {
+            errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
+        }
+          
+        if (isContained != (intersection.size() == set1.size())) {
+            errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
+        }
+          
+        if (isContained != (intersection.equals(set1))) {
+            errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
+        }
+          
+        if ((contains && isContained) != equals) {
+            errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
+        }
+          
+        if (disjoint != (intersection.size() == 0)) {
+            errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
+        }
+        
+        // Now see if the expected relation is true
+        int status = (minus12.size() != 0 ? 4 : 0)
+          | (intersection.size() != 0 ? 2 : 0)
+          | (minus21.size() != 0 ? 1 : 0);
+        
+        if (status != relation) {
+            errln("FAIL relation incorrect" + message
+              + ": desired= " + RELATION_NAME[relation]
+              + ": found= " + RELATION_NAME[status]);
+        }
+    }
+    
    /**
     * Expect the given set to contain the characters in charsIn and
     * to not contain those in charsOut.
--- a/icu4j/src/com/ibm/icu/impl/TrieIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/TrieIterator.java
@ -5,8 +5,8 @@
 ******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/TrieIterator.java,v $
-* $Date: 2002/02/16 03:05:37 $
-* $Revision: 1.3 $
+* $Date: 2002/03/06 19:28:32 $
+* $Revision: 1.4 $
 *
 ******************************************************************************
 */
@ -16,6 +16,7 @@ package com.ibm.icu.impl;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.icu.util.RangeValueIterator.*;

 /**
 * <p>Class enabling iteration of the values in a Trie.</p>
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
- * $Date: 2002/02/25 22:43:58 $
- * $Revision: 1.56 $
+ * $Date: 2002/03/06 19:28:32 $
+ * $Revision: 1.57 $
 *
 *****************************************************************************************
 */
@ -15,9 +15,12 @@ package com.ibm.icu.text;
 import java.text.*;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.*;
+import java.util.TreeSet;
+import java.util.SortedSet;
+import java.util.Iterator;

 /**
- * A mutable set of Unicode characters.  Objects of this class
+ * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
 * represent <em>character classes</em> used in regular expressions.
 * A character specifies a subset of Unicode code points.  Legal
 * code points are U+0000 to U+10FFFF, inclusive.
@ -205,7 +208,7 @@ import com.ibm.icu.lang.*;
 * Unicode property
 * </table>
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.56 $ $Date: 2002/02/25 22:43:58 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.57 $ $Date: 2002/03/06 19:28:32 $
 */
 public class UnicodeSet extends UnicodeFilter {

@ -227,6 +230,9 @@ public class UnicodeSet extends UnicodeFilter {
    private int[] list;   // MUST be terminated with HIGH
    private int[] rangeList; // internal buffer
    private int[] buffer; // internal buffer
+    
+    // NOTE: normally the field should be of type SortedSet; but that is missing a public clone!!
+    private TreeSet strings = new TreeSet();

    /**
     * The pattern representation of this set.  This may not be the
@ -354,9 +360,10 @@ public class UnicodeSet extends UnicodeFilter {
     * @param start first character in the set, inclusive
     * @rparam end last character in the set, inclusive
     */
-    public void set(int start, int end) {
+    public UnicodeSet set(int start, int end) {
        clear();
        complement(start, end);
+        return this;
    }

    /**
@ -364,10 +371,12 @@ public class UnicodeSet extends UnicodeFilter {
     * @param other a <code>UnicodeSet</code> whose value will be
     * copied to this object
     */
-    public void set(UnicodeSet other) {
+    public UnicodeSet set(UnicodeSet other) {
        list = (int[]) other.list.clone();
        len = other.len;
        pat = other.pat;
+        strings = (TreeSet)other.strings.clone();
+        return this;
    }

    /**
@ -378,8 +387,8 @@ public class UnicodeSet extends UnicodeFilter {
     * @exception java.lang.IllegalArgumentException if the pattern
     * contains a syntax error.
     */
-    public final void applyPattern(String pattern) {
-        applyPattern(pattern, true);
+    public final UnicodeSet applyPattern(String pattern) {
+        return applyPattern(pattern, true);
    }

    /**
@ -392,7 +401,7 @@ public class UnicodeSet extends UnicodeFilter {
     * @exception java.lang.IllegalArgumentException if the pattern
     * contains a syntax error.
     */
-    public void applyPattern(String pattern, boolean ignoreWhitespace) {
+    public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
        ParsePosition pos = new ParsePosition(0);
        applyPattern(pattern, pos, null, ignoreWhitespace);

@ -407,6 +416,7 @@ public class UnicodeSet extends UnicodeFilter {
            throw new IllegalArgumentException("Parse of \"" + pattern +
                                               "\" failed at " + i);
        }
+        return this;
    }

    /**
@ -573,7 +583,7 @@ public class UnicodeSet extends UnicodeFilter {
        for (int i = 0; i < count; ++i) {
            n += getRangeEnd(i) - getRangeStart(i) + 1;
        }
-        return n;
+        return n + strings.size();
    }

    /**
@ -582,7 +592,7 @@ public class UnicodeSet extends UnicodeFilter {
     * @return <tt>true</tt> if this set contains no elements.
     */
    public boolean isEmpty() {
-        return len == 1;
+        return len == 1; // TODO: optimize this
    }

    /**
@ -653,6 +663,7 @@ public class UnicodeSet extends UnicodeFilter {
        } else {
            return super.matches(text, offset, limit, incremental);
        }
+        // TODO: fix this for strings!
    }

    /**
@ -753,7 +764,7 @@ public class UnicodeSet extends UnicodeFilter {
     * @param end last character, inclusive, of range to be added
     * to this set.
     */
-    public void add(int start, int end) {
+    public UnicodeSet add(int start, int end) {
        if (start < MIN_VALUE || start > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
        }
@ -763,6 +774,7 @@ public class UnicodeSet extends UnicodeFilter {
        if (start <= end) {
            add(range(start, end), 2, 0);
        }
+        return this;
    }

    /**
@ -770,10 +782,63 @@ public class UnicodeSet extends UnicodeFilter {
     * present.  If this set already contains the specified character,
     * the call leaves this set unchanged.
     */
-    public final void add(int c) {
+    public final UnicodeSet add(int c) {
        add(c, c);
+        return this;
    }

+    /**
+     * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+     * If this set already any particular character, it has no effect on that character.
+     * @param string to add
+     */
+    public final UnicodeSet addEach(String s) {
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
+            cp = UTF16.charAt(s, i);
+            add(cp, cp);
+        }
+        return this;
+    }
+
+    /**
+     * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
+     * @param string to add
+     */
+    public static UnicodeSet fromEach(String s) {
+        return new UnicodeSet().addEach(s);
+    }
+
+    /**
+     * Adds the specified multicharacter to this set if it is not already
+     * present.  If this set already contains the multicharacter,
+     * the call leaves this set unchanged.
+     * Thus "ch" => {"ch"}
+     * @param string to add
+     */
+    public final UnicodeSet add(String s) {
+        if (s.length() < 0) return this;
+        // this is slightly odd; the reason is to avoid UTF16.countCodePoint(s)
+        // when we don't really need to iterate through the whole string
+        int cp = UTF16.charAt(s, 0);
+        if (UTF16.getCharCount(cp) == 1) {
+            add(cp, cp);
+        } else {
+           strings.add(s);
+        }
+        return this;
+    }
+    
+    /**
+     * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
+     * @param string to add
+     */
+    public static UnicodeSet fromMultiple(String s) {
+        return new UnicodeSet().add(s);
+    }
+
+    
+
    /**
     * Retain only the elements in this set that are contained in the
     * specified range.  If <code>end > start</code> then an empty range is
@ -784,7 +849,7 @@ public class UnicodeSet extends UnicodeFilter {
     * @param end last character, inclusive, of range to be retained
     * to this set.
     */
-    public void retain(int start, int end) {
+    public UnicodeSet retain(int start, int end) {
        if (start < MIN_VALUE || start > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
        }
@ -796,13 +861,14 @@ public class UnicodeSet extends UnicodeFilter {
        } else {
            clear();
        }
+        return this;
    }

    /**
     * Retain the specified character from this set if it is present.
     */
-    public final void retain(int c) {
-        retain(c, c);
+    public final UnicodeSet retain(int c) {
+        return retain(c, c);
    }

    /**
@ -816,7 +882,7 @@ public class UnicodeSet extends UnicodeFilter {
     * @param end last character, inclusive, of range to be removed
     * from this set.
     */
-    public void remove(int start, int end) {
+    public UnicodeSet remove(int start, int end) {
        if (start < MIN_VALUE || start > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
        }
@ -826,6 +892,7 @@ public class UnicodeSet extends UnicodeFilter {
        if (start <= end) {
            retain(range(start, end), 2, 2);
        }
+        return this;
    }

    /**
@ -833,8 +900,8 @@ public class UnicodeSet extends UnicodeFilter {
     * The set will not contain the specified character once the call
     * returns.
     */
-    public final void remove(int c) {
-        remove(c, c);
+    public final UnicodeSet remove(int c) {
+        return remove(c, c);
    }

    /**
@ -848,7 +915,7 @@ public class UnicodeSet extends UnicodeFilter {
     * @param end last character, inclusive, of range to be removed
     * from this set.
     */
-    public void complement(int start, int end) {
+    public UnicodeSet complement(int start, int end) {
        if (start < MIN_VALUE || start > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
        }
@ -858,6 +925,7 @@ public class UnicodeSet extends UnicodeFilter {
        if (start <= end) {
            xor(range(start, end), 2, 0);
        }
+        return this;
    }

    /**
@ -865,16 +933,15 @@ public class UnicodeSet extends UnicodeFilter {
     * will be removed if it is in this set, or will be added if it is
     * not in this set.
     */
-    public final void complement(int c) {
-        complement(c, c);
+    public final UnicodeSet complement(int c) {
+        return complement(c, c);
    }

    /**
-     * Inverts this set.  This operation modifies this set so that its
-     * value is its complement.  This is equivalent to
+     * This is equivalent to
     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
     */
-    public void complement() {
+    public UnicodeSet complement() {
        if (list[0] == LOW) {
            System.arraycopy(list, 1, list, 0, len-1);
            --len;
@ -885,6 +952,7 @@ public class UnicodeSet extends UnicodeFilter {
            ++len;
        }
        pat = null;
+        return this;
    }

    /**
@ -905,27 +973,14 @@ public class UnicodeSet extends UnicodeFilter {
                return false;
            }
        }
+        if (!strings.containsAll(c.strings)) return false;
        return true;
    }
    
-    // TODO: Make this public
    /**
-     * Return TRUE if one or more characters in s is in this set.
+     * @return TRUE if every character in s is in this set.
     */
-    boolean containsSome(String s) {
-        int cp;
-        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
-            cp = UTF16.charAt(s, i);
-            if (contains(cp)) return true;
-        }
-        return false;
-    }
-        
-    // TODO: Make this public
-    /**
-     * Return TRUE if every character in s is in this set.
-     */
-    boolean containsAll(String s) {
+    public boolean containsAll(String s) {
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
@ -935,6 +990,63 @@ public class UnicodeSet extends UnicodeFilter {
    }
    

+    /**
+     * Returns <tt>true</tt> if this set contains every character
+     * in the specified range of chars.
+     * If <code>end > start</code> then the results of this method
+     * are undefined.
+     *
+     * @return <tt>true</tt> if this set contains the specified range
+     * of chars.
+     */
+    public boolean containsNone(int start, int end) {
+        if (start < MIN_VALUE || start > MAX_VALUE) {
+            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
+        }
+        if (end < MIN_VALUE || end > MAX_VALUE) {
+            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
+        }
+        int i = -1;
+        while (true) {
+            if (start < list[++i]) break;
+        }
+        return ((i & 1) == 0 && end < list[i]);
+    }
+
+
+    /**
+     * Returns <tt>true</tt> if the specified set is disjoint with this set.
+     *
+     * @param c set to be checked for containment in this set.
+     * @return <tt>true</tt> if this set contains all of the elements of the
+     * 	       specified set.
+     */
+    public boolean containsNone(UnicodeSet c) {
+        // The specified set is a subset if all of its pairs are contained in
+        // this set.  It's possible to code this more efficiently in terms of
+        // direct manipulation of the inversion lists if the need arises.
+        int n = c.getRangeCount();
+        for (int i=0; i<n; ++i) {
+            if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
+                return false;
+            }
+        }
+        if (!hasRelation(strings, DISJOINT, c.strings)) return false;
+        return true;
+    }
+    
+    /**
+     * @return TRUE if one or more characters in s is in this set.
+     */
+    public boolean containsNone(String s) {
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
+            cp = UTF16.charAt(s, i);
+            if (contains(cp)) return false;
+        }
+        return true;
+    }
+        
    /**
     * Adds all of the elements in the specified set to this set if
     * they're not already present.  This operation effectively
@ -944,8 +1056,9 @@ public class UnicodeSet extends UnicodeFilter {
     *
     * @param c set whose elements are to be added to this set.
     */
-    public void addAll(UnicodeSet c) {
+    public UnicodeSet addAll(UnicodeSet c) {
        add(c.list, c.len, 0);
+        return this;
    }

    /**
@ -957,8 +1070,9 @@ public class UnicodeSet extends UnicodeFilter {
     *
     * @param c set that defines which elements this set will retain.
     */
-    public void retainAll(UnicodeSet c) {
+    public UnicodeSet retainAll(UnicodeSet c) {
        retain(c.list, c.len, 0);
+        return this;
    }

    /**
@ -970,8 +1084,9 @@ public class UnicodeSet extends UnicodeFilter {
     * @param c set that defines which elements will be removed from
     *          this set.
     */
-    public void removeAll(UnicodeSet c) {
+    public UnicodeSet removeAll(UnicodeSet c) {
        retain(c.list, c.len, 2);
+        return this;
    }

    /**
@ -982,18 +1097,21 @@ public class UnicodeSet extends UnicodeFilter {
     * @param c set that defines which elements will be complemented from
     *          this set.
     */
-    public void complementAll(UnicodeSet c) {
+    public UnicodeSet complementAll(UnicodeSet c) {
        xor(c.list, c.len, 0);
+        return this;
    }

    /**
     * Removes all of the elements from this set.  This set will be
     * empty after this call returns.
     */
-    public void clear() {
+    public UnicodeSet clear() {
        list[0] = HIGH;
        len = 1;
        pat = null;
+        strings.clear();
+        return this;
    }

    /**
@ -1034,7 +1152,7 @@ public class UnicodeSet extends UnicodeFilter {
     * Reallocate this objects internal structures to take up the least
     * possible space, without changing this object's value.
     */
-    public void compact() {
+    public UnicodeSet compact() {
        if (len != list.length) {
            int[] temp = new int[len];
            System.arraycopy(list, 0, temp, 0, len);
@ -1042,6 +1160,7 @@ public class UnicodeSet extends UnicodeFilter {
        }
        rangeList = null;
        buffer = null;
+        return this;
    }

    /**
@ -1368,6 +1487,29 @@ public class UnicodeSet extends UnicodeFilter {
                    nestedPatDone = true;
                    i = pos.getIndex();
                }
+                /*else if (!isLiteral && c == '{') {
+                    // start of a string. find the rest.
+                    try {
+                        StringBuffer result = new StringBuffer();
+                        while (i < pattern.length()) {
+                            // don't need to worry about surrogates, since
+                            // the only significant characters are } and \\.
+                            char ch = pattern.charAt(i++);
+                            if (ch == '}') {
+                                break;
+                            } else if (ch == '\\') {
+                                result.append(pattern.charAt(i++)); // TODO, handle \\n, \\uXXXX etc.
+                            } else {
+                                result.append(ch);
+                            }
+                        }
+                        // We have new string. Add it to set and continue;
+                    } catch (Exception e) {
+                        throw new Exception("foo");
+                    }
+                    
+                }
+                */
            }

            /* At this point we have either a character c, or a nested set.  If
@ -1517,7 +1659,7 @@ public class UnicodeSet extends UnicodeFilter {
            // Debug parser
            System.out.println("UnicodeSet(" +
                               pattern.substring(start, i+1) + ") -> " +
-                               com.ibm.icu.impl.Utility.escape(toString()));
+                               Utility.escape(toString()));
        }
    }

@ -1770,4 +1912,104 @@ public class UnicodeSet extends UnicodeFilter {
    private static final int max(int a, int b) {
        return (a > b) ? a : b;
    }
+    
+    /**
+     * The relationship between two sets A and B can be determined by looking at:
+     * A - B
+     * A & B (intersection)
+     * B - A
+     * These are represented by a set of bits.
+     * Bit 2 is true if A - B is not empty
+     * Bit 1 is true if A & B is not empty
+     * BIT 0 is true if B - A is not empty
+     */
+    
+    public static final int
+        A_NOT_B = 4,
+        A_AND_B = 2,
+        B_NOT_A = 1;
+    
+    /**
+     * There are 8 combinations of the relationship bits. These correspond to
+     * the filters (combinations of allowed bits) in hasRelation. They also
+     * correspond to the modification functions, listed in comments.
+     */
+     
+    public static final int 
+       ANY =            A_NOT_B |   A_AND_B |   B_NOT_A,    // union,           addAll
+       CONTAINS =       A_NOT_B |   A_AND_B,                // A                (unnecessary)
+       DISJOINT =       A_NOT_B |               B_NOT_A,    // A xor B,         missing Java function
+       ISCONTAINED =                A_AND_B |   B_NOT_A,    // B                (unnecessary)
+       NO_B =           A_NOT_B,                            // A setDiff B,     removeAll
+       EQUALS =                     A_AND_B,                // A intersect B,   retainAll
+       NO_A =                                   B_NOT_A,    // B setDiff A,     removeAll
+       NONE =           0;                                  // null             (unnecessary)
+  
+    /**
+     * Utility that could be on SortedSet. Faster implementation than
+     * what is in Java.
+     * @param a first set
+     * @param allow filter, using ANY, CONTAINS, etc.
+     * @param b second set
+     * @return whether the filter relationship is true or not.
+     */
+    
+    public static boolean hasRelation(SortedSet a, int allow, SortedSet b) {
+        // extract filter conditions
+        // these are the ALLOWED conditions Set
+        
+        boolean anb = (allow & A_NOT_B) != 0;
+        boolean ab = (allow & A_AND_B) != 0;
+        boolean bna = (allow & B_NOT_A) != 0;
+        
+        // quick check on sizes
+        switch(allow) {
+            case CONTAINS: if (a.size() < b.size()) return false; break;
+            case ISCONTAINED: if (a.size() > b.size()) return false; break;
+            case EQUALS: if (a.size() != b.size()) return false; break;
+        }
+        
+        // check for null sets
+        if (a.size() == 0) {
+            if (b.size() == 0) return true;
+            return bna;
+        } else if (b.size() == 0) {
+            return anb;
+        }
+        
+        // pick up first strings, and start comparing
+        Iterator ait = a.iterator();
+        Iterator bit = b.iterator();
+        
+        Comparable aa = (Comparable) ait.next();
+        Comparable bb = (Comparable) bit.next();
+        
+        while (true) {
+            int comp = aa.compareTo(bb);
+            if (comp == 0) {
+                if (!ab) return false;
+                if (!ait.hasNext()) {
+                    if (!bit.hasNext()) return true;
+                    return bna;
+                } else if (!bit.hasNext()) {
+                    return anb;
+                }
+                aa = (Comparable) ait.next();
+                bb = (Comparable) bit.next();
+            } else if (comp < 0) {
+                if (!anb) return false;
+                if (!ait.hasNext()) {
+                    return bna;
+                }
+                aa = (Comparable) ait.next(); 
+            } else  {
+                if (!bna) return false;
+                if (!bit.hasNext()) {
+                    return anb;
+                }
+                bb = (Comparable) bit.next();
+            }
+        }
+    }
+    
 }