ICU-1749 preliminary work on adding strings. (also fixed import in TrieIterator so it works with VisualCafe.)

X-SVN-Rev: 7891
This commit is contained in:
Mark Davis 2002-03-06 19:28:32 +00:00
parent badaf1c4ac
commit dbaade006e
3 changed files with 535 additions and 53 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java,v $
* $Date: 2002/02/25 22:43:57 $
* $Revision: 1.22 $
* $Date: 2002/03/06 19:28:32 $
* $Revision: 1.23 $
*
*****************************************************************************************
*/
@ -445,9 +445,192 @@ public class UnicodeSetTest extends TestFmwk {
logln("bitsToSet(setToBits(c)): " + c);
} else {
errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
}
}
/**
 * Checks that the static factories and the chainable mutators of
 * UnicodeSet build sets that stand in the expected relation to
 * reference sets.
 */
public void TestChaining() {
    // Each row is { expected relation, set under test, reference set }.
    Object[][] cases = {
        {I_EQUALS, UnicodeSet.fromEach("abc"), new UnicodeSet("[a-c]")},
        {I_EQUALS, UnicodeSet.fromMultiple("abc"), new UnicodeSet("[{abc}]")},
        {I_EQUALS, new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
            new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
    };
    int row = 0;
    while (row < cases.length) {
        Object[] entry = cases[row];
        expectRelation(entry[0], entry[1], entry[2], "(" + row + ")");
        ++row;
    }
}
// Boxed copies of the UnicodeSet relation filter constants, so that they can
// be stored in Object[][] test tables such as the one in TestChaining.
static final Integer
I_ANY = new Integer(UnicodeSet.ANY),
I_CONTAINS = new Integer(UnicodeSet.CONTAINS),
I_DISJOINT = new Integer(UnicodeSet.DISJOINT),
I_NO_B = new Integer(UnicodeSet.NO_B),
I_ISCONTAINED = new Integer(UnicodeSet.ISCONTAINED),
I_EQUALS = new Integer(UnicodeSet.EQUALS),
I_NO_A = new Integer(UnicodeSet.NO_A),
I_NONE = new Integer(UnicodeSet.NONE);
/**
 * Runs checkSetRelation on every ordered pair of subsets of
 * {"a", "b", "c"}; the subsets are selected by bit mask through pick().
 */
public void TestSetRelation() {
    String[] choices = {"a", "b", "c"};
    int subsetCount = 1 << choices.length;
    SortedSet left = new TreeSet();
    SortedSet right = new TreeSet();
    int i = 0;
    while (i < subsetCount) {
        pick(i, choices, left);
        // The label only identifies the left-hand subset.
        String label = "(" + i + ")";
        for (int j = 0; j != subsetCount; ++j) {
            pick(j, choices, right);
            checkSetRelation(left, right, label);
        }
        ++i;
    }
}
/**
 * Runs the timing comparison for sets of 100, 1000 and 10000 elements.
 */
public void TestSetSpeed() {
    int[] sizes = {100, 1000, 10000};
    for (int k = 0; k < sizes.length; ++k) {
        TestSetSpeed2(sizes[k]);
    }
}
/**
 * Builds two sorted sets holding the same <code>size</code> even Integers
 * and times the Java collection methods against UnicodeSet.hasRelation,
 * first on the identical sets and then after adding single extra values.
 *
 * @param size number of even values initially placed in each set
 */
public void TestSetSpeed2(int size) {
    SortedSet iset = new TreeSet();
    SortedSet jset = new TreeSet();
    // Fill both sets with the even values 0, 2, 4, ...
    int value = 0;
    while (value < size*2) {
        iset.add(new Integer(value));
        jset.add(new Integer(value));
        value += 2;
    }

    int iterations = 1000000 / size;
    logln("Timing comparison of Java vs Utility");
    logln("For about " + size + " objects that are almost all the same.");

    CheckSpeed(iset, jset, "when a = b", iterations);

    // One extra value near the middle of iset: iset now strictly contains jset.
    iset.add(new Integer(size + 1));
    CheckSpeed(iset, jset, "when a contains b", iterations);
    CheckSpeed(jset, iset, "when b contains a", iterations);

    // A different extra value in jset: neither set contains the other.
    // NOTE(review): the sets still share every even value, so they are not
    // disjoint in the set-theoretic sense despite the label below.
    jset.add(new Integer(size - 1));
    CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
}
/**
 * Runs both timing comparisons on the given pair of sets: first the
 * containment timing (CheckSpeed2), then the equality timing (CheckSpeed3).
 *
 * @param iset first set
 * @param jset second set
 * @param message description of the scenario, included in the log output
 * @param iterations number of times each operation is repeated
 */
void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
    // containment
    CheckSpeed2(iset, jset, message, iterations);

    // equality
    CheckSpeed3(iset, jset, message, iterations);
}
/**
 * Times SortedSet.containsAll against UnicodeSet.hasRelation with the
 * CONTAINS filter and logs the average milliseconds per call for each,
 * plus their ratio formatted as a percentage.
 *
 * @param iset set tested as the container
 * @param jset set tested as the contained set
 * @param message description of the scenario, included in the log output
 * @param iterations number of times each operation is repeated
 */
void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
    // Call each implementation once before timing, and make sure they agree.
    boolean javaResult = iset.containsAll(jset);
    boolean utilityResult = UnicodeSet.hasRelation(iset, UnicodeSet.CONTAINS, jset);
    if (javaResult != utilityResult) {
        errln("FAIL contains comparison");
    }

    long t0 = System.currentTimeMillis();
    for (int n = iterations; n > 0; --n) {
        javaResult |= iset.containsAll(jset);
    }
    long t1 = System.currentTimeMillis();
    for (int n = iterations; n > 0; --n) {
        utilityResult |= UnicodeSet.hasRelation(iset, UnicodeSet.CONTAINS, jset);
    }
    long t2 = System.currentTimeMillis();

    double jtime = (double) (t1 - t0) / iterations;
    double utime = (double) (t2 - t1) / iterations;
    String ratio = java.text.NumberFormat.getPercentInstance().format(utime/jtime);
    logln("Test contains: " + message + ": Java: " + jtime
        + ", Utility: " + utime + ", u:j: " + ratio);
}
/**
 * Times SortedSet.equals against UnicodeSet.hasRelation with the
 * EQUALS filter and logs the average milliseconds per call for each,
 * plus their ratio formatted as a percentage.
 *
 * @param iset first set
 * @param jset second set
 * @param message description of the scenario, included in the log output
 * @param iterations number of times each operation is repeated
 */
void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
    // Call each implementation once before timing, and make sure they agree.
    boolean javaResult = iset.equals(jset);
    boolean utilityResult = UnicodeSet.hasRelation(iset, UnicodeSet.EQUALS, jset);
    if (javaResult != utilityResult) {
        errln("FAIL equality comparison");
    }

    long t0 = System.currentTimeMillis();
    for (int n = iterations; n > 0; --n) {
        javaResult |= iset.equals(jset);
    }
    long t1 = System.currentTimeMillis();
    for (int n = iterations; n > 0; --n) {
        utilityResult |= UnicodeSet.hasRelation(iset, UnicodeSet.EQUALS, jset);
    }
    long t2 = System.currentTimeMillis();

    double jtime = (double) (t1 - t0) / iterations;
    double utime = (double) (t2 - t1) / iterations;
    String ratio = java.text.NumberFormat.getPercentInstance().format(utime/jtime);
    logln("Test equals: " + message + ": Java: " + jtime
        + ", Utility: " + utime + ", u:j: " + ratio);
}
/**
 * Replaces the contents of <code>output</code> with the elements of
 * <code>examples</code> selected by a bit mask: element k is included
 * when bit k of <code>bits</code> is set.
 *
 * @param bits selection mask; every set bit must index an existing element
 * @param examples candidate elements
 * @param output set that is cleared and then filled with the selection
 */
void pick(int bits, Object[] examples, SortedSet output) {
    output.clear();
    // Consume the mask from the low bit upward until no set bits remain.
    int index = 0;
    for (int remaining = bits; remaining != 0; remaining >>>= 1) {
        if ((remaining & 1) != 0) {
            output.add(examples[index]);
        }
        ++index;
    }
}
/**
 * Display names for the eight relation filters, indexed by the filter value
 * (0 through 7) that is passed to UnicodeSet.hasRelation.
 */
public static final String[] RELATION_NAME = {
"both-are-null",
"a-is-null",
"equals",
"is-contained-in",
"b-is-null",
"is-disjoint_with",
"contains",
"any", };
/**
 * Straightforward reference implementation of the relation check, used to
 * validate UnicodeSet.hasRelation. A relation holds when each of the three
 * regions (A - B, B - A, A &amp; B) is either empty or allowed by the filter.
 *
 * @param A first collection
 * @param filter combination of UnicodeSet.A_NOT_B, A_AND_B and B_NOT_A bits
 *        naming the regions that are allowed to be non-empty
 * @param B second collection
 * @return whether A and B satisfy the filter
 */
boolean dumbHasRelation(Collection A, int filter, Collection B) {
    boolean aOnlyAllowed = (filter & UnicodeSet.A_NOT_B) != 0;
    boolean bOnlyAllowed = (filter & UnicodeSet.B_NOT_A) != 0;
    boolean sharedAllowed = (filter & UnicodeSet.A_AND_B) != 0;

    // A region only needs to be computed when the filter forbids it.
    if (!aOnlyAllowed) {
        Collection onlyInA = new TreeSet(A);
        onlyInA.removeAll(B);
        if (onlyInA.size() > 0) {
            return false;
        }
    }
    if (!bOnlyAllowed) {
        Collection onlyInB = new TreeSet(B);
        onlyInB.removeAll(A);
        if (onlyInB.size() > 0) {
            return false;
        }
    }
    if (!sharedAllowed) {
        Collection inBoth = new TreeSet(A);
        inBoth.retainAll(B);
        if (inBoth.size() > 0) {
            return false;
        }
    }
    return true;
}
/**
 * For each of the eight relation filters, compares UnicodeSet.hasRelation
 * with the reference dumbHasRelation on the given sets; logs every result
 * and reports a failure when the two disagree.
 *
 * @param a first set
 * @param b second set
 * @param message label prepended to each log line
 */
void checkSetRelation(SortedSet a, SortedSet b, String message) {
    int filter = 0;
    while (filter < 8) {
        boolean actual = UnicodeSet.hasRelation(a, filter, b);
        boolean expected = dumbHasRelation(a, filter, b);
        // Shared tail of the log line and the failure line.
        String description = ":\t" + a + "\t" + RELATION_NAME[filter] + "\t" + b;
        logln(message + " " + actual + description);
        if (actual != expected) {
            errln("FAIL: " + message + " " + expected + description);
        }
        ++filter;
    }
    logln("");
}
/**
* Test the [:Latin:] syntax.
*/
@ -668,7 +851,63 @@ public class UnicodeSetTest extends TestFmwk {
}
return pairs.toString();
}
/**
 * Test function. Makes sure that the two sets have the expected relation,
 * and that containsAll, containsNone, equals, retainAll, removeAll and
 * size are mutually consistent for them.
 *
 * @param relationObj expected relation as a boxed number holding one of the
 *        UnicodeSet filter values (for example I_EQUALS); any Number subclass
 *        is accepted
 * @param set1Obj first set, must be a UnicodeSet
 * @param set2Obj second set, must be a UnicodeSet
 * @param message label appended to the failure message
 */
void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
    // The test tables hold Integer constants, so unbox through Number rather
    // than casting to Byte (which threw ClassCastException for every row).
    int relation = ((Number) relationObj).intValue();
    UnicodeSet set1 = (UnicodeSet) set1Obj;
    UnicodeSet set2 = (UnicodeSet) set2Obj;

    boolean contains = set1.containsAll(set2);
    boolean isContained = set2.containsAll(set1);
    boolean disjoint = set1.containsNone(set2);
    boolean equals = set1.equals(set2);

    // Work on copies so the caller's sets are not modified.
    UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
    UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
    UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);

    // test basic properties
    if (contains != (intersection.size() == set2.size())) {
        errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
    }
    if (contains != (intersection.equals(set2))) {
        errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
    }
    if (isContained != (intersection.size() == set1.size())) {
        errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
    }
    if (isContained != (intersection.equals(set1))) {
        errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
    }
    if ((contains && isContained) != equals) {
        errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
    }
    if (disjoint != (intersection.size() == 0)) {
        errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
    }

    // Now see if the expected relation is true: one bit per non-empty region.
    int status = (minus12.size() != 0 ? UnicodeSet.A_NOT_B : 0)
        | (intersection.size() != 0 ? UnicodeSet.A_AND_B : 0)
        | (minus21.size() != 0 ? UnicodeSet.B_NOT_A : 0);
    if (status != relation) {
        errln("FAIL relation incorrect" + message
            + ": desired= " + RELATION_NAME[relation]
            + ": found= " + RELATION_NAME[status]);
    }
}
/**
* Expect the given set to contain the characters in charsIn and
* to not contain those in charsOut.

View File

@ -5,8 +5,8 @@
******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/TrieIterator.java,v $
* $Date: 2002/02/16 03:05:37 $
* $Revision: 1.3 $
* $Date: 2002/03/06 19:28:32 $
* $Revision: 1.4 $
*
******************************************************************************
*/
@ -16,6 +16,7 @@ package com.ibm.icu.impl;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.RangeValueIterator.*;
/**
* <p>Class enabling iteration of the values in a Trie.</p>

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2002/02/25 22:43:58 $
* $Revision: 1.56 $
* $Date: 2002/03/06 19:28:32 $
* $Revision: 1.57 $
*
*****************************************************************************************
*/
@ -15,9 +15,12 @@ package com.ibm.icu.text;
import java.text.*;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.*;
import java.util.TreeSet;
import java.util.SortedSet;
import java.util.Iterator;
/**
* A mutable set of Unicode characters. Objects of this class
* A mutable set of Unicode characters and multicharacter strings. Objects of this class
* represent <em>character classes</em> used in regular expressions.
* A character specifies a subset of Unicode code points. Legal
* code points are U+0000 to U+10FFFF, inclusive.
@ -205,7 +208,7 @@ import com.ibm.icu.lang.*;
* Unicode property
* </table>
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.56 $ $Date: 2002/02/25 22:43:58 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.57 $ $Date: 2002/03/06 19:28:32 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -227,6 +230,9 @@ public class UnicodeSet extends UnicodeFilter {
private int[] list; // MUST be terminated with HIGH
private int[] rangeList; // internal buffer
private int[] buffer; // internal buffer
// NOTE: normally the field should be of type SortedSet; but that is missing a public clone!!
private TreeSet strings = new TreeSet();
/**
* The pattern representation of this set. This may not be the
@ -354,9 +360,10 @@ public class UnicodeSet extends UnicodeFilter {
* @param start first character in the set, inclusive
* @rparam end last character in the set, inclusive
*/
public void set(int start, int end) {
public UnicodeSet set(int start, int end) {
clear();
complement(start, end);
return this;
}
/**
@ -364,10 +371,12 @@ public class UnicodeSet extends UnicodeFilter {
* @param other a <code>UnicodeSet</code> whose value will be
* copied to this object
*/
public void set(UnicodeSet other) {
public UnicodeSet set(UnicodeSet other) {
list = (int[]) other.list.clone();
len = other.len;
pat = other.pat;
strings = (TreeSet)other.strings.clone();
return this;
}
/**
@ -378,8 +387,8 @@ public class UnicodeSet extends UnicodeFilter {
* @exception java.lang.IllegalArgumentException if the pattern
* contains a syntax error.
*/
public final void applyPattern(String pattern) {
applyPattern(pattern, true);
public final UnicodeSet applyPattern(String pattern) {
return applyPattern(pattern, true);
}
/**
@ -392,7 +401,7 @@ public class UnicodeSet extends UnicodeFilter {
* @exception java.lang.IllegalArgumentException if the pattern
* contains a syntax error.
*/
public void applyPattern(String pattern, boolean ignoreWhitespace) {
public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
ParsePosition pos = new ParsePosition(0);
applyPattern(pattern, pos, null, ignoreWhitespace);
@ -407,6 +416,7 @@ public class UnicodeSet extends UnicodeFilter {
throw new IllegalArgumentException("Parse of \"" + pattern +
"\" failed at " + i);
}
return this;
}
/**
@ -573,7 +583,7 @@ public class UnicodeSet extends UnicodeFilter {
for (int i = 0; i < count; ++i) {
n += getRangeEnd(i) - getRangeStart(i) + 1;
}
return n;
return n + strings.size();
}
/**
@ -582,7 +592,7 @@ public class UnicodeSet extends UnicodeFilter {
* @return <tt>true</tt> if this set contains no elements.
*/
public boolean isEmpty() {
return len == 1;
return len == 1; // TODO: optimize this
}
/**
@ -653,6 +663,7 @@ public class UnicodeSet extends UnicodeFilter {
} else {
return super.matches(text, offset, limit, incremental);
}
// TODO: fix this for strings!
}
/**
@ -753,7 +764,7 @@ public class UnicodeSet extends UnicodeFilter {
* @param end last character, inclusive, of range to be added
* to this set.
*/
public void add(int start, int end) {
public UnicodeSet add(int start, int end) {
if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
}
@ -763,6 +774,7 @@ public class UnicodeSet extends UnicodeFilter {
if (start <= end) {
add(range(start, end), 2, 0);
}
return this;
}
/**
@ -770,10 +782,63 @@ public class UnicodeSet extends UnicodeFilter {
* present. If this set already contains the specified character,
* the call leaves this set unchanged.
*/
public final void add(int c) {
public final UnicodeSet add(int c) {
add(c, c);
return this;
}
/**
 * Adds each of the characters in the string to this set individually.
 * Thus "ch" => {"c", "h"}.
 * A character that this set already contains is left unchanged.
 *
 * @param s string whose characters are added
 * @return this object, for chaining
 */
public final UnicodeSet addEach(String s) {
    int offset = 0;
    while (offset < s.length()) {
        int cp = UTF16.charAt(s, offset);
        add(cp, cp);
        // advance by the number of chars this character occupies
        offset += UTF16.getCharCount(cp);
    }
    return this;
}
/**
 * Makes a new set containing each of the characters in the string.
 * Thus "ch" => {"c", "h"}.
 *
 * @param s string whose characters become the elements of the set
 * @return the newly created set
 */
public static UnicodeSet fromEach(String s) {
    UnicodeSet result = new UnicodeSet();
    return result.addEach(s);
}
/**
 * Adds the specified multicharacter string to this set if it is not
 * already present. If this set already contains the string, the call
 * leaves this set unchanged.
 * Thus "ch" => {"ch"}.
 * <p>A string that consists of exactly one character is added as that
 * character rather than as a string, so that it is stored the same way as
 * a character added through <code>add(int)</code>. An empty string is
 * ignored.
 *
 * @param s string to add
 * @return this object, for chaining
 */
public final UnicodeSet add(String s) {
    // Nothing to add; also keeps UTF16.charAt(s, 0) from being called on "".
    if (s.length() == 0) {
        return this;
    }
    // Compare the length of the first character with the length of the whole
    // string: this detects a one-character string without having to count
    // every character via UTF16.countCodePoint(s).
    int cp = UTF16.charAt(s, 0);
    if (s.length() == UTF16.getCharCount(cp)) {
        add(cp, cp);
    } else {
        strings.add(s);
        // The contents changed, so drop the cached pattern as the other
        // mutators (clear, complement) do.
        pat = null;
    }
    return this;
}
/**
 * Makes a new set from a multicharacter string, by calling
 * <code>add(String)</code> on an empty set. Thus "ch" => {"ch"}.
 *
 * @param s string to add to the new set
 * @return the newly created set
 */
public static UnicodeSet fromMultiple(String s) {
    UnicodeSet result = new UnicodeSet();
    return result.add(s);
}
/**
* Retain only the elements in this set that are contained in the
* specified range. If <code>end > start</code> then an empty range is
@ -784,7 +849,7 @@ public class UnicodeSet extends UnicodeFilter {
* @param end last character, inclusive, of range to be retained
* to this set.
*/
public void retain(int start, int end) {
public UnicodeSet retain(int start, int end) {
if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
}
@ -796,13 +861,14 @@ public class UnicodeSet extends UnicodeFilter {
} else {
clear();
}
return this;
}
/**
* Retain the specified character from this set if it is present.
*/
public final void retain(int c) {
retain(c, c);
public final UnicodeSet retain(int c) {
return retain(c, c);
}
/**
@ -816,7 +882,7 @@ public class UnicodeSet extends UnicodeFilter {
* @param end last character, inclusive, of range to be removed
* from this set.
*/
public void remove(int start, int end) {
public UnicodeSet remove(int start, int end) {
if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
}
@ -826,6 +892,7 @@ public class UnicodeSet extends UnicodeFilter {
if (start <= end) {
retain(range(start, end), 2, 2);
}
return this;
}
/**
@ -833,8 +900,8 @@ public class UnicodeSet extends UnicodeFilter {
* The set will not contain the specified character once the call
* returns.
*/
public final void remove(int c) {
remove(c, c);
public final UnicodeSet remove(int c) {
return remove(c, c);
}
/**
@ -848,7 +915,7 @@ public class UnicodeSet extends UnicodeFilter {
* @param end last character, inclusive, of range to be removed
* from this set.
*/
public void complement(int start, int end) {
public UnicodeSet complement(int start, int end) {
if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
}
@ -858,6 +925,7 @@ public class UnicodeSet extends UnicodeFilter {
if (start <= end) {
xor(range(start, end), 2, 0);
}
return this;
}
/**
@ -865,16 +933,15 @@ public class UnicodeSet extends UnicodeFilter {
* will be removed if it is in this set, or will be added if it is
* not in this set.
*/
public final void complement(int c) {
complement(c, c);
public final UnicodeSet complement(int c) {
return complement(c, c);
}
/**
* Inverts this set. This operation modifies this set so that its
* value is its complement. This is equivalent to
* This is equivalent to
* <code>complement(MIN_VALUE, MAX_VALUE)</code>.
*/
public void complement() {
public UnicodeSet complement() {
if (list[0] == LOW) {
System.arraycopy(list, 1, list, 0, len-1);
--len;
@ -885,6 +952,7 @@ public class UnicodeSet extends UnicodeFilter {
++len;
}
pat = null;
return this;
}
/**
@ -905,27 +973,14 @@ public class UnicodeSet extends UnicodeFilter {
return false;
}
}
if (!strings.containsAll(c.strings)) return false;
return true;
}
// TODO: Make this public
/**
* Return TRUE if one or more characters in s is in this set.
* @return TRUE if every character in s is in this set.
*/
boolean containsSome(String s) {
int cp;
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
if (contains(cp)) return true;
}
return false;
}
// TODO: Make this public
/**
* Return TRUE if every character in s is in this set.
*/
boolean containsAll(String s) {
public boolean containsAll(String s) {
int cp;
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
@ -935,6 +990,63 @@ public class UnicodeSet extends UnicodeFilter {
}
/**
 * Returns <tt>true</tt> if this set contains none of the characters
 * in the specified range of chars.
 * If <code>end &lt; start</code> then the results of this method
 * are undefined.
 *
 * @param start first character of the range, inclusive
 * @param end last character of the range, inclusive
 * @return <tt>true</tt> if no character of the range is in this set
 * @exception java.lang.IllegalArgumentException if start or end lies
 * outside MIN_VALUE..MAX_VALUE
 */
public boolean containsNone(int start, int end) {
    if (start < MIN_VALUE || start > MAX_VALUE) {
        throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
    }
    if (end < MIN_VALUE || end > MAX_VALUE) {
        throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
    }
    // Find the first list entry that is strictly greater than start.
    int index = 0;
    while (start >= list[index]) {
        ++index;
    }
    // An even index means start lies outside every range; the whole range
    // is outside as long as end also stays below that entry.
    boolean startOutside = (index & 1) == 0;
    return startOutside && end < list[index];
}
/**
 * Returns <tt>true</tt> if the specified set is disjoint with this set,
 * that is, if the two sets have no character and no string in common.
 *
 * @param c set to be checked against this set
 * @return <tt>true</tt> if this set contains none of the elements of the
 * specified set
 */
public boolean containsNone(UnicodeSet c) {
    // The sets are disjoint if none of c's ranges touches this set. It's
    // possible to code this more efficiently in terms of direct manipulation
    // of the inversion lists if the need arises.
    int rangeCount = c.getRangeCount();
    int r = 0;
    while (r < rangeCount) {
        if (!containsNone(c.getRangeStart(r), c.getRangeEnd(r))) {
            return false;
        }
        ++r;
    }
    // The string parts of the two sets must be disjoint as well.
    return hasRelation(strings, DISJOINT, c.strings);
}
/**
 * Returns <tt>true</tt> if none of the characters in the string
 * is in this set.
 *
 * @param s string whose characters are checked
 * @return <tt>true</tt> if no character of s is contained in this set
 */
public boolean containsNone(String s) {
    int offset = 0;
    while (offset < s.length()) {
        int cp = UTF16.charAt(s, offset);
        if (contains(cp)) {
            return false;
        }
        // advance by the number of chars this character occupies
        offset += UTF16.getCharCount(cp);
    }
    return true;
}
/**
* Adds all of the elements in the specified set to this set if
* they're not already present. This operation effectively
@ -944,8 +1056,9 @@ public class UnicodeSet extends UnicodeFilter {
*
* @param c set whose elements are to be added to this set.
*/
public void addAll(UnicodeSet c) {
public UnicodeSet addAll(UnicodeSet c) {
add(c.list, c.len, 0);
return this;
}
/**
@ -957,8 +1070,9 @@ public class UnicodeSet extends UnicodeFilter {
*
* @param c set that defines which elements this set will retain.
*/
public void retainAll(UnicodeSet c) {
public UnicodeSet retainAll(UnicodeSet c) {
retain(c.list, c.len, 0);
return this;
}
/**
@ -970,8 +1084,9 @@ public class UnicodeSet extends UnicodeFilter {
* @param c set that defines which elements will be removed from
* this set.
*/
public void removeAll(UnicodeSet c) {
public UnicodeSet removeAll(UnicodeSet c) {
retain(c.list, c.len, 2);
return this;
}
/**
@ -982,18 +1097,21 @@ public class UnicodeSet extends UnicodeFilter {
* @param c set that defines which elements will be complemented from
* this set.
*/
public void complementAll(UnicodeSet c) {
public UnicodeSet complementAll(UnicodeSet c) {
xor(c.list, c.len, 0);
return this;
}
/**
* Removes all of the elements from this set. This set will be
* empty after this call returns.
*/
public void clear() {
public UnicodeSet clear() {
list[0] = HIGH;
len = 1;
pat = null;
strings.clear();
return this;
}
/**
@ -1034,7 +1152,7 @@ public class UnicodeSet extends UnicodeFilter {
* Reallocate this objects internal structures to take up the least
* possible space, without changing this object's value.
*/
public void compact() {
public UnicodeSet compact() {
if (len != list.length) {
int[] temp = new int[len];
System.arraycopy(list, 0, temp, 0, len);
@ -1042,6 +1160,7 @@ public class UnicodeSet extends UnicodeFilter {
}
rangeList = null;
buffer = null;
return this;
}
/**
@ -1368,6 +1487,29 @@ public class UnicodeSet extends UnicodeFilter {
nestedPatDone = true;
i = pos.getIndex();
}
/*else if (!isLiteral && c == '{') {
// start of a string. find the rest.
try {
StringBuffer result = new StringBuffer();
while (i < pattern.length()) {
// don't need to worry about surrogates, since
// the only significant characters are } and \\.
char ch = pattern.charAt(i++);
if (ch == '}') {
break;
} else if (ch == '\\') {
result.append(pattern.charAt(i++)); // TODO, handle \\n, \\uXXXX etc.
} else {
result.append(ch);
}
}
// We have new string. Add it to set and continue;
} catch (Exception e) {
throw new Exception("foo");
}
}
*/
}
/* At this point we have either a character c, or a nested set. If
@ -1517,7 +1659,7 @@ public class UnicodeSet extends UnicodeFilter {
// Debug parser
System.out.println("UnicodeSet(" +
pattern.substring(start, i+1) + ") -> " +
com.ibm.icu.impl.Utility.escape(toString()));
Utility.escape(toString()));
}
}
@ -1770,4 +1912,104 @@ public class UnicodeSet extends UnicodeFilter {
private static final int max(int a, int b) {
return (a > b) ? a : b;
}
/**
 * The relationship between two sets A and B can be determined by looking at
 * three regions:
 * <pre>
 *   A - B
 *   A &amp; B  (intersection)
 *   B - A
 * </pre>
 * Each region is represented by one bit; the bit is set when the region is
 * not empty.
 */
/** Bit 2: A - B is not empty. */
public static final int A_NOT_B = 4;
/** Bit 1: A &amp; B is not empty. */
public static final int A_AND_B = 2;
/** Bit 0: B - A is not empty. */
public static final int B_NOT_A = 1;

/*
 * There are 8 combinations of the relationship bits. These correspond to
 * the filters (combinations of allowed bits) in hasRelation. They also
 * correspond to the modification functions, listed in comments.
 */
public static final int ANY = A_NOT_B | A_AND_B | B_NOT_A;  // union, addAll
public static final int CONTAINS = A_NOT_B | A_AND_B;       // A (unnecessary)
public static final int DISJOINT = A_NOT_B | B_NOT_A;       // A xor B, missing Java function
public static final int ISCONTAINED = A_AND_B | B_NOT_A;    // B (unnecessary)
public static final int NO_B = A_NOT_B;                     // A setDiff B, removeAll
public static final int EQUALS = A_AND_B;                   // A intersect B, retainAll
public static final int NO_A = B_NOT_A;                     // B setDiff A, removeAll
public static final int NONE = 0;                           // null (unnecessary)

/**
 * Utility that could be on SortedSet. Walks both sets once, in sorted
 * order, and fails as soon as an element falls into a region that the
 * filter does not allow. The elements are compared with their own
 * <code>compareTo</code>, so they must be mutually Comparable.
 *
 * @param a first set
 * @param allow filter, using ANY, CONTAINS, etc.; each set bit names a
 *        region that is allowed to be non-empty
 * @param b second set
 * @return whether the filter relationship is true or not.
 */
public static boolean hasRelation(SortedSet a, int allow, SortedSet b) {
    // Which regions may be non-empty.
    final boolean aOnlyAllowed = (allow & A_NOT_B) != 0;
    final boolean sharedAllowed = (allow & A_AND_B) != 0;
    final boolean bOnlyAllowed = (allow & B_NOT_A) != 0;

    final int sizeA = a.size();
    final int sizeB = b.size();

    // Cheap rejections based on size alone.
    if (allow == CONTAINS && sizeA < sizeB) {
        return false;
    }
    if (allow == ISCONTAINED && sizeA > sizeB) {
        return false;
    }
    if (allow == EQUALS && sizeA != sizeB) {
        return false;
    }

    // An empty set leaves at most one region non-empty.
    if (sizeA == 0) {
        return sizeB == 0 || bOnlyAllowed;
    }
    if (sizeB == 0) {
        return aOnlyAllowed;
    }

    // Merge-style walk over both sets.
    Iterator iterA = a.iterator();
    Iterator iterB = b.iterator();
    Comparable itemA = (Comparable) iterA.next();
    Comparable itemB = (Comparable) iterB.next();
    for (;;) {
        int order = itemA.compareTo(itemB);
        if (order < 0) {
            // itemA is only in A.
            if (!aOnlyAllowed) {
                return false;
            }
            if (!iterA.hasNext()) {
                // itemB (at least) remains, and it is only in B.
                return bOnlyAllowed;
            }
            itemA = (Comparable) iterA.next();
        } else if (order > 0) {
            // itemB is only in B.
            if (!bOnlyAllowed) {
                return false;
            }
            if (!iterB.hasNext()) {
                // itemA (at least) remains, and it is only in A.
                return aOnlyAllowed;
            }
            itemB = (Comparable) iterB.next();
        } else {
            // The element is in both sets.
            if (!sharedAllowed) {
                return false;
            }
            boolean moreA = iterA.hasNext();
            boolean moreB = iterB.hasNext();
            if (!moreA) {
                // Whatever is left in B is only in B.
                return !moreB || bOnlyAllowed;
            }
            if (!moreB) {
                // Whatever is left in A is only in A.
                return aOnlyAllowed;
            }
            itemA = (Comparable) iterA.next();
            itemB = (Comparable) iterB.next();
        }
    }
}
}