From 5994c85fac7af6ed255048019e653b2aaba9634f Mon Sep 17 00:00:00 2001
From: Mark Davis
Date: Tue, 15 Nov 2005 01:56:18 +0000
Subject: [PATCH] ICU-4700 updates for UnicodeSet

X-SVN-Rev: 18796
---
 .../com/ibm/icu/impl/CollectionUtilities.java | 393 ++++++++++++++++++
 icu4j/src/com/ibm/icu/impl/PrettyPrinter.java | 231 ++++++++++
 2 files changed, 624 insertions(+)
 create mode 100644 icu4j/src/com/ibm/icu/impl/CollectionUtilities.java
 create mode 100644 icu4j/src/com/ibm/icu/impl/PrettyPrinter.java

diff --git a/icu4j/src/com/ibm/icu/impl/CollectionUtilities.java b/icu4j/src/com/ibm/icu/impl/CollectionUtilities.java
new file mode 100644
index 0000000000..e56c2497d0
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/impl/CollectionUtilities.java
@@ -0,0 +1,393 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2005, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.regex.Matcher;
+
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+
+/**
+ * Utilities that ought to be on collections, but aren't
+ */
+public final class CollectionUtilities {
+    /**
+     * Utility like Arrays.asList(): copies an array of two-element rows into a map.
+     * @param source rows; elements [0] and [1] of each row form one mapping
+     * @param target map that receives the mappings
+     * @param reverse if false, row[0] is the key and row[1] the value; if true, the roles are swapped
+     * @return target, for chaining
+     */
+    public static Map asMap(Object[][] source, Map target, boolean reverse) {
+        int from = 0, to = 1;
+        if (reverse) {
+            from = 1; to = 0;
+        }
+        for (int i = 0; i < source.length; ++i) {
+            target.put(source[i][from], source[i][to]);
+        }
+        return target;
+    }
+
+    /**
+     * Adds every remaining element of an iterator to a collection.
+     * @param source iterator to drain
+     * @param target collection that receives the elements
+     * @return target, for chaining
+     */
+    public static Collection addAll(Iterator source, Collection target) {
+        while (source.hasNext()) {
+            target.add(source.next());
+        }
+        return target; // for chaining
+    }
+
+    /**
+     * Counts the remaining elements of an iterator. The iterator is exhausted afterwards.
+     * @param source iterator to drain
+     * @return number of elements that were returned by source
+     */
+    public static int size(Iterator source) {
+        int result = 0;
+        while (source.hasNext()) {
+            source.next();
+            ++result;
+        }
+        return result;
+    }
+
+    /**
+     * Builds a new HashMap from rows of {key, value} pairs.
+     * @param source rows; element [0] is the key and element [1] the value
+     * @return the new map
+     */
+    public static Map asMap(Object[][] source) {
+        return asMap(source, new HashMap(), false);
+    }
+
+    /**
+     * Utility that ought to be on Map: removes every listed key from the map.
+     * @param m map to modify
+     * @param itemsToRemove keys to remove
+     * @return m, for chaining
+     */
+    public static Map removeAll(Map m, Collection itemsToRemove) {
+        for (Iterator it = itemsToRemove.iterator(); it.hasNext();) {
+            Object item = it.next();
+            m.remove(item);
+        }
+        return m;
+    }
+
+    /**
+     * Returns the first element in the collection's iteration order.
+     * Static, like every other utility in this class, so that it can be called
+     * without an instance.
+     * @param c collection to inspect
+     * @return the first element, or null if the collection is empty
+     */
+    public static Object getFirst(Collection c) {
+        Iterator it = c.iterator();
+        if (!it.hasNext()) return null;
+        return it.next();
+    }
+
+    /**
+     * Returns the extreme element of a collection according to a comparator.
+     * Only the sign of the comparator result is examined, because
+     * Comparator.compare() may return any negative or positive value, not just -1 or 1.
+     * @param c collection to search
+     * @param comp comparator used to rank the elements
+     * @param direction negative to get the least element, positive to get the greatest.
+     *   With zero, an element replaces the current choice whenever it compares equal to it.
+     * @return the selected element, or null if the collection is empty
+     */
+    public static Object getBest(Collection c, Comparator comp, int direction) {
+        Iterator it = c.iterator();
+        if (!it.hasNext()) return null;
+        Object bestSoFar = it.next();
+        while (it.hasNext()) {
+            Object item = it.next();
+            int rel = comp.compare(item, bestSoFar);
+            boolean better = direction < 0 ? rel < 0
+                    : direction > 0 ? rel > 0
+                    : rel == 0;
+            if (better) {
+                bestSoFar = item;
+            }
+        }
+        return bestSoFar;
+    }
+
+    /**
+     * Predicate over arbitrary objects, used by removeAll and retainAll.
+     */
+    public interface Filter {
+        boolean matches(Object o);
+    }
+
+    /**
+     * Removes every element that matches the filter, using Iterator.remove().
+     * @param c collection to modify
+     * @param f filter selecting the elements to remove
+     * @return c, for chaining
+     */
+    public static Collection removeAll(Collection c, Filter f) {
+        for (Iterator it = c.iterator(); it.hasNext();) {
+            Object item = it.next();
+            if (f.matches(item)) it.remove();
+        }
+        return c;
+    }
+
+    /**
+     * Removes every element that does not match the filter, using Iterator.remove().
+     * @param c collection to modify
+     * @param f filter selecting the elements to keep
+     * @return c, for chaining
+     */
+    public static Collection retainAll(Collection c, Filter f) {
+        for (Iterator it = c.iterator(); it.hasNext();) {
+            Object item = it.next();
+            if (!f.matches(item)) it.remove();
+        }
+        return c;
+    }
+
+    /**
+     * Tells whether two sorted sets use the same ordering: both natural (null comparator),
+     * or comparators that are equal to each other.
+     */
+    private static boolean sameOrdering(Comparator x, Comparator y) {
+        return x == null ? y == null : x.equals(y);
+    }
+
+    /**
+     * Compares two items with the given comparator, or by natural order if it is null.
+     */
+    private static int compareItems(Comparator c, Object x, Object y) {
+        return c == null ? ((Comparable) x).compareTo(y) : c.compare(x, y);
+    }
+
+    /**
+     * Tells whether the two collections have at least one element in common.
+     * When both are SortedSets with the same ordering, a single merge pass over
+     * both is used; otherwise each element of a is looked up in b.
+     * @param a first collection
+     * @param b second collection
+     * @return true if some element of a is contained in b
+     */
+    public static boolean containsSome(Collection a, Collection b) {
+        // fast paths
+        if (a.size() == 0 || b.size() == 0) return false;
+        if (a == b) return true; // must test after size test.
+
+        if (a instanceof SortedSet && b instanceof SortedSet) {
+            Comparator aac = ((SortedSet) a).comparator();
+            Comparator bbc = ((SortedSet) b).comparator();
+            // The comparators must be compared with each other (not with the
+            // collection) for the merge pass to be valid.
+            if (sameOrdering(bbc, aac)) {
+                Iterator ai = a.iterator();
+                Iterator bi = b.iterator();
+                Object ao = ai.next(); // these are ok, since the sizes are != 0
+                Object bo = bi.next();
+                while (true) {
+                    int rel = compareItems(aac, ao, bo);
+                    if (rel < 0) {
+                        if (!ai.hasNext()) return false;
+                        ao = ai.next();
+                    } else if (rel > 0) {
+                        if (!bi.hasNext()) return false;
+                        bo = bi.next();
+                    } else {
+                        return true;
+                    }
+                }
+            }
+        }
+        for (Iterator it = a.iterator(); it.hasNext();) {
+            if (b.contains(it.next())) return true;
+        }
+        return false;
+    }
+
+    /**
+     * Tells whether a contains every element of b.
+     * When both are SortedSets with the same ordering, a single merge pass over
+     * both is used; otherwise the call is delegated to a.containsAll(b).
+     * @param a candidate superset
+     * @param b candidate subset
+     * @return true if every element of b is in a
+     */
+    public static boolean containsAll(Collection a, Collection b) {
+        // fast paths
+        if (a == b) return true;
+        if (b.size() == 0) return true;
+        if (a.size() == 0) return false;
+
+        if (a instanceof SortedSet && b instanceof SortedSet) {
+            Comparator aac = ((SortedSet) a).comparator();
+            Comparator bbc = ((SortedSet) b).comparator();
+            if (sameOrdering(bbc, aac)) {
+                Iterator ai = a.iterator();
+                Iterator bi = b.iterator();
+                Object ao = ai.next(); // these are ok, since the sizes are != 0
+                Object bo = bi.next();
+                while (true) {
+                    int rel = compareItems(aac, ao, bo);
+                    if (rel == 0) {
+                        // matched this element of b; succeed once b is used up
+                        if (!bi.hasNext()) return true;
+                        if (!ai.hasNext()) return false;
+                        bo = bi.next();
+                        ao = ai.next();
+                    } else if (rel < 0) {
+                        // ao precedes bo: skip ahead in a
+                        if (!ai.hasNext()) return false;
+                        ao = ai.next();
+                    } else {
+                        // bo precedes everything left in a, so it is missing from a
+                        return false;
+                    }
+                }
+            }
+        }
+        return a.containsAll(b);
+    }
+
+    /**
+     * Tells whether the two collections have no element in common.
+     * @return the negation of containsSome(a, b)
+     */
+    public static boolean containsNone(Collection a, Collection b) {
+        return !containsSome(a, b);
+    }
+
+    /**
+     * Used for results of getContainmentRelation
+     */
+    public static final int
+        ALL_EMPTY = 0,
+        NOT_A_SUPERSET_B = 1, // some element of b is not in a
+        NOT_A_DISJOINT_B = 2, // some element is in both a and b
+        NOT_A_SUBSET_B = 4,   // some element of a is not in b
+        NOT_A_EQUALS_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
+        A_PROPER_SUBSET_OF_B = NOT_A_DISJOINT_B | NOT_A_SUPERSET_B,
+        A_PROPER_SUPERSET_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B,
+        A_PROPER_OVERLAPS_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B | NOT_A_SUPERSET_B;
+
+    /**
+     * Assesses all the possible containment relations between collections A and B with one call.
+ * Returns an int with bits set, according to a "Venn Diagram" view of A vs B.
+ * NOT_A_SUPERSET_B: b - a != {}
+ * NOT_A_DISJOINT_B: a * b != {} // * denotes intersection
+ * NOT_A_SUBSET_B: a - b != {}
+ * Thus the bits can be used to get the following relations:
+ * for A_SUPERSET_B, use (x & CollectionUtilities.NOT_A_SUPERSET_B) == 0
+ * for A_SUBSET_B, use (x & CollectionUtilities.NOT_A_SUBSET_B) == 0
+ * for A_EQUALS_B, use (x & CollectionUtilities.NOT_A_EQUALS_B) == 0
+ * for A_DISJOINT_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) == 0
+ * for A_OVERLAPS_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) != 0
+     */
+    public static int getContainmentRelation(Collection a, Collection b) {
+        if (a.size() == 0) {
+            return (b.size() == 0) ? ALL_EMPTY : NOT_A_SUPERSET_B;
+        } else if (b.size() == 0) {
+            return NOT_A_SUBSET_B;
+        }
+        // Each loop can set only two of the three bits; once both are set it can stop early.
+        final int allFromA = NOT_A_SUBSET_B | NOT_A_DISJOINT_B;
+        final int allFromB = NOT_A_SUPERSET_B | NOT_A_DISJOINT_B;
+        int result = 0;
+        // WARNING: one might think that the following can be short-circuited, by looking at
+        // the sizes of a and b. However, this would fail in general, where a different comparator is being
+        // used in the two collections. Unfortunately, there is no failsafe way to test for that.
+        for (Iterator it = a.iterator(); result != allFromA && it.hasNext();) {
+            result |= (b.contains(it.next())) ? NOT_A_DISJOINT_B : NOT_A_SUBSET_B;
+        }
+        for (Iterator it = b.iterator(); (result & allFromB) != allFromB && it.hasNext();) {
+            result |= (a.contains(it.next())) ? NOT_A_DISJOINT_B : NOT_A_SUPERSET_B;
+        }
+        return result;
+    }
+
+    /**
+     * Returns a copy of the source string without the code points that are in the removals set.
+     * The string is walked by code point, so supplementary characters are handled as a unit.
+     * @param source string to filter
+     * @param removals code points to drop
+     * @return the filtered string
+     */
+    public static String remove(String source, UnicodeSet removals) {
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
+            cp = UTF16.charAt(source, i);
+            if (!removals.contains(cp)) UTF16.append(result, cp);
+        }
+        return result.toString();
+    }
+
+    /**
+     * Formats a UnicodeSet as a pattern using a PrettyPrinter configured from the arguments.
+     * Every argument that is null leaves the corresponding PrettyPrinter default in place.
+     * @param uset set to format
+     * @param compressRanges passed to PrettyPrinter.setCompressRanges
+     * @param toQuote passed to PrettyPrinter.setToQuote, if not null
+     * @param quoter passed to PrettyPrinter.setQuoter, if not null
+     * @param ordering passed to PrettyPrinter.setOrdering, if not null
+     * @param spaceComparator passed to PrettyPrinter.setSpaceComparator, if not null
+     * @return the result of PrettyPrinter.toPattern(uset)
+     */
+    public static String prettyPrint(UnicodeSet uset, boolean compressRanges, UnicodeSet toQuote, Transliterator quoter,
+            Comparator ordering, Comparator spaceComparator) {
+        PrettyPrinter pp = new PrettyPrinter().setCompressRanges(compressRanges);
+        if (toQuote != null) pp.setToQuote(toQuote);
+        if (quoter != null) pp.setQuoter(quoter); // previously accepted but never applied
+        if (ordering != null) pp.setOrdering(ordering);
+        if (spaceComparator != null) pp.setSpaceComparator(spaceComparator);
+        return pp.toPattern(uset);
+    }
+
+    /**
+     * Comparator that consults a list of comparators in order and reports the first difference.
+     */
+    static class MultiComparator implements Comparator {
+        private Comparator[] comparators;
+
+        public MultiComparator (Comparator[] comparators) {
+            this.comparators = comparators;
+        }
+
+        /**
+         * Lexicographic compare. Returns the first difference
+         * @return zero if equal. Otherwise +/- (i+1)
+         * where i is the index of the first comparator finding a difference
+         * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+         */
+        public int compare(Object arg0, Object arg1) {
+            for (int i = 0; i < comparators.length; ++i) {
+                int result = comparators[i].compare(arg0, arg1);
+                if (result == 0) continue;
+                if (result > 0) return i+1;
+                return -(i+1);
+            }
+            return 0;
+        }
+    }
+
+    /**
+     * Modifies Unicode set to flatten the strings. Eg [abc{da}] => [abcd]
+     * The set is only modified if it actually contains a string.
+     * @param exemplar1 set to flatten in place
+     * @return exemplar1, for chaining
+     */
+    public static UnicodeSet flatten(UnicodeSet exemplar1) {
+        UnicodeSet result = new UnicodeSet();
+        boolean gotString = false;
+        for (UnicodeSetIterator it = new UnicodeSetIterator(exemplar1); it.nextRange();) {
+            if (it.codepoint == UnicodeSetIterator.IS_STRING) {
+                result.addAll(it.string);
+                gotString = true;
+            } else {
+                result.add(it.codepoint, it.codepointEnd);
+            }
+        }
+        if (gotString) exemplar1.set(result);
+        return exemplar1;
+    }
+
+    /**
+     * For producing filtered iterators: wraps a base iterator and only returns
+     * the items for which isIncluded() is true. Removal is not supported.
+     */
+    public static abstract class FilteredIterator implements Iterator {
+        private Iterator baseIterator;
+        // Sentinels for the look-ahead slot: nothing fetched yet / base iterator exhausted.
+        private static final Object EMPTY = new Object();
+        private static final Object DONE = new Object();
+        private Object nextObject = EMPTY;
+
+        /**
+         * Sets the iterator to filter and clears any look-ahead state, so that
+         * the same FilteredIterator can be reused.
+         * @return this, for chaining
+         */
+        public FilteredIterator set(Iterator baseIterator) {
+            this.baseIterator = baseIterator;
+            this.nextObject = EMPTY;
+            return this;
+        }
+        public void remove() {
+            throw new UnsupportedOperationException("Doesn't support removal");
+        }
+        /**
+         * Returns the next included item.
+         * @throws java.util.NoSuchElementException if there is no further included item
+         */
+        public Object next() {
+            // hasNext() fills the look-ahead slot; without this call the internal
+            // sentinel objects could be handed out to the caller.
+            if (!hasNext()) {
+                throw new java.util.NoSuchElementException();
+            }
+            Object result = nextObject;
+            nextObject = EMPTY;
+            return result;
+        }
+        public boolean hasNext() {
+            if (nextObject == DONE) return false;
+            if (nextObject != EMPTY) return true;
+            while (baseIterator.hasNext()) {
+                nextObject = baseIterator.next();
+                if (isIncluded(nextObject)) {
+                    return true;
+                }
+            }
+            nextObject = DONE;
+            return false;
+        }
+        /**
+         * @param item an item from the base iterator
+         * @return true if the item should be returned by this iterator
+         */
+        abstract public boolean isIncluded(Object item);
+    }
+
+    /**
+     * Filtered iterator that keeps the strings starting with a given prefix.
+     * Items are cast to String.
+     */
+    public static class PrefixIterator extends FilteredIterator {
+        private String prefix;
+        public PrefixIterator set(Iterator baseIterator, String prefix) {
+            super.set(baseIterator);
+            this.prefix = prefix;
+            return this;
+        }
+        public boolean isIncluded(Object item) {
+            return ((String)item).startsWith(prefix);
+        }
+    }
+
+    /**
+     * Filtered iterator that keeps the strings fully matched by a regex matcher.
+     * Items are cast to String; the supplied matcher is reset for every item.
+     */
+    public static class RegexIterator extends FilteredIterator {
+        private Matcher matcher;
+        public RegexIterator set(Iterator baseIterator, Matcher matcher) {
+            super.set(baseIterator);
+            this.matcher = matcher;
+            return this;
+        }
+        public boolean isIncluded(Object item) {
+            return matcher.reset((String)item).matches();
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/icu4j/src/com/ibm/icu/impl/PrettyPrinter.java b/icu4j/src/com/ibm/icu/impl/PrettyPrinter.java
new file mode 100644
index 0000000000..df3d546e7e
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/impl/PrettyPrinter.java
@@ -0,0 +1,231 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 1996-2005, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.CollectionUtilities.MultiComparator;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.RuleBasedCollator;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.ULocale;
+
+/** Provides more flexible formatting of UnicodeSet patterns.
+ */
+public class PrettyPrinter {
+    // Code points that are always escaped in the generated pattern.
+    private static final UnicodeSet patternWhitespace = new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]");
+
+    // Formatting state; reset at the start of every toPattern() call.
+    private boolean first = true;
+    private StringBuffer target = new StringBuffer();
+    // Pending range that has not been written to target yet; -2 means "none".
+    private int firstCodePoint = -2;
+    private int lastCodePoint = -2;
+    private boolean compressRanges = true;
+    // Last item written, compared against the next item to decide on a separating space.
+    private String lastString = "";
+    private UnicodeSet toQuote = new UnicodeSet(patternWhitespace);
+    private Transliterator quoter = null;
+
+    private Comparator ordering;
+    private Comparator spaceComp = Collator.getInstance(ULocale.ROOT);
+    {
+        setOrdering(Collator.getInstance(ULocale.ROOT));
+        ((RuleBasedCollator)spaceComp).setStrength(RuleBasedCollator.PRIMARY);
+    }
+
+    public Transliterator getQuoter() {
+        return quoter;
+    }
+
+    /**
+     * @param quoter transliterator applied to code points that must be quoted;
+     *   if null, \\uXXXX / \\UXXXXXXXX escapes are used instead
+     * @return this, for chaining
+     */
+    public PrettyPrinter setQuoter(Transliterator quoter) {
+        this.quoter = quoter;
+        return this; // for chaining
+    }
+
+    public boolean isCompressRanges() {
+        return compressRanges;
+    }
+
+    /**
+     * @param compressRanges if you want abcde instead of a-e, make this false
+     * @return this, for chaining
+     */
+    public PrettyPrinter setCompressRanges(boolean compressRanges) {
+        this.compressRanges = compressRanges;
+        return this;
+    }
+
+    /**
+     * @return the effective ordering, i.e. the comparator built by setOrdering
+     *   (not the comparator that was passed to it)
+     */
+    public Comparator getOrdering() {
+        return ordering;
+    }
+
+    /**
+     * @param ordering the resulting ordering of the list of characters in the pattern.
+     *   A UTF16.StringComparator is appended as a tie-breaker for strings that the
+     *   given ordering treats as equal.
+     * @return this, for chaining
+     */
+    public PrettyPrinter setOrdering(Comparator ordering) {
+        this.ordering = new MultiComparator(new Comparator[] {ordering, new UTF16.StringComparator(true,false,0)});
+        return this;
+    }
+
+    public Comparator getSpaceComparator() {
+        return spaceComp;
+    }
+
+    /**
+     * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
+     * @return this, for chaining
+     */
+    public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
+        this.spaceComp = spaceComp;
+        return this;
+    }
+
+    public UnicodeSet getToQuote() {
+        return toQuote;
+    }
+
+    /**
+     * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
+     * The argument is cloned, so it is not modified and later changes to it have no effect.
+     * @param toQuote
+     * @return this, for chaining
+     */
+    public PrettyPrinter setToQuote(UnicodeSet toQuote) {
+        toQuote = (UnicodeSet)toQuote.clone();
+        toQuote.addAll(patternWhitespace);
+        this.toQuote = toQuote;
+        return this;
+    }
+
+    /**
+     * Get the pattern for a particular set.
+     * The generated pattern is parsed back and compared with the input as a self-check.
+     * @param uset
+     * @return formatted UnicodeSet
+     * @throws InternalError if the generated pattern does not parse back to an equal set
+     */
+    public String toPattern(UnicodeSet uset) {
+        // Start from a clean state, so that reusing this printer does not let the
+        // previous call influence spacing or ranges.
+        first = true;
+        lastString = "";
+        firstCodePoint = lastCodePoint = -2;
+
+        // make sure that comparison separates all strings, even canonically equivalent ones
+        Set orderedStrings = new TreeSet(ordering);
+        for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.next();) {
+            orderedStrings.add(it.getString());
+        }
+        target.setLength(0);
+        target.append("[");
+        for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
+            appendUnicodeSetItem((String) it.next());
+        }
+        flushLast();
+        target.append("]");
+        String sresult = target.toString();
+        UnicodeSet doubleCheck = new UnicodeSet(sresult);
+        if (!uset.equals(doubleCheck)) {
+            throw new InternalError("Failure to round-trip in pretty-print");
+        }
+        return sresult;
+    }
+
+    /**
+     * Appends one item of the set. A multi-code-point string is written at once
+     * as {...}; a single code point is held back so that it can be merged
+     * into a range with the code points that follow.
+     * @return this, for chaining
+     */
+    PrettyPrinter appendUnicodeSetItem(String s) {
+        int cp;
+        if (UTF16.hasMoreCodePointsThan(s, 1)) {
+            flushLast();
+            addSpace(s);
+            target.append("{");
+            for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
+                appendQuoted(cp = UTF16.charAt(s, i));
+            }
+            target.append("}");
+            lastString = s;
+        } else {
+            if (!compressRanges) {
+                // Without compression every code point is written on its own.
+                flushLast();
+            }
+            cp = UTF16.charAt(s, 0);
+            if (cp == lastCodePoint + 1) {
+                lastCodePoint = cp; // continue range
+            } else { // start range
+                flushLast();
+                firstCodePoint = lastCodePoint = cp;
+            }
+        }
+        return this;
+    }
+
+    /**
+     * Appends a space before item s, unless s is the first item. A space is added
+     * if spaceComp reports a difference from the previously written item, or if s
+     * starts with a non-spacing or enclosing mark.
+     */
+    private void addSpace(String s) {
+        if (first) {
+            first = false;
+        } else if (spaceComp.compare(s, lastString) != 0) {
+            target.append(' ');
+        } else {
+            int type = UCharacter.getType(UTF16.charAt(s,0));
+            if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
+                target.append(' ');
+            }
+        }
+    }
+
+    /**
+     * Writes the pending code point or range, if any, and clears it.
+     * A range of exactly two code points is written as the two code points
+     * separated by a space rather than with '-'.
+     */
+    private void flushLast() {
+        if (lastCodePoint >= 0) {
+            addSpace(UTF16.valueOf(firstCodePoint));
+            if (firstCodePoint != lastCodePoint) {
+                appendQuoted(firstCodePoint);
+                target.append(firstCodePoint + 1 == lastCodePoint ? ' ' : '-');
+            }
+            appendQuoted(lastCodePoint);
+            lastString = UTF16.valueOf(lastCodePoint);
+            firstCodePoint = lastCodePoint = -2;
+        }
+    }
+
+    /**
+     * Appends a code point, escaping it if necessary. Code points in toQuote go
+     * through the quoter, or are written as \\uXXXX / \\UXXXXXXXX if there is no
+     * quoter; the characters listed in the switch below are prefixed with a backslash.
+     * @return this, for chaining
+     */
+    PrettyPrinter appendQuoted(int codePoint) {
+        if (toQuote.contains(codePoint)) {
+            if (quoter != null) {
+                target.append(quoter.transliterate(UTF16.valueOf(codePoint)));
+                return this;
+            }
+            if (codePoint > 0xFFFF) {
+                target.append("\\U");
+                target.append(Utility.hex(codePoint,8));
+            } else {
+                target.append("\\u");
+                target.append(Utility.hex(codePoint,4));
+            }
+            return this;
+        }
+        switch (codePoint) {
+            case '[': // SET_OPEN:
+            case ']': // SET_CLOSE:
+            case '-': // HYPHEN:
+            case '^': // COMPLEMENT:
+            case '&': // INTERSECTION:
+            case '\\': //BACKSLASH:
+            case '{':
+            case '}':
+            case '$':
+            case ':':
+                target.append('\\');
+                break;
+            default:
+                // Escape whitespace
+                if (patternWhitespace.contains(codePoint)) {
+                    target.append('\\');
+                }
+                break;
+        }
+        UTF16.append(target, codePoint);
+        return this;
+    }
+}
\ No newline at end of file