ICU-4217 made UnicodeSet Freezable (also fixed bugs in pretty printer, an internal function)

X-SVN-Rev: 20109
This commit is contained in:
Mark Davis 2006-08-19 01:25:42 +00:00
parent 106b17a85e
commit 92d404cc01
4 changed files with 339 additions and 38 deletions

View File

@ -1,6 +1,6 @@
/* /*
******************************************************************************* *******************************************************************************
* Copyright (C) 1996-2005, International Business Machines Corporation and * * Copyright (C) 1996-2006, International Business Machines Corporation and *
* others. All Rights Reserved. * * others. All Rights Reserved. *
******************************************************************************* *******************************************************************************
*/ */
@ -9,6 +9,7 @@ import com.ibm.icu.lang.*;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.text.*; import com.ibm.icu.text.*;
import com.ibm.icu.dev.test.*; import com.ibm.icu.dev.test.*;
import com.ibm.icu.impl.PrettyPrinter;
import com.ibm.icu.impl.Utility; import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.SortedSetRelation; import com.ibm.icu.impl.SortedSetRelation;
import java.util.*; import java.util.*;
@ -146,7 +147,6 @@ public class UnicodeSetTest extends TestFmwk {
return true; return true;
} }
// NOTE: copied the following from Utility. There ought to be a version in there with a flag // NOTE: copied the following from Utility. There ought to be a version in there with a flag
// that does the Java stuff // that does the Java stuff
@ -1308,6 +1308,123 @@ public class UnicodeSetTest extends TestFmwk {
expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"); expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]"); expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
} }
/**
 * Checks the Freezable behavior of UnicodeSet (ticket 4217): a frozen set must
 * reject every modification, while a thawed copy of it must accept them.
 */
public void TestFrozen() {
    // freeze() is declared to return Object, hence the cast.
    final UnicodeSet frozenSet = (UnicodeSet) new UnicodeSet("[[:whitespace:]A]").freeze();
    final boolean[] modes = { true, false };
    for (int m = 0; m < modes.length; ++m) {
        checkModification(frozenSet, modes[m]);
    }
}
/**
 * Applies each mutating UnicodeSet operation, one per loop iteration, to a fresh
 * copy of <code>original</code> and reports an error when the outcome does not
 * match the frozen/thawed expectation.
 *
 * @param original the set to copy for every trial; it is never modified directly here
 * @param isFrozen if true the copy is made with clone() and every operation is expected
 *        to throw UnsupportedOperationException and leave the copy equal to the original;
 *        if false the copy is made with cloneAsThawed() and every operation is expected
 *        to succeed and (except for compact()) make the copy unequal to the original
 */
public void checkModification(UnicodeSet original, boolean isFrozen) {
main:
for (int i = 0; ;++i) {
// A new copy per operation, so earlier operations cannot influence later ones.
UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
// Assume an exception until the operation is seen to complete normally.
boolean gotException = true;
boolean checkEquals = true;
try {
switch(i) {
case 0: test.add(0); break;
case 1: test.add(0,1); break;
case 2: test.add("a"); break;
case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
case 4: test.addAll("ab"); break;
case 5: test.addAll(new UnicodeSet("[ab]")); break;
case 6: test.applyIntPropertyValue(0,0); break;
case 7: test.applyPattern("[ab]"); break;
case 8: test.applyPattern("[ab]", true); break;
case 9: test.applyPattern("[ab]", 0); break;
case 10: test.applyPropertyAlias("hex","true"); break;
case 11: test.applyPropertyAlias("hex", "true", null); break;
case 12: test.closeOver(UnicodeSet.CASE); break;
// The equality check is switched off for compact().
case 13: test.compact(); checkEquals = false; break;
case 14: test.complement(0); break;
case 15: test.complement(0,0); break;
case 16: test.complement("ab"); break;
case 17: test.complementAll("ab"); break;
case 18: test.complementAll(new UnicodeSet("[ab]")); break;
case 19: test.remove(' '); break;
case 20: test.remove(' ','a'); break;
case 21: test.remove(" "); break;
case 22: test.removeAll(" a"); break;
case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
case 24: test.retain(' '); break;
case 25: test.retain(' ','a'); break;
case 26: test.retain(" "); break;
case 27: test.retainAll(" a"); break;
case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
case 29: test.set(0,1); break;
case 30: test.set(new UnicodeSet("[ab]")); break;
// Unused indices simply move on to the next iteration, so new cases can be added
// above without touching the end marker below.
default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
// End marker: the method returns once i reaches 35.
case 35: return;
}
// Reached only when the operation did not throw.
gotException = false;
} catch (UnsupportedOperationException e) {
// do nothing
}
if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
if (checkEquals) {
if (test.equals(original)) {
if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
} else { // unequal
if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
}
}
}
}
// Fixed UnicodeSet patterns that TestPrettyPrinting round-trips through
// PrettyPrinter before it moves on to randomly generated ranges.
String[] prettyData = {
"[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
"[:any:]",
"[:whitespace:]",
"[:linebreak=AL:]",
};
/**
 * Round-trips sets through PrettyPrinter: first every pattern in prettyData,
 * then (up to test number 1000) a single set that is cumulatively modified by
 * complementing pseudo-random ranges. The random generator has a fixed seed,
 * so the sequence of ranges is the same on every run.
 */
public void TestPrettyPrinting() {
    PrettyPrinter printer = new PrettyPrinter();
    int testNumber = 0;
    while (testNumber < prettyData.length) {
        checkPrettySet(printer, testNumber, new UnicodeSet(prettyData[testNumber]));
        ++testNumber;
    }

    Random generator = new Random(0);
    UnicodeSet accumulated = new UnicodeSet();
    while (testNumber < 1000) {
        // Range start: magnitude of a Gaussian scaled to 0x10000, clamped to the last code point.
        double rangeStart = Math.min(Math.abs(generator.nextGaussian() * 0x10000), 0x10FFFF);
        // Range end: start plus the magnitude of a Gaussian scaled to 0x100, clamped likewise.
        double rangeLength = Math.abs(generator.nextGaussian() * 0x100);
        double rangeEnd = Math.min(rangeStart + rangeLength, 0x10FFFF);

        accumulated.complement((int) rangeStart, (int) rangeEnd);
        checkPrettySet(printer, testNumber, accumulated);
        ++testNumber;
    }
}
/**
 * Pretty-prints <code>test</code>, parses the resulting pattern back into a
 * UnicodeSet and reports an error if the two sets differ; otherwise logs the
 * (truncated) source set and pattern.
 *
 * @param pp   the printer under test
 * @param i    test number, used only as a prefix in the messages
 * @param test the set to round-trip
 */
private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
    final String pattern = pp.toPattern(test);
    final UnicodeSet reparsed = new UnicodeSet(pattern);
    if (test.equals(reparsed)) {
        logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pattern));
        return;
    }
    errln(i + ". Failed test: " + test + " != " + pattern);
}
/**
 * Shortens a string for log output. Strings of at most 100 UTF-16 code units
 * are returned unchanged; longer ones are cut to at most 97 code units and
 * given a "..." suffix.
 *
 * @param string the text to shorten
 * @return the original string, or its shortened form ending in "..."
 */
private String truncate(String string) {
    if (string.length() <= 100) return string;
    int cut = 97;
    // Never leave a lone lead surrogate in front of the ellipsis: if the cut
    // would fall in the middle of a surrogate pair, drop the lead half as well.
    char lastKept = string.charAt(cut - 1);
    if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
        --cut;
    }
    return string.substring(0, cut) + "...";
}
public class TokenSymbolTable implements SymbolTable { public class TokenSymbolTable implements SymbolTable {
HashMap contents = new HashMap(); HashMap contents = new HashMap();

View File

@ -7,10 +7,12 @@
*/ */
package com.ibm.icu.impl; package com.ibm.icu.impl;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.SortedSet; import java.util.SortedSet;
@ -328,6 +330,55 @@ public final class CollectionUtilities {
return result.toString(); return result.toString();
} }
/**
 * Tests whether <code>other</code> occurs in <code>text</code> starting exactly
 * at <code>offset</code>. Comparison is by UTF-16 code unit.
 * <p>
 * If a non-empty <code>other</code> does not fit between <code>offset</code> and the
 * end of <code>text</code> (or <code>offset</code> is negative), the result is -1
 * rather than an index exception. An empty <code>other</code> always yields 0.
 *
 * @param text the text to look in
 * @param offset index in <code>text</code> at which the comparison starts
 * @param other the string expected at that position
 * @return the length of <code>other</code> if it matches at <code>offset</code>, otherwise -1
 */
public static int matchesAt(CharSequence text, int offset, CharSequence other) {
    final int len = other.length();
    // Not enough text left for a full match: report "no match" instead of
    // running off the end of the text.
    if (len > 0 && (offset < 0 || len > text.length() - offset)) {
        return -1;
    }
    for (int i = 0; i < len; ++i) {
        if (other.charAt(i) != text.charAt(offset + i)) {
            return -1;
        }
    }
    return len;
}
/**
 * Returns the ending offset found by matching characters with testSet, until a
 * position is found that doesn't match (or the end of the text is reached).
 *
 * @param string the text to scan
 * @param offset the position at which scanning starts
 * @param testSet the set whose elements are repeatedly matched against the text
 * @return the offset just past the last consecutive match; equal to
 *         <code>offset</code> if nothing matches at the starting position
 */
public int span(CharSequence string, int offset, UnicodeSet testSet) {
    final int limit = string.length();
    while (offset < limit) {
        int newOffset = testSet.matchesAt(string, offset);
        // Anything that does not advance past offset is treated as "no match";
        // this covers both a -1 result and an end offset that fails to move forward.
        if (newOffset <= offset) {
            break;
        }
        offset = newOffset; // continue after the matched element
    }
    return offset;
}
/**
 * Returns the ending offset found by skipping positions that do not match
 * testSet, until a position is found that does match (or the end of the text
 * is reached).
 *
 * @param string the text to scan
 * @param offset the position at which scanning starts
 * @param testSet the set whose elements are matched against the text
 * @return the first offset at or after the starting position where testSet
 *         matches, or the length of the text if it never matches
 */
public int spanNot(CharSequence string, int offset, UnicodeSet testSet) {
    final int limit = string.length();
    while (offset < limit) {
        int newOffset = testSet.matchesAt(string, offset);
        // Only an end offset beyond the current position counts as a match;
        // a negative value or one that fails to move forward means "no match".
        if (newOffset > offset) {
            break;
        }
        ++offset; // try next character position
        // we don't have to worry about surrogates for this.
    }
    return offset;
}
public static String prettyPrint(UnicodeSet uset, boolean compressRanges, UnicodeSet toQuote, Transliterator quoter, public static String prettyPrint(UnicodeSet uset, boolean compressRanges, UnicodeSet toQuote, Transliterator quoter,
Comparator ordering, Comparator spaceComparator) { Comparator ordering, Comparator spaceComparator) {
PrettyPrinter pp = new PrettyPrinter().setCompressRanges(compressRanges); PrettyPrinter pp = new PrettyPrinter().setCompressRanges(compressRanges);

View File

@ -25,7 +25,8 @@ import com.ibm.icu.util.ULocale;
/** Provides more flexible formatting of UnicodeSet patterns. /** Provides more flexible formatting of UnicodeSet patterns.
*/ */
public class PrettyPrinter { public class PrettyPrinter {
private static UnicodeSet patternWhitespace = new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]"); private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
private boolean first = true; private boolean first = true;
private StringBuffer target = new StringBuffer(); private StringBuffer target = new StringBuffer();
@ -113,27 +114,45 @@ public class PrettyPrinter {
*/ */
public String toPattern(UnicodeSet uset) { public String toPattern(UnicodeSet uset) {
first = true; first = true;
UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
// make sure that comparison separates all strings, even canonically equivalent ones // make sure that comparison separates all strings, even canonically equivalent ones
Set orderedStrings = new TreeSet(ordering); Set orderedStrings = new TreeSet(ordering);
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.next();) { for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
orderedStrings.add(it.getString()); if (it.codepoint == it.IS_STRING) {
orderedStrings.add(it.string);
} else {
for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
if (!putAtEnd.contains(i)) {
orderedStrings.add(UTF16.valueOf(i));
}
}
}
} }
target.setLength(0); target.setLength(0);
target.append("["); target.append("[");
for (Iterator it = orderedStrings.iterator(); it.hasNext();) { for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
appendUnicodeSetItem((String) it.next()); appendUnicodeSetItem((String) it.next());
} }
for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
appendUnicodeSetItem(it.codepoint);
}
flushLast(); flushLast();
target.append("]"); target.append("]");
String sresult = target.toString(); String sresult = target.toString();
UnicodeSet doubleCheck = new UnicodeSet(sresult);
if (!uset.equals(doubleCheck)) { // double check the results. This can be removed once we have more tests.
throw new IllegalStateException("Failure to round-trip in pretty-print"); // try {
} // UnicodeSet doubleCheck = new UnicodeSet(sresult);
// if (!uset.equals(doubleCheck)) {
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
// }
// } catch (RuntimeException e) {
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
// }
return sresult; return sresult;
} }
PrettyPrinter appendUnicodeSetItem(String s) { private PrettyPrinter appendUnicodeSetItem(String s) {
int cp; int cp;
if (UTF16.hasMoreCodePointsThan(s, 1)) { if (UTF16.hasMoreCodePointsThan(s, 1)) {
flushLast(); flushLast();
@ -145,18 +164,21 @@ public class PrettyPrinter {
target.append("}"); target.append("}");
lastString = s; lastString = s;
} else { } else {
if (!compressRanges) appendUnicodeSetItem(UTF16.charAt(s, 0));
flushLast();
cp = UTF16.charAt(s, 0);
if (cp == lastCodePoint + 1) {
lastCodePoint = cp; // continue range
} else { // start range
flushLast();
firstCodePoint = lastCodePoint = cp;
}
} }
return this; return this;
} }
/**
 * Feeds one code point into the pending range: either extends the range that
 * is currently being collected or flushes it and starts a new one.
 */
private void appendUnicodeSetItem(int cp) {
    // Without range compression, whatever is pending is flushed before each item.
    if (!compressRanges) {
        flushLast();
    }
    boolean continuesRange = (cp == lastCodePoint + 1);
    if (!continuesRange) {
        // Not adjacent to the pending range: emit it and begin a new range at cp.
        flushLast();
        firstCodePoint = cp;
    }
    lastCodePoint = cp;
}
/** /**
* *
*/ */
@ -166,10 +188,13 @@ public class PrettyPrinter {
} else if (spaceComp.compare(s, lastString) != 0) { } else if (spaceComp.compare(s, lastString) != 0) {
target.append(' '); target.append(' ');
} else { } else {
int type = UCharacter.getType(UTF16.charAt(s,0)); int cp = UTF16.charAt(s,0);
int type = UCharacter.getType(cp);
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) { if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
target.append(' '); target.append(' ');
} } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
target.append(' '); // make sure we don't accidentally merge two surrogates
}
} }
} }

View File

@ -11,6 +11,7 @@ import com.ibm.icu.lang.*;
import java.io.IOException; import java.io.IOException;
import com.ibm.icu.impl.CollectionUtilities;
import com.ibm.icu.impl.NormalizerImpl; import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.Utility; import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.UCharacterProperty; import com.ibm.icu.impl.UCharacterProperty;
@ -20,6 +21,7 @@ import com.ibm.icu.impl.UPropertyAliases;
import com.ibm.icu.impl.SortedSetRelation; import com.ibm.icu.impl.SortedSetRelation;
import com.ibm.icu.impl.RuleCharacterIterator; import com.ibm.icu.impl.RuleCharacterIterator;
import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo; import com.ibm.icu.util.VersionInfo;
@ -265,7 +267,7 @@ import java.util.Collection;
* @stable ICU 2.0 * @stable ICU 2.0
* @see UnicodeSetIterator * @see UnicodeSetIterator
*/ */
public class UnicodeSet extends UnicodeFilter { public class UnicodeSet extends UnicodeFilter implements Freezable {
private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units. private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
@ -439,7 +441,9 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public Object clone() { public Object clone() {
return new UnicodeSet(this); UnicodeSet result = new UnicodeSet(this);
result.frozen = this.frozen;
return result;
} }
/** /**
@ -452,6 +456,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet set(int start, int end) { public UnicodeSet set(int start, int end) {
checkFrozen();
clear(); clear();
complement(start, end); complement(start, end);
return this; return this;
@ -464,6 +469,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet set(UnicodeSet other) { public UnicodeSet set(UnicodeSet other) {
checkFrozen();
list = (int[]) other.list.clone(); list = (int[]) other.list.clone();
len = other.len; len = other.len;
pat = other.pat; pat = other.pat;
@ -481,6 +487,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public final UnicodeSet applyPattern(String pattern) { public final UnicodeSet applyPattern(String pattern) {
checkFrozen();
return applyPattern(pattern, null, null, IGNORE_SPACE); return applyPattern(pattern, null, null, IGNORE_SPACE);
} }
@ -496,6 +503,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) { public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
checkFrozen();
return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0); return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0);
} }
@ -511,6 +519,7 @@ public class UnicodeSet extends UnicodeFilter {
* @internal * @internal
*/ */
public UnicodeSet applyPattern(String pattern, int options) { public UnicodeSet applyPattern(String pattern, int options) {
checkFrozen();
return applyPattern(pattern, null, null, options); return applyPattern(pattern, null, null, options);
} }
@ -908,6 +917,41 @@ public class UnicodeSet extends UnicodeFilter {
return maxLen; return maxLen;
} }
/**
 * Tests whether the text matches at the offset. If so, returns the end of the longest
 * substring that it matches. If not, returns -1. For now, an internal routine.
 * <p>
 * Multi-character strings of this set are tried first and the longest one that
 * matches wins; if no string of length 2 or more matches, the code point at
 * <code>offset</code> is tested with contains().
 *
 * @param text the text to match against
 * @param offset the position in <code>text</code> at which the match must start
 * @return the offset just past the longest match, or -1 if nothing in this set
 *         matches at <code>offset</code> (including when <code>offset</code> lies
 *         outside the text)
 * @internal
 */
public int matchesAt(CharSequence text, int offset) {
    if (offset < 0 || offset >= text.length()) {
        return -1; // nothing can match outside the text
    }
    int len = -1;
    if (strings.size() != 0) {
        final char firstChar = text.charAt(offset);
        final int remaining = text.length() - offset;
        // NOTE(review): the early exit below assumes 'strings' iterates in ascending
        // order, which the previous version of this loop also relied on - confirm.
        for (Iterator it = strings.iterator(); it.hasNext();) {
            String trial = (String) it.next();
            char firstStringChar = trial.charAt(0);
            if (firstStringChar < firstChar) continue; // not yet at candidates
            if (firstStringChar > firstChar) break;    // past all candidates
            if (trial.length() > remaining) continue;  // cannot fit in the rest of the text
            int tempLen = CollectionUtilities.matchesAt(text, offset, trial);
            if (tempLen > len) {
                len = tempLen; // keep the longest match seen so far
            }
        }
    }
    if (len < 2) {
        // No multi-unit string matched: fall back to the single code point.
        int cp = UTF16.charAt(text, offset);
        if (contains(cp)) {
            len = UTF16.getCharCount(cp);
        }
    }
    return len < 0 ? -1 : offset + len;
}
/** /**
* Implementation of UnicodeMatcher API. Union the set of all * Implementation of UnicodeMatcher API. Union the set of all
@ -987,6 +1031,12 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet add(int start, int end) { public UnicodeSet add(int start, int end) {
checkFrozen();
return add_unchecked(start, end);
}
// for internal use, after checkFrozen has been called
private UnicodeSet add_unchecked(int start, int end) {
if (start < MIN_VALUE || start > MAX_VALUE) { if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
} }
@ -1027,6 +1077,12 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public final UnicodeSet add(int c) { public final UnicodeSet add(int c) {
checkFrozen();
return add_unchecked(c);
}
// for internal use only, after checkFrozen has been called
private final UnicodeSet add_unchecked(int c) {
if (c < MIN_VALUE || c > MAX_VALUE) { if (c < MIN_VALUE || c > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6)); throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
} }
@ -1121,13 +1177,13 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public final UnicodeSet add(String s) { public final UnicodeSet add(String s) {
checkFrozen();
int cp = getSingleCP(s); int cp = getSingleCP(s);
if (cp < 0) { if (cp < 0) {
strings.add(s); strings.add(s);
pat = null; pat = null;
} else { } else {
add(cp, cp); add_unchecked(cp, cp);
} }
return this; return this;
} }
@ -1160,10 +1216,11 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public final UnicodeSet addAll(String s) { public final UnicodeSet addAll(String s) {
checkFrozen();
int cp; int cp;
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i); cp = UTF16.charAt(s, i);
add(cp, cp); add_unchecked(cp, cp);
} }
return this; return this;
} }
@ -1236,6 +1293,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet retain(int start, int end) { public UnicodeSet retain(int start, int end) {
checkFrozen();
if (start < MIN_VALUE || start > MAX_VALUE) { if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
} }
@ -1299,6 +1357,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet remove(int start, int end) { public UnicodeSet remove(int start, int end) {
checkFrozen();
if (start < MIN_VALUE || start > MAX_VALUE) { if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
} }
@ -1355,6 +1414,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet complement(int start, int end) { public UnicodeSet complement(int start, int end) {
checkFrozen();
if (start < MIN_VALUE || start > MAX_VALUE) { if (start < MIN_VALUE || start > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6)); throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
} }
@ -1384,6 +1444,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet complement() { public UnicodeSet complement() {
checkFrozen();
if (list[0] == LOW) { if (list[0] == LOW) {
System.arraycopy(list, 1, list, 0, len-1); System.arraycopy(list, 1, list, 0, len-1);
--len; --len;
@ -1407,6 +1468,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public final UnicodeSet complement(String s) { public final UnicodeSet complement(String s) {
checkFrozen();
int cp = getSingleCP(s); int cp = getSingleCP(s);
if (cp < 0) { if (cp < 0) {
if (strings.contains(s)) strings.remove(s); if (strings.contains(s)) strings.remove(s);
@ -1838,6 +1900,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet addAll(UnicodeSet c) { public UnicodeSet addAll(UnicodeSet c) {
checkFrozen();
add(c.list, c.len, 0); add(c.list, c.len, 0);
strings.addAll(c.strings); strings.addAll(c.strings);
return this; return this;
@ -1854,6 +1917,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet retainAll(UnicodeSet c) { public UnicodeSet retainAll(UnicodeSet c) {
checkFrozen();
retain(c.list, c.len, 0); retain(c.list, c.len, 0);
strings.retainAll(c.strings); strings.retainAll(c.strings);
return this; return this;
@ -1870,6 +1934,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet removeAll(UnicodeSet c) { public UnicodeSet removeAll(UnicodeSet c) {
checkFrozen();
retain(c.list, c.len, 2); retain(c.list, c.len, 2);
strings.removeAll(c.strings); strings.removeAll(c.strings);
return this; return this;
@ -1885,6 +1950,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet complementAll(UnicodeSet c) { public UnicodeSet complementAll(UnicodeSet c) {
checkFrozen();
xor(c.list, c.len, 0); xor(c.list, c.len, 0);
SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings); SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
return this; return this;
@ -1896,6 +1962,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet clear() { public UnicodeSet clear() {
checkFrozen();
list[0] = HIGH; list[0] = HIGH;
len = 1; len = 1;
pat = null; pat = null;
@ -1946,6 +2013,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public UnicodeSet compact() { public UnicodeSet compact() {
checkFrozen();
if (len != list.length) { if (len != list.length) {
int[] temp = new int[len]; int[] temp = new int[len];
System.arraycopy(list, 0, temp, 0, len); System.arraycopy(list, 0, temp, 0, len);
@ -2195,7 +2263,7 @@ public class UnicodeSet extends UnicodeFilter {
if (op != 0) { if (op != 0) {
syntaxError(chars, "Char expected after operator"); syntaxError(chars, "Char expected after operator");
} }
add(lastChar, lastChar); add_unchecked(lastChar, lastChar);
_appendToPat(pat, lastChar, false); _appendToPat(pat, lastChar, false);
lastItem = op = 0; lastItem = op = 0;
} }
@ -2260,12 +2328,12 @@ public class UnicodeSet extends UnicodeFilter {
switch (c) { switch (c) {
case ']': case ']':
if (lastItem == 1) { if (lastItem == 1) {
add(lastChar, lastChar); add_unchecked(lastChar, lastChar);
_appendToPat(pat, lastChar, false); _appendToPat(pat, lastChar, false);
} }
// Treat final trailing '-' as a literal // Treat final trailing '-' as a literal
if (op == '-') { if (op == '-') {
add(op, op); add_unchecked(op, op);
pat.append(op); pat.append(op);
} else if (op == '&') { } else if (op == '&') {
syntaxError(chars, "Trailing '&'"); syntaxError(chars, "Trailing '&'");
@ -2280,7 +2348,7 @@ public class UnicodeSet extends UnicodeFilter {
continue; continue;
} else { } else {
// Treat final trailing '-' as a literal // Treat final trailing '-' as a literal
add(c, c); add_unchecked(c, c);
c = chars.next(opts); c = chars.next(opts);
literal = chars.isEscaped(); literal = chars.isEscaped();
if (c == ']' && !literal) { if (c == ']' && !literal) {
@ -2304,7 +2372,7 @@ public class UnicodeSet extends UnicodeFilter {
syntaxError(chars, "Missing operand after operator"); syntaxError(chars, "Missing operand after operator");
} }
if (lastItem == 1) { if (lastItem == 1) {
add(lastChar, lastChar); add_unchecked(lastChar, lastChar);
_appendToPat(pat, lastChar, false); _appendToPat(pat, lastChar, false);
} }
lastItem = 0; lastItem = 0;
@ -2352,10 +2420,10 @@ public class UnicodeSet extends UnicodeFilter {
} }
if (anchor && op == 0) { if (anchor && op == 0) {
if (lastItem == 1) { if (lastItem == 1) {
add(lastChar, lastChar); add_unchecked(lastChar, lastChar);
_appendToPat(pat, lastChar, false); _appendToPat(pat, lastChar, false);
} }
add(UnicodeMatcher.ETHER); add_unchecked(UnicodeMatcher.ETHER);
usePat = true; usePat = true;
pat.append(SymbolTable.SYMBOL_REF).append(']'); pat.append(SymbolTable.SYMBOL_REF).append(']');
mode = 2; mode = 2;
@ -2383,13 +2451,13 @@ public class UnicodeSet extends UnicodeFilter {
// these are most likely typos. // these are most likely typos.
syntaxError(chars, "Invalid range"); syntaxError(chars, "Invalid range");
} }
add(lastChar, c); add_unchecked(lastChar, c);
_appendToPat(pat, lastChar, false); _appendToPat(pat, lastChar, false);
pat.append(op); pat.append(op);
_appendToPat(pat, c, false); _appendToPat(pat, c, false);
lastItem = op = 0; lastItem = op = 0;
} else { } else {
add(lastChar, lastChar); add_unchecked(lastChar, lastChar);
_appendToPat(pat, lastChar, false); _appendToPat(pat, lastChar, false);
lastChar = c; lastChar = c;
} }
@ -2456,6 +2524,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.8 * @stable ICU 2.8
*/ */
public void addAll(Collection source) { public void addAll(Collection source) {
checkFrozen();
Iterator it = source.iterator(); Iterator it = source.iterator();
while (it.hasNext()) { while (it.hasNext()) {
add(it.next().toString()); add(it.next().toString());
@ -2846,13 +2915,13 @@ public class UnicodeSet extends UnicodeFilter {
startHasProperty = ch; startHasProperty = ch;
} }
} else if (startHasProperty >= 0) { } else if (startHasProperty >= 0) {
add(startHasProperty, ch-1); add_unchecked(startHasProperty, ch-1);
startHasProperty = -1; startHasProperty = -1;
} }
} }
} }
if (startHasProperty >= 0) { if (startHasProperty >= 0) {
add(startHasProperty, 0x10FFFF); add_unchecked(startHasProperty, 0x10FFFF);
} }
return this; return this;
@ -2914,6 +2983,7 @@ public class UnicodeSet extends UnicodeFilter {
* @stable ICU 2.4 * @stable ICU 2.4
*/ */
public UnicodeSet applyIntPropertyValue(int prop, int value) { public UnicodeSet applyIntPropertyValue(int prop, int value) {
checkFrozen();
if (prop == UProperty.GENERAL_CATEGORY_MASK) { if (prop == UProperty.GENERAL_CATEGORY_MASK) {
applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR); applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
} else { } else {
@ -2969,6 +3039,7 @@ public class UnicodeSet extends UnicodeFilter {
*/ */
public UnicodeSet applyPropertyAlias(String propertyAlias, public UnicodeSet applyPropertyAlias(String propertyAlias,
String valueAlias, SymbolTable symbols) { String valueAlias, SymbolTable symbols) {
checkFrozen();
int p; int p;
int v; int v;
boolean mustNotBeEmpty = false, invert = false; boolean mustNotBeEmpty = false, invert = false;
@ -3031,7 +3102,7 @@ public class UnicodeSet extends UnicodeFilter {
throw new IllegalArgumentException("Invalid character name"); throw new IllegalArgumentException("Invalid character name");
} }
clear(); clear();
add(ch); add_unchecked(ch);
return this; return this;
} }
case UProperty.AGE: case UProperty.AGE:
@ -3374,6 +3445,7 @@ public class UnicodeSet extends UnicodeFilter {
* @internal * @internal
*/ */
public UnicodeSet closeOver(int attribute) { public UnicodeSet closeOver(int attribute) {
checkFrozen();
if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) { if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
UCaseProps csp; UCaseProps csp;
try { try {
@ -3470,4 +3542,40 @@ public class UnicodeSet extends UnicodeFilter {
return null; return null;
} }
} }
private boolean frozen;
/**
 * Is this frozen, according to the Freezable interface?
 * Reports the flag that freeze() sets and that checkFrozen() consults.
 * @return true if this set has been frozen, false otherwise
 */
public boolean isFrozen() {
return frozen;
}
/**
 * Freeze this class, according to the Freezable interface.
 * Sets the frozen flag; this method itself offers no way to clear it again
 * (cloneAsThawed() returns a separate, unfrozen copy instead).
 * @return this (typed as Object, so callers that need a UnicodeSet must cast)
 */
public Object freeze() {
frozen = true;
return this;
}
/**
 * Clone a thawed version of this class, according to the Freezable interface.
 * This object itself is left untouched.
 * @return a copy of this set, obtained via clone(), whose frozen flag is cleared
 */
public Object cloneAsThawed() {
UnicodeSet result = (UnicodeSet) clone();
// clone() copies the frozen flag, so it has to be reset on the copy.
result.frozen = false;
return result;
}
// Internal guard invoked by the mutating methods before they change anything:
// throws UnsupportedOperationException if this set has been frozen.
private void checkFrozen() {
    if (!frozen) {
        return;
    }
    throw new UnsupportedOperationException("Attempt to modify frozen object");
}
} }