ICU-4217 made UnicodeSet Freezable (also fixed bugs in pretty printer, an internal function)
X-SVN-Rev: 20109
This commit is contained in:
parent
106b17a85e
commit
92d404cc01
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2005, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2006, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -9,6 +9,7 @@ import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
|
||||
import com.ibm.icu.text.*;
|
||||
import com.ibm.icu.dev.test.*;
|
||||
import com.ibm.icu.impl.PrettyPrinter;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.impl.SortedSetRelation;
|
||||
import java.util.*;
|
||||
@ -146,7 +147,6 @@ public class UnicodeSetTest extends TestFmwk {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// NOTE: copied the following from Utility. There ought to be a version in there with a flag
|
||||
// that does the Java stuff
|
||||
|
||||
@ -1308,6 +1308,123 @@ public class UnicodeSetTest extends TestFmwk {
|
||||
expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
|
||||
expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that frozen classes disallow changes. For 4217
|
||||
*/
|
||||
public void TestFrozen() {
|
||||
UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
|
||||
test.freeze();
|
||||
checkModification(test, true);
|
||||
checkModification(test, false);
|
||||
}
|
||||
|
||||
public void checkModification(UnicodeSet original, boolean isFrozen) {
|
||||
main:
|
||||
for (int i = 0; ;++i) {
|
||||
UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
|
||||
boolean gotException = true;
|
||||
boolean checkEquals = true;
|
||||
try {
|
||||
switch(i) {
|
||||
case 0: test.add(0); break;
|
||||
case 1: test.add(0,1); break;
|
||||
case 2: test.add("a"); break;
|
||||
case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
|
||||
case 4: test.addAll("ab"); break;
|
||||
case 5: test.addAll(new UnicodeSet("[ab]")); break;
|
||||
case 6: test.applyIntPropertyValue(0,0); break;
|
||||
case 7: test.applyPattern("[ab]"); break;
|
||||
case 8: test.applyPattern("[ab]", true); break;
|
||||
case 9: test.applyPattern("[ab]", 0); break;
|
||||
case 10: test.applyPropertyAlias("hex","true"); break;
|
||||
case 11: test.applyPropertyAlias("hex", "true", null); break;
|
||||
case 12: test.closeOver(UnicodeSet.CASE); break;
|
||||
case 13: test.compact(); checkEquals = false; break;
|
||||
case 14: test.complement(0); break;
|
||||
case 15: test.complement(0,0); break;
|
||||
case 16: test.complement("ab"); break;
|
||||
case 17: test.complementAll("ab"); break;
|
||||
case 18: test.complementAll(new UnicodeSet("[ab]")); break;
|
||||
case 19: test.remove(' '); break;
|
||||
case 20: test.remove(' ','a'); break;
|
||||
case 21: test.remove(" "); break;
|
||||
case 22: test.removeAll(" a"); break;
|
||||
case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
|
||||
case 24: test.retain(' '); break;
|
||||
case 25: test.retain(' ','a'); break;
|
||||
case 26: test.retain(" "); break;
|
||||
case 27: test.retainAll(" a"); break;
|
||||
case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
|
||||
case 29: test.set(0,1); break;
|
||||
case 30: test.set(new UnicodeSet("[ab]")); break;
|
||||
|
||||
default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
|
||||
case 35: return;
|
||||
}
|
||||
gotException = false;
|
||||
} catch (UnsupportedOperationException e) {
|
||||
// do nothing
|
||||
}
|
||||
if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
|
||||
if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
|
||||
if (checkEquals) {
|
||||
if (test.equals(original)) {
|
||||
if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
|
||||
} else { // unequal
|
||||
if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String[] prettyData = {
|
||||
"[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
|
||||
"[:any:]",
|
||||
"[:whitespace:]",
|
||||
"[:linebreak=AL:]",
|
||||
};
|
||||
|
||||
public void TestPrettyPrinting() {
|
||||
PrettyPrinter pp = new PrettyPrinter();
|
||||
int i = 0;
|
||||
for (; i < prettyData.length; ++i) {
|
||||
UnicodeSet test = new UnicodeSet(prettyData[i]);
|
||||
checkPrettySet(pp, i, test);
|
||||
}
|
||||
Random random = new Random(0);
|
||||
UnicodeSet test = new UnicodeSet();
|
||||
for (; i < 1000; ++i) {
|
||||
double start = random.nextGaussian() * 0x10000;
|
||||
if (start < 0) start = - start;
|
||||
if (start > 0x10FFFF) {
|
||||
start = 0x10FFFF;
|
||||
}
|
||||
double end = random.nextGaussian() * 0x100;
|
||||
if (end < 0) end = -end;
|
||||
end = start + end;
|
||||
if (end > 0x10FFFF) {
|
||||
end = 0x10FFFF;
|
||||
}
|
||||
test.complement((int)start, (int)end);
|
||||
checkPrettySet(pp, i, test);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
|
||||
String pretty = pp.toPattern(test);
|
||||
UnicodeSet retry = new UnicodeSet(pretty);
|
||||
if (!test.equals(retry)) {
|
||||
errln(i + ". Failed test: " + test + " != " + pretty);
|
||||
} else {
|
||||
logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
|
||||
}
|
||||
}
|
||||
|
||||
private String truncate(String string) {
|
||||
if (string.length() <= 100) return string;
|
||||
return string.substring(0,97) + "...";
|
||||
}
|
||||
|
||||
public class TokenSymbolTable implements SymbolTable {
|
||||
HashMap contents = new HashMap();
|
||||
|
@ -7,10 +7,12 @@
|
||||
*/
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.SortedSet;
|
||||
|
||||
@ -328,6 +330,55 @@ public final class CollectionUtilities {
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does one string contain another, starting at a specific offset?
|
||||
* @param text
|
||||
* @param offset
|
||||
* @param other
|
||||
* @return
|
||||
*/
|
||||
public static int matchesAt(CharSequence text, int offset, CharSequence other) {
|
||||
int len = other.length();
|
||||
int i = 0;
|
||||
int j = offset;
|
||||
for (; i < len; ++i, ++j) {
|
||||
char pc = other.charAt(i);
|
||||
char tc = text.charAt(j);
|
||||
if (pc != tc) return -1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ending offset found by matching characters with testSet, until a position is found that doen't match
|
||||
* @param string
|
||||
* @param offset
|
||||
* @param testSet
|
||||
* @return
|
||||
*/
|
||||
public int span(CharSequence string, int offset, UnicodeSet testSet) {
|
||||
while (true) {
|
||||
int newOffset = testSet.matchesAt(string, offset);
|
||||
if (newOffset < 0) return offset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ending offset found by matching characters with testSet, until a position is found that does match
|
||||
* @param string
|
||||
* @param offset
|
||||
* @param testSet
|
||||
* @return
|
||||
*/
|
||||
public int spanNot(CharSequence string, int offset, UnicodeSet testSet) {
|
||||
while (true) {
|
||||
int newOffset = testSet.matchesAt(string, offset);
|
||||
if (newOffset >= 0) return offset;
|
||||
++offset; // try next character position
|
||||
// we don't have to worry about surrogates for this.
|
||||
}
|
||||
}
|
||||
|
||||
public static String prettyPrint(UnicodeSet uset, boolean compressRanges, UnicodeSet toQuote, Transliterator quoter,
|
||||
Comparator ordering, Comparator spaceComparator) {
|
||||
PrettyPrinter pp = new PrettyPrinter().setCompressRanges(compressRanges);
|
||||
|
@ -25,7 +25,8 @@ import com.ibm.icu.util.ULocale;
|
||||
/** Provides more flexible formatting of UnicodeSet patterns.
|
||||
*/
|
||||
public class PrettyPrinter {
|
||||
private static UnicodeSet patternWhitespace = new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]");
|
||||
private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
|
||||
private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
|
||||
|
||||
private boolean first = true;
|
||||
private StringBuffer target = new StringBuffer();
|
||||
@ -113,27 +114,45 @@ public class PrettyPrinter {
|
||||
*/
|
||||
public String toPattern(UnicodeSet uset) {
|
||||
first = true;
|
||||
UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
|
||||
// make sure that comparison separates all strings, even canonically equivalent ones
|
||||
Set orderedStrings = new TreeSet(ordering);
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.next();) {
|
||||
orderedStrings.add(it.getString());
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
|
||||
if (it.codepoint == it.IS_STRING) {
|
||||
orderedStrings.add(it.string);
|
||||
} else {
|
||||
for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
|
||||
if (!putAtEnd.contains(i)) {
|
||||
orderedStrings.add(UTF16.valueOf(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
target.setLength(0);
|
||||
target.append("[");
|
||||
for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
|
||||
appendUnicodeSetItem((String) it.next());
|
||||
}
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
|
||||
appendUnicodeSetItem(it.codepoint);
|
||||
}
|
||||
flushLast();
|
||||
target.append("]");
|
||||
String sresult = target.toString();
|
||||
UnicodeSet doubleCheck = new UnicodeSet(sresult);
|
||||
if (!uset.equals(doubleCheck)) {
|
||||
throw new IllegalStateException("Failure to round-trip in pretty-print");
|
||||
}
|
||||
|
||||
// double check the results. This can be removed once we have more tests.
|
||||
// try {
|
||||
// UnicodeSet doubleCheck = new UnicodeSet(sresult);
|
||||
// if (!uset.equals(doubleCheck)) {
|
||||
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
|
||||
// }
|
||||
// } catch (RuntimeException e) {
|
||||
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
|
||||
// }
|
||||
return sresult;
|
||||
}
|
||||
|
||||
PrettyPrinter appendUnicodeSetItem(String s) {
|
||||
private PrettyPrinter appendUnicodeSetItem(String s) {
|
||||
int cp;
|
||||
if (UTF16.hasMoreCodePointsThan(s, 1)) {
|
||||
flushLast();
|
||||
@ -145,18 +164,21 @@ public class PrettyPrinter {
|
||||
target.append("}");
|
||||
lastString = s;
|
||||
} else {
|
||||
if (!compressRanges)
|
||||
flushLast();
|
||||
cp = UTF16.charAt(s, 0);
|
||||
if (cp == lastCodePoint + 1) {
|
||||
lastCodePoint = cp; // continue range
|
||||
} else { // start range
|
||||
flushLast();
|
||||
firstCodePoint = lastCodePoint = cp;
|
||||
}
|
||||
appendUnicodeSetItem(UTF16.charAt(s, 0));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
private void appendUnicodeSetItem(int cp) {
|
||||
if (!compressRanges)
|
||||
flushLast();
|
||||
if (cp == lastCodePoint + 1) {
|
||||
lastCodePoint = cp; // continue range
|
||||
} else { // start range
|
||||
flushLast();
|
||||
firstCodePoint = lastCodePoint = cp;
|
||||
}
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@ -166,10 +188,13 @@ public class PrettyPrinter {
|
||||
} else if (spaceComp.compare(s, lastString) != 0) {
|
||||
target.append(' ');
|
||||
} else {
|
||||
int type = UCharacter.getType(UTF16.charAt(s,0));
|
||||
int cp = UTF16.charAt(s,0);
|
||||
int type = UCharacter.getType(cp);
|
||||
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
|
||||
target.append(' ');
|
||||
}
|
||||
} else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
|
||||
target.append(' '); // make sure we don't accidentally merge two surrogates
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@ import com.ibm.icu.lang.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.CollectionUtilities;
|
||||
import com.ibm.icu.impl.NormalizerImpl;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
@ -20,6 +21,7 @@ import com.ibm.icu.impl.UPropertyAliases;
|
||||
import com.ibm.icu.impl.SortedSetRelation;
|
||||
import com.ibm.icu.impl.RuleCharacterIterator;
|
||||
|
||||
import com.ibm.icu.util.Freezable;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
@ -265,7 +267,7 @@ import java.util.Collection;
|
||||
* @stable ICU 2.0
|
||||
* @see UnicodeSetIterator
|
||||
*/
|
||||
public class UnicodeSet extends UnicodeFilter {
|
||||
public class UnicodeSet extends UnicodeFilter implements Freezable {
|
||||
|
||||
private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
|
||||
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
|
||||
@ -439,7 +441,9 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public Object clone() {
|
||||
return new UnicodeSet(this);
|
||||
UnicodeSet result = new UnicodeSet(this);
|
||||
result.frozen = this.frozen;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -452,6 +456,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet set(int start, int end) {
|
||||
checkFrozen();
|
||||
clear();
|
||||
complement(start, end);
|
||||
return this;
|
||||
@ -464,6 +469,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet set(UnicodeSet other) {
|
||||
checkFrozen();
|
||||
list = (int[]) other.list.clone();
|
||||
len = other.len;
|
||||
pat = other.pat;
|
||||
@ -481,6 +487,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public final UnicodeSet applyPattern(String pattern) {
|
||||
checkFrozen();
|
||||
return applyPattern(pattern, null, null, IGNORE_SPACE);
|
||||
}
|
||||
|
||||
@ -496,6 +503,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
|
||||
checkFrozen();
|
||||
return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0);
|
||||
}
|
||||
|
||||
@ -511,6 +519,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @internal
|
||||
*/
|
||||
public UnicodeSet applyPattern(String pattern, int options) {
|
||||
checkFrozen();
|
||||
return applyPattern(pattern, null, null, options);
|
||||
}
|
||||
|
||||
@ -908,6 +917,41 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
return maxLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1. For now, an internal routine.
|
||||
* @internal
|
||||
*/
|
||||
public int matchesAt(CharSequence text, int offset) {
|
||||
int len = -1;
|
||||
strings:
|
||||
if (strings.size() != 0) {
|
||||
char firstChar = text.charAt(offset);
|
||||
String trial = null;
|
||||
// find the first string starting with firstChar
|
||||
Iterator it = strings.iterator();
|
||||
while (it.hasNext()) {
|
||||
trial = (String) it.next();
|
||||
char firstStringChar = trial.charAt(0);
|
||||
if (firstStringChar < firstChar) continue;
|
||||
if (firstStringChar > firstChar) break strings;
|
||||
}
|
||||
// now keep checking string until we get the longest one
|
||||
while (true) {
|
||||
int tempLen = CollectionUtilities.matchesAt(text, offset, trial);
|
||||
if (len > tempLen) break strings;
|
||||
len = tempLen;
|
||||
if (!it.hasNext()) break;
|
||||
trial = (String) it.next();
|
||||
}
|
||||
}
|
||||
if (len < 2) {
|
||||
int cp = UTF16.charAt(text, offset);
|
||||
if (contains(cp)) {
|
||||
len = UTF16.getCharCount(cp);
|
||||
}
|
||||
}
|
||||
return offset+len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of UnicodeMatcher API. Union the set of all
|
||||
@ -987,6 +1031,12 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet add(int start, int end) {
|
||||
checkFrozen();
|
||||
return add_unchecked(start, end);
|
||||
}
|
||||
|
||||
// for internal use, after checkFrozen has been called
|
||||
private UnicodeSet add_unchecked(int start, int end) {
|
||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||
}
|
||||
@ -1027,6 +1077,12 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public final UnicodeSet add(int c) {
|
||||
checkFrozen();
|
||||
return add_unchecked(c);
|
||||
}
|
||||
|
||||
// for internal use only, after checkFrozen has been called
|
||||
private final UnicodeSet add_unchecked(int c) {
|
||||
if (c < MIN_VALUE || c > MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
|
||||
}
|
||||
@ -1121,13 +1177,13 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public final UnicodeSet add(String s) {
|
||||
|
||||
checkFrozen();
|
||||
int cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
strings.add(s);
|
||||
pat = null;
|
||||
} else {
|
||||
add(cp, cp);
|
||||
add_unchecked(cp, cp);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
@ -1160,10 +1216,11 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public final UnicodeSet addAll(String s) {
|
||||
checkFrozen();
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
add(cp, cp);
|
||||
add_unchecked(cp, cp);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
@ -1236,6 +1293,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet retain(int start, int end) {
|
||||
checkFrozen();
|
||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||
}
|
||||
@ -1299,6 +1357,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet remove(int start, int end) {
|
||||
checkFrozen();
|
||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||
}
|
||||
@ -1355,6 +1414,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet complement(int start, int end) {
|
||||
checkFrozen();
|
||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||
}
|
||||
@ -1384,6 +1444,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet complement() {
|
||||
checkFrozen();
|
||||
if (list[0] == LOW) {
|
||||
System.arraycopy(list, 1, list, 0, len-1);
|
||||
--len;
|
||||
@ -1407,6 +1468,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public final UnicodeSet complement(String s) {
|
||||
checkFrozen();
|
||||
int cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (strings.contains(s)) strings.remove(s);
|
||||
@ -1838,6 +1900,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet addAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
add(c.list, c.len, 0);
|
||||
strings.addAll(c.strings);
|
||||
return this;
|
||||
@ -1854,6 +1917,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet retainAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
retain(c.list, c.len, 0);
|
||||
strings.retainAll(c.strings);
|
||||
return this;
|
||||
@ -1870,6 +1934,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet removeAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
retain(c.list, c.len, 2);
|
||||
strings.removeAll(c.strings);
|
||||
return this;
|
||||
@ -1885,6 +1950,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet complementAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
xor(c.list, c.len, 0);
|
||||
SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
|
||||
return this;
|
||||
@ -1896,6 +1962,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet clear() {
|
||||
checkFrozen();
|
||||
list[0] = HIGH;
|
||||
len = 1;
|
||||
pat = null;
|
||||
@ -1946,6 +2013,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet compact() {
|
||||
checkFrozen();
|
||||
if (len != list.length) {
|
||||
int[] temp = new int[len];
|
||||
System.arraycopy(list, 0, temp, 0, len);
|
||||
@ -2195,7 +2263,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
if (op != 0) {
|
||||
syntaxError(chars, "Char expected after operator");
|
||||
}
|
||||
add(lastChar, lastChar);
|
||||
add_unchecked(lastChar, lastChar);
|
||||
_appendToPat(pat, lastChar, false);
|
||||
lastItem = op = 0;
|
||||
}
|
||||
@ -2260,12 +2328,12 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
switch (c) {
|
||||
case ']':
|
||||
if (lastItem == 1) {
|
||||
add(lastChar, lastChar);
|
||||
add_unchecked(lastChar, lastChar);
|
||||
_appendToPat(pat, lastChar, false);
|
||||
}
|
||||
// Treat final trailing '-' as a literal
|
||||
if (op == '-') {
|
||||
add(op, op);
|
||||
add_unchecked(op, op);
|
||||
pat.append(op);
|
||||
} else if (op == '&') {
|
||||
syntaxError(chars, "Trailing '&'");
|
||||
@ -2280,7 +2348,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
continue;
|
||||
} else {
|
||||
// Treat final trailing '-' as a literal
|
||||
add(c, c);
|
||||
add_unchecked(c, c);
|
||||
c = chars.next(opts);
|
||||
literal = chars.isEscaped();
|
||||
if (c == ']' && !literal) {
|
||||
@ -2304,7 +2372,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
syntaxError(chars, "Missing operand after operator");
|
||||
}
|
||||
if (lastItem == 1) {
|
||||
add(lastChar, lastChar);
|
||||
add_unchecked(lastChar, lastChar);
|
||||
_appendToPat(pat, lastChar, false);
|
||||
}
|
||||
lastItem = 0;
|
||||
@ -2352,10 +2420,10 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
}
|
||||
if (anchor && op == 0) {
|
||||
if (lastItem == 1) {
|
||||
add(lastChar, lastChar);
|
||||
add_unchecked(lastChar, lastChar);
|
||||
_appendToPat(pat, lastChar, false);
|
||||
}
|
||||
add(UnicodeMatcher.ETHER);
|
||||
add_unchecked(UnicodeMatcher.ETHER);
|
||||
usePat = true;
|
||||
pat.append(SymbolTable.SYMBOL_REF).append(']');
|
||||
mode = 2;
|
||||
@ -2383,13 +2451,13 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
// these are most likely typos.
|
||||
syntaxError(chars, "Invalid range");
|
||||
}
|
||||
add(lastChar, c);
|
||||
add_unchecked(lastChar, c);
|
||||
_appendToPat(pat, lastChar, false);
|
||||
pat.append(op);
|
||||
_appendToPat(pat, c, false);
|
||||
lastItem = op = 0;
|
||||
} else {
|
||||
add(lastChar, lastChar);
|
||||
add_unchecked(lastChar, lastChar);
|
||||
_appendToPat(pat, lastChar, false);
|
||||
lastChar = c;
|
||||
}
|
||||
@ -2456,6 +2524,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public void addAll(Collection source) {
|
||||
checkFrozen();
|
||||
Iterator it = source.iterator();
|
||||
while (it.hasNext()) {
|
||||
add(it.next().toString());
|
||||
@ -2846,13 +2915,13 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
startHasProperty = ch;
|
||||
}
|
||||
} else if (startHasProperty >= 0) {
|
||||
add(startHasProperty, ch-1);
|
||||
add_unchecked(startHasProperty, ch-1);
|
||||
startHasProperty = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (startHasProperty >= 0) {
|
||||
add(startHasProperty, 0x10FFFF);
|
||||
add_unchecked(startHasProperty, 0x10FFFF);
|
||||
}
|
||||
|
||||
return this;
|
||||
@ -2914,6 +2983,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public UnicodeSet applyIntPropertyValue(int prop, int value) {
|
||||
checkFrozen();
|
||||
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
||||
applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
|
||||
} else {
|
||||
@ -2969,6 +3039,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
*/
|
||||
public UnicodeSet applyPropertyAlias(String propertyAlias,
|
||||
String valueAlias, SymbolTable symbols) {
|
||||
checkFrozen();
|
||||
int p;
|
||||
int v;
|
||||
boolean mustNotBeEmpty = false, invert = false;
|
||||
@ -3031,7 +3102,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
throw new IllegalArgumentException("Invalid character name");
|
||||
}
|
||||
clear();
|
||||
add(ch);
|
||||
add_unchecked(ch);
|
||||
return this;
|
||||
}
|
||||
case UProperty.AGE:
|
||||
@ -3374,6 +3445,7 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
* @internal
|
||||
*/
|
||||
public UnicodeSet closeOver(int attribute) {
|
||||
checkFrozen();
|
||||
if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
|
||||
UCaseProps csp;
|
||||
try {
|
||||
@ -3470,4 +3542,40 @@ public class UnicodeSet extends UnicodeFilter {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean frozen;
|
||||
|
||||
/**
|
||||
* Is this frozen, according to the Freezable interface?
|
||||
* @return value
|
||||
*/
|
||||
public boolean isFrozen() {
|
||||
return frozen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Freeze this class, according to the Freezable interface.
|
||||
* @return this
|
||||
*/
|
||||
public Object freeze() {
|
||||
frozen = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clone a thawed version of this class, according to the Freezable interface.
|
||||
* @return this
|
||||
*/
|
||||
public Object cloneAsThawed() {
|
||||
UnicodeSet result = (UnicodeSet) clone();
|
||||
result.frozen = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
// internal function
|
||||
private void checkFrozen() {
|
||||
if (frozen) {
|
||||
throw new UnsupportedOperationException("Attempt to modify frozen object");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user