ICU-4217 made UnicodeSet Freezable (also fixed bugs in pretty printer, an internal function)
X-SVN-Rev: 20109
This commit is contained in:
parent
106b17a85e
commit
92d404cc01
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
* Copyright (C) 1996-2005, International Business Machines Corporation and *
|
* Copyright (C) 1996-2006, International Business Machines Corporation and *
|
||||||
* others. All Rights Reserved. *
|
* others. All Rights Reserved. *
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -9,6 +9,7 @@ import com.ibm.icu.lang.*;
|
|||||||
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
|
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
|
||||||
import com.ibm.icu.text.*;
|
import com.ibm.icu.text.*;
|
||||||
import com.ibm.icu.dev.test.*;
|
import com.ibm.icu.dev.test.*;
|
||||||
|
import com.ibm.icu.impl.PrettyPrinter;
|
||||||
import com.ibm.icu.impl.Utility;
|
import com.ibm.icu.impl.Utility;
|
||||||
import com.ibm.icu.impl.SortedSetRelation;
|
import com.ibm.icu.impl.SortedSetRelation;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
@ -146,7 +147,6 @@ public class UnicodeSetTest extends TestFmwk {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// NOTE: copied the following from Utility. There ought to be a version in there with a flag
|
// NOTE: copied the following from Utility. There ought to be a version in there with a flag
|
||||||
// that does the Java stuff
|
// that does the Java stuff
|
||||||
|
|
||||||
@ -1308,6 +1308,123 @@ public class UnicodeSetTest extends TestFmwk {
|
|||||||
expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
|
expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
|
||||||
expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
|
expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that frozen classes disallow changes. For 4217
|
||||||
|
*/
|
||||||
|
public void TestFrozen() {
|
||||||
|
UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
|
||||||
|
test.freeze();
|
||||||
|
checkModification(test, true);
|
||||||
|
checkModification(test, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkModification(UnicodeSet original, boolean isFrozen) {
|
||||||
|
main:
|
||||||
|
for (int i = 0; ;++i) {
|
||||||
|
UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
|
||||||
|
boolean gotException = true;
|
||||||
|
boolean checkEquals = true;
|
||||||
|
try {
|
||||||
|
switch(i) {
|
||||||
|
case 0: test.add(0); break;
|
||||||
|
case 1: test.add(0,1); break;
|
||||||
|
case 2: test.add("a"); break;
|
||||||
|
case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
|
||||||
|
case 4: test.addAll("ab"); break;
|
||||||
|
case 5: test.addAll(new UnicodeSet("[ab]")); break;
|
||||||
|
case 6: test.applyIntPropertyValue(0,0); break;
|
||||||
|
case 7: test.applyPattern("[ab]"); break;
|
||||||
|
case 8: test.applyPattern("[ab]", true); break;
|
||||||
|
case 9: test.applyPattern("[ab]", 0); break;
|
||||||
|
case 10: test.applyPropertyAlias("hex","true"); break;
|
||||||
|
case 11: test.applyPropertyAlias("hex", "true", null); break;
|
||||||
|
case 12: test.closeOver(UnicodeSet.CASE); break;
|
||||||
|
case 13: test.compact(); checkEquals = false; break;
|
||||||
|
case 14: test.complement(0); break;
|
||||||
|
case 15: test.complement(0,0); break;
|
||||||
|
case 16: test.complement("ab"); break;
|
||||||
|
case 17: test.complementAll("ab"); break;
|
||||||
|
case 18: test.complementAll(new UnicodeSet("[ab]")); break;
|
||||||
|
case 19: test.remove(' '); break;
|
||||||
|
case 20: test.remove(' ','a'); break;
|
||||||
|
case 21: test.remove(" "); break;
|
||||||
|
case 22: test.removeAll(" a"); break;
|
||||||
|
case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
|
||||||
|
case 24: test.retain(' '); break;
|
||||||
|
case 25: test.retain(' ','a'); break;
|
||||||
|
case 26: test.retain(" "); break;
|
||||||
|
case 27: test.retainAll(" a"); break;
|
||||||
|
case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
|
||||||
|
case 29: test.set(0,1); break;
|
||||||
|
case 30: test.set(new UnicodeSet("[ab]")); break;
|
||||||
|
|
||||||
|
default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
|
||||||
|
case 35: return;
|
||||||
|
}
|
||||||
|
gotException = false;
|
||||||
|
} catch (UnsupportedOperationException e) {
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
|
||||||
|
if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
|
||||||
|
if (checkEquals) {
|
||||||
|
if (test.equals(original)) {
|
||||||
|
if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
|
||||||
|
} else { // unequal
|
||||||
|
if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnicodeSet patterns that TestPrettyPrinting round-trips through PrettyPrinter
// before it moves on to its pseudo-random sets.
String[] prettyData = {
|
||||||
|
"[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case (NOTE(review): presumably ranges reaching into lead and trail surrogates - confirm)
|
||||||
|
"[:any:]",
|
||||||
|
"[:whitespace:]",
|
||||||
|
"[:linebreak=AL:]",
|
||||||
|
};
|
||||||
|
|
||||||
|
public void TestPrettyPrinting() {
|
||||||
|
PrettyPrinter pp = new PrettyPrinter();
|
||||||
|
int i = 0;
|
||||||
|
for (; i < prettyData.length; ++i) {
|
||||||
|
UnicodeSet test = new UnicodeSet(prettyData[i]);
|
||||||
|
checkPrettySet(pp, i, test);
|
||||||
|
}
|
||||||
|
Random random = new Random(0);
|
||||||
|
UnicodeSet test = new UnicodeSet();
|
||||||
|
for (; i < 1000; ++i) {
|
||||||
|
double start = random.nextGaussian() * 0x10000;
|
||||||
|
if (start < 0) start = - start;
|
||||||
|
if (start > 0x10FFFF) {
|
||||||
|
start = 0x10FFFF;
|
||||||
|
}
|
||||||
|
double end = random.nextGaussian() * 0x100;
|
||||||
|
if (end < 0) end = -end;
|
||||||
|
end = start + end;
|
||||||
|
if (end > 0x10FFFF) {
|
||||||
|
end = 0x10FFFF;
|
||||||
|
}
|
||||||
|
test.complement((int)start, (int)end);
|
||||||
|
checkPrettySet(pp, i, test);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
|
||||||
|
String pretty = pp.toPattern(test);
|
||||||
|
UnicodeSet retry = new UnicodeSet(pretty);
|
||||||
|
if (!test.equals(retry)) {
|
||||||
|
errln(i + ". Failed test: " + test + " != " + pretty);
|
||||||
|
} else {
|
||||||
|
logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String truncate(String string) {
|
||||||
|
if (string.length() <= 100) return string;
|
||||||
|
return string.substring(0,97) + "...";
|
||||||
|
}
|
||||||
|
|
||||||
public class TokenSymbolTable implements SymbolTable {
|
public class TokenSymbolTable implements SymbolTable {
|
||||||
HashMap contents = new HashMap();
|
HashMap contents = new HashMap();
|
||||||
|
@ -7,10 +7,12 @@
|
|||||||
*/
|
*/
|
||||||
package com.ibm.icu.impl;
|
package com.ibm.icu.impl;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
|
|
||||||
@ -328,6 +330,55 @@ public final class CollectionUtilities {
|
|||||||
return result.toString();
|
return result.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does one string contain another, starting at a specific offset?
|
||||||
|
* @param text
|
||||||
|
* @param offset
|
||||||
|
* @param other
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static int matchesAt(CharSequence text, int offset, CharSequence other) {
|
||||||
|
int len = other.length();
|
||||||
|
int i = 0;
|
||||||
|
int j = offset;
|
||||||
|
for (; i < len; ++i, ++j) {
|
||||||
|
char pc = other.charAt(i);
|
||||||
|
char tc = text.charAt(j);
|
||||||
|
if (pc != tc) return -1;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the ending offset found by matching characters with testSet, until a position is found that doen't match
|
||||||
|
* @param string
|
||||||
|
* @param offset
|
||||||
|
* @param testSet
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int span(CharSequence string, int offset, UnicodeSet testSet) {
|
||||||
|
while (true) {
|
||||||
|
int newOffset = testSet.matchesAt(string, offset);
|
||||||
|
if (newOffset < 0) return offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the ending offset found by matching characters with testSet, until a position is found that does match
|
||||||
|
* @param string
|
||||||
|
* @param offset
|
||||||
|
* @param testSet
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int spanNot(CharSequence string, int offset, UnicodeSet testSet) {
|
||||||
|
while (true) {
|
||||||
|
int newOffset = testSet.matchesAt(string, offset);
|
||||||
|
if (newOffset >= 0) return offset;
|
||||||
|
++offset; // try next character position
|
||||||
|
// we don't have to worry about surrogates for this.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static String prettyPrint(UnicodeSet uset, boolean compressRanges, UnicodeSet toQuote, Transliterator quoter,
|
public static String prettyPrint(UnicodeSet uset, boolean compressRanges, UnicodeSet toQuote, Transliterator quoter,
|
||||||
Comparator ordering, Comparator spaceComparator) {
|
Comparator ordering, Comparator spaceComparator) {
|
||||||
PrettyPrinter pp = new PrettyPrinter().setCompressRanges(compressRanges);
|
PrettyPrinter pp = new PrettyPrinter().setCompressRanges(compressRanges);
|
||||||
|
@ -25,7 +25,8 @@ import com.ibm.icu.util.ULocale;
|
|||||||
/** Provides more flexible formatting of UnicodeSet patterns.
|
/** Provides more flexible formatting of UnicodeSet patterns.
|
||||||
*/
|
*/
|
||||||
public class PrettyPrinter {
|
public class PrettyPrinter {
|
||||||
private static UnicodeSet patternWhitespace = new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]");
|
private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
|
||||||
|
private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
|
||||||
|
|
||||||
private boolean first = true;
|
private boolean first = true;
|
||||||
private StringBuffer target = new StringBuffer();
|
private StringBuffer target = new StringBuffer();
|
||||||
@ -113,27 +114,45 @@ public class PrettyPrinter {
|
|||||||
*/
|
*/
|
||||||
public String toPattern(UnicodeSet uset) {
|
public String toPattern(UnicodeSet uset) {
|
||||||
first = true;
|
first = true;
|
||||||
|
UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
|
||||||
// make sure that comparison separates all strings, even canonically equivalent ones
|
// make sure that comparison separates all strings, even canonically equivalent ones
|
||||||
Set orderedStrings = new TreeSet(ordering);
|
Set orderedStrings = new TreeSet(ordering);
|
||||||
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.next();) {
|
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
|
||||||
orderedStrings.add(it.getString());
|
if (it.codepoint == it.IS_STRING) {
|
||||||
|
orderedStrings.add(it.string);
|
||||||
|
} else {
|
||||||
|
for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
|
||||||
|
if (!putAtEnd.contains(i)) {
|
||||||
|
orderedStrings.add(UTF16.valueOf(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
target.setLength(0);
|
target.setLength(0);
|
||||||
target.append("[");
|
target.append("[");
|
||||||
for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
|
for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
|
||||||
appendUnicodeSetItem((String) it.next());
|
appendUnicodeSetItem((String) it.next());
|
||||||
}
|
}
|
||||||
|
for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
|
||||||
|
appendUnicodeSetItem(it.codepoint);
|
||||||
|
}
|
||||||
flushLast();
|
flushLast();
|
||||||
target.append("]");
|
target.append("]");
|
||||||
String sresult = target.toString();
|
String sresult = target.toString();
|
||||||
UnicodeSet doubleCheck = new UnicodeSet(sresult);
|
|
||||||
if (!uset.equals(doubleCheck)) {
|
// double check the results. This can be removed once we have more tests.
|
||||||
throw new IllegalStateException("Failure to round-trip in pretty-print");
|
// try {
|
||||||
}
|
// UnicodeSet doubleCheck = new UnicodeSet(sresult);
|
||||||
|
// if (!uset.equals(doubleCheck)) {
|
||||||
|
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
|
||||||
|
// }
|
||||||
|
// } catch (RuntimeException e) {
|
||||||
|
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
|
||||||
|
// }
|
||||||
return sresult;
|
return sresult;
|
||||||
}
|
}
|
||||||
|
|
||||||
PrettyPrinter appendUnicodeSetItem(String s) {
|
private PrettyPrinter appendUnicodeSetItem(String s) {
|
||||||
int cp;
|
int cp;
|
||||||
if (UTF16.hasMoreCodePointsThan(s, 1)) {
|
if (UTF16.hasMoreCodePointsThan(s, 1)) {
|
||||||
flushLast();
|
flushLast();
|
||||||
@ -145,18 +164,21 @@ public class PrettyPrinter {
|
|||||||
target.append("}");
|
target.append("}");
|
||||||
lastString = s;
|
lastString = s;
|
||||||
} else {
|
} else {
|
||||||
if (!compressRanges)
|
appendUnicodeSetItem(UTF16.charAt(s, 0));
|
||||||
flushLast();
|
|
||||||
cp = UTF16.charAt(s, 0);
|
|
||||||
if (cp == lastCodePoint + 1) {
|
|
||||||
lastCodePoint = cp; // continue range
|
|
||||||
} else { // start range
|
|
||||||
flushLast();
|
|
||||||
firstCodePoint = lastCodePoint = cp;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void appendUnicodeSetItem(int cp) {
|
||||||
|
if (!compressRanges)
|
||||||
|
flushLast();
|
||||||
|
if (cp == lastCodePoint + 1) {
|
||||||
|
lastCodePoint = cp; // continue range
|
||||||
|
} else { // start range
|
||||||
|
flushLast();
|
||||||
|
firstCodePoint = lastCodePoint = cp;
|
||||||
|
}
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -166,10 +188,13 @@ public class PrettyPrinter {
|
|||||||
} else if (spaceComp.compare(s, lastString) != 0) {
|
} else if (spaceComp.compare(s, lastString) != 0) {
|
||||||
target.append(' ');
|
target.append(' ');
|
||||||
} else {
|
} else {
|
||||||
int type = UCharacter.getType(UTF16.charAt(s,0));
|
int cp = UTF16.charAt(s,0);
|
||||||
|
int type = UCharacter.getType(cp);
|
||||||
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
|
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
|
||||||
target.append(' ');
|
target.append(' ');
|
||||||
}
|
} else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
|
||||||
|
target.append(' '); // make sure we don't accidentally merge two surrogates
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ import com.ibm.icu.lang.*;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import com.ibm.icu.impl.CollectionUtilities;
|
||||||
import com.ibm.icu.impl.NormalizerImpl;
|
import com.ibm.icu.impl.NormalizerImpl;
|
||||||
import com.ibm.icu.impl.Utility;
|
import com.ibm.icu.impl.Utility;
|
||||||
import com.ibm.icu.impl.UCharacterProperty;
|
import com.ibm.icu.impl.UCharacterProperty;
|
||||||
@ -20,6 +21,7 @@ import com.ibm.icu.impl.UPropertyAliases;
|
|||||||
import com.ibm.icu.impl.SortedSetRelation;
|
import com.ibm.icu.impl.SortedSetRelation;
|
||||||
import com.ibm.icu.impl.RuleCharacterIterator;
|
import com.ibm.icu.impl.RuleCharacterIterator;
|
||||||
|
|
||||||
|
import com.ibm.icu.util.Freezable;
|
||||||
import com.ibm.icu.util.ULocale;
|
import com.ibm.icu.util.ULocale;
|
||||||
import com.ibm.icu.util.VersionInfo;
|
import com.ibm.icu.util.VersionInfo;
|
||||||
|
|
||||||
@ -265,7 +267,7 @@ import java.util.Collection;
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
* @see UnicodeSetIterator
|
* @see UnicodeSetIterator
|
||||||
*/
|
*/
|
||||||
public class UnicodeSet extends UnicodeFilter {
|
public class UnicodeSet extends UnicodeFilter implements Freezable {
|
||||||
|
|
||||||
private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
|
private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
|
||||||
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
|
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
|
||||||
@ -439,7 +441,9 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public Object clone() {
|
public Object clone() {
|
||||||
return new UnicodeSet(this);
|
UnicodeSet result = new UnicodeSet(this);
|
||||||
|
result.frozen = this.frozen;
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -452,6 +456,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet set(int start, int end) {
|
public UnicodeSet set(int start, int end) {
|
||||||
|
checkFrozen();
|
||||||
clear();
|
clear();
|
||||||
complement(start, end);
|
complement(start, end);
|
||||||
return this;
|
return this;
|
||||||
@ -464,6 +469,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet set(UnicodeSet other) {
|
public UnicodeSet set(UnicodeSet other) {
|
||||||
|
checkFrozen();
|
||||||
list = (int[]) other.list.clone();
|
list = (int[]) other.list.clone();
|
||||||
len = other.len;
|
len = other.len;
|
||||||
pat = other.pat;
|
pat = other.pat;
|
||||||
@ -481,6 +487,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public final UnicodeSet applyPattern(String pattern) {
|
public final UnicodeSet applyPattern(String pattern) {
|
||||||
|
checkFrozen();
|
||||||
return applyPattern(pattern, null, null, IGNORE_SPACE);
|
return applyPattern(pattern, null, null, IGNORE_SPACE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -496,6 +503,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
|
public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) {
|
||||||
|
checkFrozen();
|
||||||
return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0);
|
return applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -511,6 +519,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
public UnicodeSet applyPattern(String pattern, int options) {
|
public UnicodeSet applyPattern(String pattern, int options) {
|
||||||
|
checkFrozen();
|
||||||
return applyPattern(pattern, null, null, options);
|
return applyPattern(pattern, null, null, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -908,6 +917,41 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
return maxLen;
|
return maxLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1. For now, an internal routine.
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
public int matchesAt(CharSequence text, int offset) {
|
||||||
|
int len = -1;
|
||||||
|
strings:
|
||||||
|
if (strings.size() != 0) {
|
||||||
|
char firstChar = text.charAt(offset);
|
||||||
|
String trial = null;
|
||||||
|
// find the first string starting with firstChar
|
||||||
|
Iterator it = strings.iterator();
|
||||||
|
while (it.hasNext()) {
|
||||||
|
trial = (String) it.next();
|
||||||
|
char firstStringChar = trial.charAt(0);
|
||||||
|
if (firstStringChar < firstChar) continue;
|
||||||
|
if (firstStringChar > firstChar) break strings;
|
||||||
|
}
|
||||||
|
// now keep checking string until we get the longest one
|
||||||
|
while (true) {
|
||||||
|
int tempLen = CollectionUtilities.matchesAt(text, offset, trial);
|
||||||
|
if (len > tempLen) break strings;
|
||||||
|
len = tempLen;
|
||||||
|
if (!it.hasNext()) break;
|
||||||
|
trial = (String) it.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (len < 2) {
|
||||||
|
int cp = UTF16.charAt(text, offset);
|
||||||
|
if (contains(cp)) {
|
||||||
|
len = UTF16.getCharCount(cp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return offset+len;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implementation of UnicodeMatcher API. Union the set of all
|
* Implementation of UnicodeMatcher API. Union the set of all
|
||||||
@ -987,6 +1031,12 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet add(int start, int end) {
|
public UnicodeSet add(int start, int end) {
|
||||||
|
checkFrozen();
|
||||||
|
return add_unchecked(start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
// for internal use, after checkFrozen has been called
|
||||||
|
private UnicodeSet add_unchecked(int start, int end) {
|
||||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||||
}
|
}
|
||||||
@ -1027,6 +1077,12 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public final UnicodeSet add(int c) {
|
public final UnicodeSet add(int c) {
|
||||||
|
checkFrozen();
|
||||||
|
return add_unchecked(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
// for internal use only, after checkFrozen has been called
|
||||||
|
private final UnicodeSet add_unchecked(int c) {
|
||||||
if (c < MIN_VALUE || c > MAX_VALUE) {
|
if (c < MIN_VALUE || c > MAX_VALUE) {
|
||||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
|
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
|
||||||
}
|
}
|
||||||
@ -1121,13 +1177,13 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public final UnicodeSet add(String s) {
|
public final UnicodeSet add(String s) {
|
||||||
|
checkFrozen();
|
||||||
int cp = getSingleCP(s);
|
int cp = getSingleCP(s);
|
||||||
if (cp < 0) {
|
if (cp < 0) {
|
||||||
strings.add(s);
|
strings.add(s);
|
||||||
pat = null;
|
pat = null;
|
||||||
} else {
|
} else {
|
||||||
add(cp, cp);
|
add_unchecked(cp, cp);
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@ -1160,10 +1216,11 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public final UnicodeSet addAll(String s) {
|
public final UnicodeSet addAll(String s) {
|
||||||
|
checkFrozen();
|
||||||
int cp;
|
int cp;
|
||||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||||
cp = UTF16.charAt(s, i);
|
cp = UTF16.charAt(s, i);
|
||||||
add(cp, cp);
|
add_unchecked(cp, cp);
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@ -1236,6 +1293,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet retain(int start, int end) {
|
public UnicodeSet retain(int start, int end) {
|
||||||
|
checkFrozen();
|
||||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||||
}
|
}
|
||||||
@ -1299,6 +1357,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet remove(int start, int end) {
|
public UnicodeSet remove(int start, int end) {
|
||||||
|
checkFrozen();
|
||||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||||
}
|
}
|
||||||
@ -1355,6 +1414,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet complement(int start, int end) {
|
public UnicodeSet complement(int start, int end) {
|
||||||
|
checkFrozen();
|
||||||
if (start < MIN_VALUE || start > MAX_VALUE) {
|
if (start < MIN_VALUE || start > MAX_VALUE) {
|
||||||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
|
||||||
}
|
}
|
||||||
@ -1384,6 +1444,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet complement() {
|
public UnicodeSet complement() {
|
||||||
|
checkFrozen();
|
||||||
if (list[0] == LOW) {
|
if (list[0] == LOW) {
|
||||||
System.arraycopy(list, 1, list, 0, len-1);
|
System.arraycopy(list, 1, list, 0, len-1);
|
||||||
--len;
|
--len;
|
||||||
@ -1407,6 +1468,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public final UnicodeSet complement(String s) {
|
public final UnicodeSet complement(String s) {
|
||||||
|
checkFrozen();
|
||||||
int cp = getSingleCP(s);
|
int cp = getSingleCP(s);
|
||||||
if (cp < 0) {
|
if (cp < 0) {
|
||||||
if (strings.contains(s)) strings.remove(s);
|
if (strings.contains(s)) strings.remove(s);
|
||||||
@ -1838,6 +1900,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet addAll(UnicodeSet c) {
|
public UnicodeSet addAll(UnicodeSet c) {
|
||||||
|
checkFrozen();
|
||||||
add(c.list, c.len, 0);
|
add(c.list, c.len, 0);
|
||||||
strings.addAll(c.strings);
|
strings.addAll(c.strings);
|
||||||
return this;
|
return this;
|
||||||
@ -1854,6 +1917,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet retainAll(UnicodeSet c) {
|
public UnicodeSet retainAll(UnicodeSet c) {
|
||||||
|
checkFrozen();
|
||||||
retain(c.list, c.len, 0);
|
retain(c.list, c.len, 0);
|
||||||
strings.retainAll(c.strings);
|
strings.retainAll(c.strings);
|
||||||
return this;
|
return this;
|
||||||
@ -1870,6 +1934,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet removeAll(UnicodeSet c) {
|
public UnicodeSet removeAll(UnicodeSet c) {
|
||||||
|
checkFrozen();
|
||||||
retain(c.list, c.len, 2);
|
retain(c.list, c.len, 2);
|
||||||
strings.removeAll(c.strings);
|
strings.removeAll(c.strings);
|
||||||
return this;
|
return this;
|
||||||
@ -1885,6 +1950,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet complementAll(UnicodeSet c) {
|
public UnicodeSet complementAll(UnicodeSet c) {
|
||||||
|
checkFrozen();
|
||||||
xor(c.list, c.len, 0);
|
xor(c.list, c.len, 0);
|
||||||
SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
|
SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
|
||||||
return this;
|
return this;
|
||||||
@ -1896,6 +1962,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet clear() {
|
public UnicodeSet clear() {
|
||||||
|
checkFrozen();
|
||||||
list[0] = HIGH;
|
list[0] = HIGH;
|
||||||
len = 1;
|
len = 1;
|
||||||
pat = null;
|
pat = null;
|
||||||
@ -1946,6 +2013,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public UnicodeSet compact() {
|
public UnicodeSet compact() {
|
||||||
|
checkFrozen();
|
||||||
if (len != list.length) {
|
if (len != list.length) {
|
||||||
int[] temp = new int[len];
|
int[] temp = new int[len];
|
||||||
System.arraycopy(list, 0, temp, 0, len);
|
System.arraycopy(list, 0, temp, 0, len);
|
||||||
@ -2195,7 +2263,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
if (op != 0) {
|
if (op != 0) {
|
||||||
syntaxError(chars, "Char expected after operator");
|
syntaxError(chars, "Char expected after operator");
|
||||||
}
|
}
|
||||||
add(lastChar, lastChar);
|
add_unchecked(lastChar, lastChar);
|
||||||
_appendToPat(pat, lastChar, false);
|
_appendToPat(pat, lastChar, false);
|
||||||
lastItem = op = 0;
|
lastItem = op = 0;
|
||||||
}
|
}
|
||||||
@ -2260,12 +2328,12 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
switch (c) {
|
switch (c) {
|
||||||
case ']':
|
case ']':
|
||||||
if (lastItem == 1) {
|
if (lastItem == 1) {
|
||||||
add(lastChar, lastChar);
|
add_unchecked(lastChar, lastChar);
|
||||||
_appendToPat(pat, lastChar, false);
|
_appendToPat(pat, lastChar, false);
|
||||||
}
|
}
|
||||||
// Treat final trailing '-' as a literal
|
// Treat final trailing '-' as a literal
|
||||||
if (op == '-') {
|
if (op == '-') {
|
||||||
add(op, op);
|
add_unchecked(op, op);
|
||||||
pat.append(op);
|
pat.append(op);
|
||||||
} else if (op == '&') {
|
} else if (op == '&') {
|
||||||
syntaxError(chars, "Trailing '&'");
|
syntaxError(chars, "Trailing '&'");
|
||||||
@ -2280,7 +2348,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// Treat final trailing '-' as a literal
|
// Treat final trailing '-' as a literal
|
||||||
add(c, c);
|
add_unchecked(c, c);
|
||||||
c = chars.next(opts);
|
c = chars.next(opts);
|
||||||
literal = chars.isEscaped();
|
literal = chars.isEscaped();
|
||||||
if (c == ']' && !literal) {
|
if (c == ']' && !literal) {
|
||||||
@ -2304,7 +2372,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
syntaxError(chars, "Missing operand after operator");
|
syntaxError(chars, "Missing operand after operator");
|
||||||
}
|
}
|
||||||
if (lastItem == 1) {
|
if (lastItem == 1) {
|
||||||
add(lastChar, lastChar);
|
add_unchecked(lastChar, lastChar);
|
||||||
_appendToPat(pat, lastChar, false);
|
_appendToPat(pat, lastChar, false);
|
||||||
}
|
}
|
||||||
lastItem = 0;
|
lastItem = 0;
|
||||||
@ -2352,10 +2420,10 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
}
|
}
|
||||||
if (anchor && op == 0) {
|
if (anchor && op == 0) {
|
||||||
if (lastItem == 1) {
|
if (lastItem == 1) {
|
||||||
add(lastChar, lastChar);
|
add_unchecked(lastChar, lastChar);
|
||||||
_appendToPat(pat, lastChar, false);
|
_appendToPat(pat, lastChar, false);
|
||||||
}
|
}
|
||||||
add(UnicodeMatcher.ETHER);
|
add_unchecked(UnicodeMatcher.ETHER);
|
||||||
usePat = true;
|
usePat = true;
|
||||||
pat.append(SymbolTable.SYMBOL_REF).append(']');
|
pat.append(SymbolTable.SYMBOL_REF).append(']');
|
||||||
mode = 2;
|
mode = 2;
|
||||||
@ -2383,13 +2451,13 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
// these are most likely typos.
|
// these are most likely typos.
|
||||||
syntaxError(chars, "Invalid range");
|
syntaxError(chars, "Invalid range");
|
||||||
}
|
}
|
||||||
add(lastChar, c);
|
add_unchecked(lastChar, c);
|
||||||
_appendToPat(pat, lastChar, false);
|
_appendToPat(pat, lastChar, false);
|
||||||
pat.append(op);
|
pat.append(op);
|
||||||
_appendToPat(pat, c, false);
|
_appendToPat(pat, c, false);
|
||||||
lastItem = op = 0;
|
lastItem = op = 0;
|
||||||
} else {
|
} else {
|
||||||
add(lastChar, lastChar);
|
add_unchecked(lastChar, lastChar);
|
||||||
_appendToPat(pat, lastChar, false);
|
_appendToPat(pat, lastChar, false);
|
||||||
lastChar = c;
|
lastChar = c;
|
||||||
}
|
}
|
||||||
@ -2456,6 +2524,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.8
|
* @stable ICU 2.8
|
||||||
*/
|
*/
|
||||||
public void addAll(Collection source) {
|
public void addAll(Collection source) {
|
||||||
|
checkFrozen();
|
||||||
Iterator it = source.iterator();
|
Iterator it = source.iterator();
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
add(it.next().toString());
|
add(it.next().toString());
|
||||||
@ -2846,13 +2915,13 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
startHasProperty = ch;
|
startHasProperty = ch;
|
||||||
}
|
}
|
||||||
} else if (startHasProperty >= 0) {
|
} else if (startHasProperty >= 0) {
|
||||||
add(startHasProperty, ch-1);
|
add_unchecked(startHasProperty, ch-1);
|
||||||
startHasProperty = -1;
|
startHasProperty = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (startHasProperty >= 0) {
|
if (startHasProperty >= 0) {
|
||||||
add(startHasProperty, 0x10FFFF);
|
add_unchecked(startHasProperty, 0x10FFFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
@ -2914,6 +2983,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @stable ICU 2.4
|
* @stable ICU 2.4
|
||||||
*/
|
*/
|
||||||
public UnicodeSet applyIntPropertyValue(int prop, int value) {
|
public UnicodeSet applyIntPropertyValue(int prop, int value) {
|
||||||
|
checkFrozen();
|
||||||
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
||||||
applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
|
applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
|
||||||
} else {
|
} else {
|
||||||
@ -2969,6 +3039,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
*/
|
*/
|
||||||
public UnicodeSet applyPropertyAlias(String propertyAlias,
|
public UnicodeSet applyPropertyAlias(String propertyAlias,
|
||||||
String valueAlias, SymbolTable symbols) {
|
String valueAlias, SymbolTable symbols) {
|
||||||
|
checkFrozen();
|
||||||
int p;
|
int p;
|
||||||
int v;
|
int v;
|
||||||
boolean mustNotBeEmpty = false, invert = false;
|
boolean mustNotBeEmpty = false, invert = false;
|
||||||
@ -3031,7 +3102,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
throw new IllegalArgumentException("Invalid character name");
|
throw new IllegalArgumentException("Invalid character name");
|
||||||
}
|
}
|
||||||
clear();
|
clear();
|
||||||
add(ch);
|
add_unchecked(ch);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
case UProperty.AGE:
|
case UProperty.AGE:
|
||||||
@ -3374,6 +3445,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
public UnicodeSet closeOver(int attribute) {
|
public UnicodeSet closeOver(int attribute) {
|
||||||
|
checkFrozen();
|
||||||
if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
|
if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
|
||||||
UCaseProps csp;
|
UCaseProps csp;
|
||||||
try {
|
try {
|
||||||
@ -3470,4 +3542,40 @@ public class UnicodeSet extends UnicodeFilter {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean frozen;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this frozen, according to the Freezable interface?
|
||||||
|
* @return true if this set has been frozen and can no longer be modified
|
||||||
|
*/
|
||||||
|
public boolean isFrozen() {
|
||||||
|
return frozen;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Freeze this object (make it unmodifiable), according to the Freezable interface.
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public Object freeze() {
|
||||||
|
frozen = true;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a thawed (modifiable) clone of this object, according to the Freezable interface.
|
||||||
|
* @return a clone of this set that is not frozen
|
||||||
|
*/
|
||||||
|
public Object cloneAsThawed() {
|
||||||
|
UnicodeSet result = (UnicodeSet) clone();
|
||||||
|
result.frozen = false;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// internal function: throws UnsupportedOperationException if this set is frozen
|
||||||
|
private void checkFrozen() {
|
||||||
|
if (frozen) {
|
||||||
|
throw new UnsupportedOperationException("Attempt to modify frozen object");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user