ICU-11265 UnicodeSet: Replace StringBuffer with StringBuilder

X-SVN-Rev: 37116
This commit is contained in:
Markus Scherer 2015-03-03 22:46:32 +00:00
parent 4dadadf10e
commit 31bc4daf4c

View File

@ -6,6 +6,7 @@
*/ */
package com.ibm.icu.text; package com.ibm.icu.text;
import java.io.IOException;
import java.text.ParsePosition; import java.text.ParsePosition;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@ -30,6 +31,7 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript; import com.ibm.icu.lang.UScript;
import com.ibm.icu.util.Freezable; import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.OutputInt; import com.ibm.icu.util.OutputInt;
import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo; import com.ibm.icu.util.VersionInfo;
@ -600,10 +602,40 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
} }
/** /**
* Append the <code>toPattern()</code> representation of a * TODO: create Appendable version of UTF16.append(buf, c),
* string to the given <code>StringBuffer</code>. * maybe in new class Appendables?
* @throws ICUUncheckedIOException if the Appendable throws an IOException
*/ */
private static StringBuffer _appendToPat(StringBuffer buf, String s, boolean escapeUnprintable) { private static void appendCodePoint(Appendable app, int c) {
assert 0 <= c && c <= 0x10ffff;
try {
if (c <= 0xffff) {
app.append((char) c);
} else {
app.append(UTF16.getLeadSurrogate(c)).append(UTF16.getTrailSurrogate(c));
}
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
}
/**
* Appends the character sequence to the Appendable, converting any checked
* IOException raised by the Appendable into an unchecked ICUUncheckedIOException.
* TODO: create class Appendables?
*
* @param app the destination to append to
* @param s the character sequence to append
* @throws ICUUncheckedIOException wrapping an IOException thrown by app.append(s)
*/
private static void append(Appendable app, CharSequence s) {
try {
app.append(s);
} catch (IOException e) {
// Appendable.append() declares IOException; rethrow it unchecked (cause preserved)
// so that this helper needs no throws clause.
throw new ICUUncheckedIOException(e);
}
}
/**
* Append the <code>toPattern()</code> representation of a
* string to the given <code>Appendable</code>.
*/
private static <A extends Appendable> A _appendToPat(A buf, String s, boolean escapeUnprintable) {
int cp; int cp;
for (int i = 0; i < s.length(); i += Character.charCount(cp)) { for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
cp = s.codePointAt(i); cp = s.codePointAt(i);
@ -614,41 +646,43 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/** /**
* Append the <code>toPattern()</code> representation of a * Append the <code>toPattern()</code> representation of a
* character to the given <code>StringBuffer</code>. * character to the given <code>Appendable</code>.
*/ */
private static StringBuffer _appendToPat(StringBuffer buf, int c, boolean escapeUnprintable) { private static <A extends Appendable> A _appendToPat(A buf, int c, boolean escapeUnprintable) {
// "Utility.isUnprintable(c)" seems redundant since the call try {
// "Utility.escapeUnprintable(buf, c)" does it again inside the if statement if (escapeUnprintable && Utility.isUnprintable(c)) {
if (escapeUnprintable && Utility.isUnprintable(c)) { // Use hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for anything
// Use hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for anything // unprintable
// unprintable if (Utility.escapeUnprintable(buf, c)) {
if (Utility.escapeUnprintable(buf, c)) { return buf;
return buf; }
} }
} // Okay to let ':' pass through
// Okay to let ':' pass through switch (c) {
switch (c) { case '[': // SET_OPEN:
case '[': // SET_OPEN: case ']': // SET_CLOSE:
case ']': // SET_CLOSE: case '-': // HYPHEN:
case '-': // HYPHEN: case '^': // COMPLEMENT:
case '^': // COMPLEMENT: case '&': // INTERSECTION:
case '&': // INTERSECTION: case '\\': //BACKSLASH:
case '\\': //BACKSLASH: case '{':
case '{': case '}':
case '}': case '$':
case '$': case ':':
case ':':
buf.append('\\');
break;
default:
// Escape whitespace
if (PatternProps.isWhiteSpace(c)) {
buf.append('\\'); buf.append('\\');
break;
default:
// Escape whitespace
if (PatternProps.isWhiteSpace(c)) {
buf.append('\\');
}
break;
} }
break; appendCodePoint(buf, c);
return buf;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
} }
UTF16.append(buf, c);
return buf;
} }
/** /**
@ -658,7 +692,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
public String toPattern(boolean escapeUnprintable) { public String toPattern(boolean escapeUnprintable) {
StringBuffer result = new StringBuffer(); if (pat != null && !escapeUnprintable) {
return pat;
}
StringBuilder result = new StringBuilder();
return _toPattern(result, escapeUnprintable).toString(); return _toPattern(result, escapeUnprintable).toString();
} }
@ -667,37 +704,44 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* a cleaned version of the string passed to applyPattern(), if there * a cleaned version of the string passed to applyPattern(), if there
* is one. Otherwise it will be generated. * is one. Otherwise it will be generated.
*/ */
private StringBuffer _toPattern(StringBuffer result, private <A extends Appendable> A _toPattern(A result,
boolean escapeUnprintable) { boolean escapeUnprintable) {
if (pat != null) { if (pat == null) {
int i; return appendNewPattern(result, escapeUnprintable, true);
int backslashCount = 0; }
for (i=0; i<pat.length(); ) { try {
int c = UTF16.charAt(pat, i); if (!escapeUnprintable) {
i += UTF16.getCharCount(c); result.append(pat);
if (escapeUnprintable && Utility.isUnprintable(c)) { return result;
}
boolean oddNumberOfBackslashes = false;
for (int i=0; i<pat.length(); ) {
int c = pat.codePointAt(i);
i += Character.charCount(c);
if (Utility.isUnprintable(c)) {
// If the unprintable character is preceded by an odd // If the unprintable character is preceded by an odd
// number of backslashes, then it has been escaped. // number of backslashes, then it has been escaped
// Before unescaping it, we delete the final // and we omit the last backslash.
// backslash.
if (backslashCount % 2 != 0) {
result.setLength(result.length() - 1);
}
Utility.escapeUnprintable(result, c); Utility.escapeUnprintable(result, c);
backslashCount = 0; oddNumberOfBackslashes = false;
} else if (!oddNumberOfBackslashes && c == '\\') {
// Temporarily withhold an odd-numbered backslash.
oddNumberOfBackslashes = true;
} else { } else {
UTF16.append(result, c); if (oddNumberOfBackslashes) {
if (c == '\\') { result.append('\\');
++backslashCount;
} else {
backslashCount = 0;
} }
appendCodePoint(result, c);
oddNumberOfBackslashes = false;
} }
} }
if (oddNumberOfBackslashes) {
result.append('\\');
}
return result; return result;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
} }
return _generatePattern(result, escapeUnprintable, true);
} }
/** /**
@ -721,66 +765,66 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
*/ */
public StringBuffer _generatePattern(StringBuffer result, public StringBuffer _generatePattern(StringBuffer result,
boolean escapeUnprintable, boolean includeStrings) { boolean escapeUnprintable, boolean includeStrings) {
result.append('['); return appendNewPattern(result, escapeUnprintable, includeStrings);
}
// // Check against the predefined categories. We implicitly build private <A extends Appendable> A appendNewPattern(
// // up ALL category sets the first time toPattern() is called. A result, boolean escapeUnprintable, boolean includeStrings) {
// for (int cat=0; cat<CATEGORY_COUNT; ++cat) { try {
// if (this.equals(getCategorySet(cat))) { result.append('[');
// result.append(':');
// result.append(CATEGORY_NAMES.substring(cat*2, cat*2+2)); int count = getRangeCount();
// return result.append(":]");
// } // If the set contains at least 2 intervals and includes both
// } // MIN_VALUE and MAX_VALUE, then the inverse representation will
// be more economical.
int count = getRangeCount(); if (count > 1 &&
getRangeStart(0) == MIN_VALUE &&
// If the set contains at least 2 intervals and includes both getRangeEnd(count-1) == MAX_VALUE) {
// MIN_VALUE and MAX_VALUE, then the inverse representation will
// be more economical. // Emit the inverse
if (count > 1 && result.append('^');
getRangeStart(0) == MIN_VALUE &&
getRangeEnd(count-1) == MAX_VALUE) { for (int i = 1; i < count; ++i) {
int start = getRangeEnd(i-1)+1;
// Emit the inverse int end = getRangeStart(i)-1;
result.append('^'); _appendToPat(result, start, escapeUnprintable);
if (start != end) {
for (int i = 1; i < count; ++i) { if ((start+1) != end) {
int start = getRangeEnd(i-1)+1; result.append('-');
int end = getRangeStart(i)-1; }
_appendToPat(result, start, escapeUnprintable); _appendToPat(result, end, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
result.append('-');
} }
_appendToPat(result, end, escapeUnprintable);
} }
} }
}
// Default; emit the ranges as pairs
// Default; emit the ranges as pairs else {
else { for (int i = 0; i < count; ++i) {
for (int i = 0; i < count; ++i) { int start = getRangeStart(i);
int start = getRangeStart(i); int end = getRangeEnd(i);
int end = getRangeEnd(i); _appendToPat(result, start, escapeUnprintable);
_appendToPat(result, start, escapeUnprintable); if (start != end) {
if (start != end) { if ((start+1) != end) {
if ((start+1) != end) { result.append('-');
result.append('-'); }
_appendToPat(result, end, escapeUnprintable);
} }
_appendToPat(result, end, escapeUnprintable);
} }
} }
}
if (includeStrings && strings.size() > 0) {
if (includeStrings && strings.size() > 0) { for (String s : strings) {
for (String s : strings) { result.append('{');
result.append('{'); _appendToPat(result, s, escapeUnprintable);
_appendToPat(result, s, escapeUnprintable); result.append('}');
result.append('}'); }
} }
result.append(']');
return result;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
} }
return result.append(']');
} }
/** /**
@ -1974,8 +2018,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
if (strings.size() == 0) { if (strings.size() == 0) {
return toString(); return toString();
} }
StringBuffer result = new StringBuffer("(?:"); StringBuilder result = new StringBuilder("(?:");
_generatePattern(result, true, false); appendNewPattern(result, true, false);
for (String s : strings) { for (String s : strings) {
result.append('|'); result.append('|');
_appendToPat(result, s, true); _appendToPat(result, s, true);
@ -2363,7 +2407,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
pos = new ParsePosition(0); pos = new ParsePosition(0);
} }
StringBuffer rebuiltPat = new StringBuffer(); StringBuilder rebuiltPat = new StringBuilder();
RuleCharacterIterator chars = RuleCharacterIterator chars =
new RuleCharacterIterator(pattern, symbols, pos); new RuleCharacterIterator(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options); applyPattern(chars, symbols, rebuiltPat, options);
@ -2401,8 +2445,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* @param options a bit mask of zero or more of the following: * @param options a bit mask of zero or more of the following:
* IGNORE_SPACE, CASE. * IGNORE_SPACE, CASE.
*/ */
void applyPattern(RuleCharacterIterator chars, SymbolTable symbols, private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
StringBuffer rebuiltPat, int options) { Appendable rebuiltPat, int options) {
// Syntax characters: [ ] ^ - & { } // Syntax characters: [ ] ^ - & { }
@ -2414,7 +2458,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
opts |= RuleCharacterIterator.SKIP_WHITESPACE; opts |= RuleCharacterIterator.SKIP_WHITESPACE;
} }
StringBuffer patBuf = new StringBuffer(), buf = null; StringBuilder patBuf = new StringBuilder(), buf = null;
boolean usePat = false; boolean usePat = false;
UnicodeSet scratch = null; UnicodeSet scratch = null;
Object backup = null; Object backup = null;
@ -2634,7 +2678,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
} }
lastItem = 0; lastItem = 0;
if (buf == null) { if (buf == null) {
buf = new StringBuffer(); buf = new StringBuilder();
} else { } else {
buf.setLength(0); buf.setLength(0);
} }
@ -2646,7 +2690,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
ok = true; ok = true;
break; break;
} }
UTF16.append(buf, c); appendCodePoint(buf, c);
} }
if (buf.length() < 1 || !ok) { if (buf.length() < 1 || !ok) {
syntaxError(chars, "Invalid multicharacter string"); syntaxError(chars, "Invalid multicharacter string");
@ -2752,9 +2796,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
// Use the rebuilt pattern (pat) only if necessary. Prefer the // Use the rebuilt pattern (pat) only if necessary. Prefer the
// generated pattern. // generated pattern.
if (usePat) { if (usePat) {
rebuiltPat.append(patBuf.toString()); append(rebuiltPat, patBuf.toString());
} else { } else {
_generatePattern(rebuiltPat, false, true); appendNewPattern(rebuiltPat, false, true);
} }
} }
@ -3624,7 +3668,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* @param symbols TODO * @param symbols TODO
*/ */
private void applyPropertyPattern(RuleCharacterIterator chars, private void applyPropertyPattern(RuleCharacterIterator chars,
StringBuffer rebuiltPat, SymbolTable symbols) { Appendable rebuiltPat, SymbolTable symbols) {
String patStr = chars.lookahead(); String patStr = chars.lookahead();
ParsePosition pos = new ParsePosition(0); ParsePosition pos = new ParsePosition(0);
applyPropertyPattern(patStr, pos, symbols); applyPropertyPattern(patStr, pos, symbols);
@ -3632,7 +3676,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
syntaxError(chars, "Invalid property pattern"); syntaxError(chars, "Invalid property pattern");
} }
chars.jumpahead(pos.getIndex()); chars.jumpahead(pos.getIndex());
rebuiltPat.append(patStr.substring(0, pos.getIndex())); append(rebuiltPat, patStr.substring(0, pos.getIndex()));
} }
//---------------------------------------------------------------- //----------------------------------------------------------------
@ -4148,7 +4192,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
*/ */
@Override @Override
public String toString() { public String toString() {
StringBuffer b = new StringBuffer(); StringBuilder b = new StringBuilder();
return ( return (
codepoint == codepointEnd ? _appendToPat(b, codepoint, false) codepoint == codepointEnd ? _appendToPat(b, codepoint, false)
: _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false)) : _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false))