ICU-13613 Move Java StringSegment to com.ibm.icu.impl.

X-SVN-Rev: 41003
This commit is contained in:
Shane Carr 2018-02-28 05:42:11 +00:00
parent 33156381b1
commit f616fca69b
26 changed files with 164 additions and 49 deletions

View File

@ -1,13 +1,17 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
package com.ibm.icu.impl;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
/**
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
* subSequence methods all operate relative to the fixed offset into the String.
* A mutable String wrapper with a variable offset and length and support for case folding.
* <p>
* The charAt, length, and subSequence methods all operate relative to the fixed offset into the String.
* <p>
* CAUTION: Since this class is mutable, it must not be used anywhere that an immutable object is
* required, like in a cache or as the key of a hash map.
*
* @author sffc
*/
@ -17,11 +21,11 @@ public class StringSegment implements CharSequence {
private int end;
private boolean foldCase;
public StringSegment(String str, int parseFlags) {
public StringSegment(String str, boolean foldCase) {
this.str = str;
this.start = 0;
this.end = str.length();
this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
this.foldCase = foldCase;
}
public int getOffset() {
@ -37,9 +41,10 @@ public class StringSegment implements CharSequence {
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
*
* <p>
* This method is usually called by a Matcher to register that a char was consumed. If the char is
* strong (it usually is, except for things like whitespace), follow this with a call to
* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
* Number parsing note: This method is usually called by a Matcher to register that a char was
* consumed. If the char is strong (it usually is, except for things like whitespace), follow this
* with a call to ParsedNumber#setCharsConsumed(). For more information on strong chars, see that
* method.
*/
public void adjustOffset(int delta) {
assert start + delta >= 0;
@ -48,7 +53,7 @@ public class StringSegment implements CharSequence {
}
/**
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
* Adjusts the offset by the width of the current lead code point, either 1 or 2 chars.
*/
public void adjustOffsetByCodePoint() {
start += Character.charCount(getCodePoint());
@ -86,8 +91,8 @@ public class StringSegment implements CharSequence {
* code point.
*
* <p>
* <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
* folding logic, instead of this method.
* <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles
* case folding logic, instead of this method.
*/
public int getCodePoint() {
assert start < end;
@ -107,14 +112,14 @@ public class StringSegment implements CharSequence {
* <p>
* This method will perform case folding if case folding was enabled in the constructor.
*/
public boolean matches(int otherCp) {
public boolean startsWith(int otherCp) {
return codePointsEqual(getCodePoint(), otherCp, foldCase);
}
/**
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
*/
public boolean matches(UnicodeSet uniset) {
public boolean startsWith(UnicodeSet uniset) {
// TODO: Move UnicodeSet case-folding logic here.
// TODO: Handle string matches here instead of separately.
int cp = getCodePoint();
@ -130,15 +135,18 @@ public class StringSegment implements CharSequence {
* since the first 2 characters are the same.
*
* <p>
* This method will perform case folding if case folding is enabled for the parser.
* This method only returns offsets along code point boundaries.
*
* <p>
* This method will perform case folding if case folding was enabled in the constructor.
*/
public int getCommonPrefixLength(CharSequence other) {
// Pass along this segment's own foldCase field so the shared helper decides whether to fold case.
return getPrefixLengthInternal(other, foldCase);
}
/**
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
* enabled for the parser.
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was
* enabled in the constructor.
*/
public int getCaseSensitivePrefixLength(CharSequence other) {
return getPrefixLengthInternal(other, false);
@ -147,29 +155,16 @@ public class StringSegment implements CharSequence {
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
int offset = 0;
for (; offset < Math.min(length(), other.length());) {
// TODO: case-fold code points, not chars
char c1 = charAt(offset);
char c2 = other.charAt(offset);
if (!codePointsEqual(c1, c2, foldCase)) {
int cp1 = Character.codePointAt(this, offset);
int cp2 = Character.codePointAt(other, offset);
if (!codePointsEqual(cp1, cp2, foldCase)) {
break;
}
offset++;
offset += Character.charCount(cp1);
}
return offset;
}
// /**
// * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
// */
// public static String maybeFold(String input, int parseFlags) {
// UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
// if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
// return UCharacter.foldCase(input, true);
// } else {
// return input;
// }
// }
private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
if (cp1 == cp2) {
return true;
@ -182,6 +177,26 @@ public class StringSegment implements CharSequence {
return cp1 == cp2;
}
/**
 * Compares this segment, char by char, against any other CharSequence.
 *
 * <p>
 * No case folding is applied by this method; for a case-insensitive comparison, use
 * {@link #getCommonPrefixLength} instead.
 *
 * @param other the object to compare with; anything that is not a CharSequence is never equal
 * @return true if other is a CharSequence holding the same chars as this segment
 */
@Override
public boolean equals(Object other) {
    return other instanceof CharSequence
            && Utility.charSequenceEquals(this, (CharSequence) other);
}
/**
 * Returns the hash code computed by Utility.charSequenceHashCode over this object viewed as a
 * CharSequence, i.e. the value a String holding exactly the chars of this segment would return
 * from hashCode().
 *
 * <p>
 * NOTE: this is not the hash code of this class's own toString() result; as far as is visible
 * here, toString() also includes the text outside the segment plus bracket markers.
 */
@Override
public int hashCode() {
return Utility.charSequenceHashCode(this);
}
@Override
public String toString() {
return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);

View File

@ -1863,4 +1863,36 @@ public final class Utility {
}
return r;
}
/**
 * Compares two CharSequences char by char.
 *
 * @param a the first sequence; may be null
 * @param b the second sequence; may be null
 * @return true if both arguments are the same reference (including both null), or if both are
 *         non-null, have the same length, and hold the same char at every index
 */
public static boolean charSequenceEquals(CharSequence a, CharSequence b) {
    if (a == b) {
        return true;
    }
    if (a == null || b == null || a.length() != b.length()) {
        return false;
    }
    int index = 0;
    while (index < a.length()) {
        if (a.charAt(index) != b.charAt(index)) {
            return false;
        }
        index++;
    }
    return true;
}
/**
 * Computes a hash code for a CharSequence using the same polynomial (multiplier 31) as
 * String.hashCode(), so the result equals charSequence.toString().hashCode() without
 * building the intermediate String.
 *
 * @param value the sequence to hash; must not be null
 * @return the hash code of the chars in value
 */
public static int charSequenceHashCode(CharSequence value) {
    int result = 0;
    int index = 0;
    while (index < value.length()) {
        result = 31 * result + value.charAt(index);
        index++;
    }
    return result;
}
}

View File

@ -7,6 +7,7 @@ import java.util.Collections;
import java.util.Comparator;
import com.ibm.icu.impl.StandardPlural;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;

View File

@ -5,6 +5,7 @@ package com.ibm.icu.impl.number.parse;
import java.util.ArrayList;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
@ -24,7 +25,7 @@ public class CodePointMatcher implements NumberParseMatcher {
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
if (segment.matches(cp)) {
if (segment.startsWith(cp)) {
segment.adjustOffsetByCodePoint();
result.setCharsConsumed(segment);
}

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.ULocale;

View File

@ -4,6 +4,7 @@ package com.ibm.icu.impl.number.parse;
import java.util.Iterator;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**

View File

@ -8,6 +8,7 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
@ -380,7 +381,8 @@ public class NumberParserImpl {
public void parse(String input, int start, boolean greedy, ParsedNumber result) {
assert frozen;
assert start >= 0 && start < input.length();
StringSegment segment = new StringSegment(input, parseFlags);
StringSegment segment = new StringSegment(input,
0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE));
segment.adjustOffset(start);
if (greedy) {
parseGreedyRecursive(segment, result);

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**

View File

@ -5,6 +5,7 @@ package com.ibm.icu.impl.number.parse;
import java.math.BigDecimal;
import java.util.Comparator;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
@ -47,10 +48,10 @@ public class ScientificMatcher implements NumberParseMatcher {
// Allow a sign, and then try to match digits.
boolean minusSign = false;
if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
if (segment.startsWith(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
minusSign = true;
segment.adjustOffsetByCodePoint();
} else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
} else if (segment.startsWith(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
segment.adjustOffsetByCodePoint();
}

View File

@ -5,6 +5,7 @@ package com.ibm.icu.impl.number.parse;
import java.util.ArrayList;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
@ -47,7 +48,7 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
}
}
if (segment.matches(uniSet)) {
if (segment.startsWith(uniSet)) {
segment.adjustOffsetByCodePoint();
accept(segment, result);
return false;

View File

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**

View File

@ -1,12 +1,12 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.number;
package com.ibm.icu.dev.test.impl;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
import com.ibm.icu.impl.number.parse.StringSegment;
import com.ibm.icu.impl.StringSegment;
/**
* @author sffc
@ -17,9 +17,11 @@ public class StringSegmentTest {
@Test
public void testOffset() {
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(0, segment.getOffset());
segment.adjustOffset(3);
segment.adjustOffsetByCodePoint();
assertEquals(2, segment.getOffset());
segment.adjustOffset(1);
assertEquals(3, segment.getOffset());
segment.adjustOffset(2);
assertEquals(5, segment.getOffset());
@ -29,7 +31,7 @@ public class StringSegmentTest {
@Test
public void testLength() {
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(11, segment.length());
segment.adjustOffset(3);
assertEquals(8, segment.length());
@ -43,7 +45,7 @@ public class StringSegmentTest {
@Test
public void testCharAt() {
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertCharSequenceEquals(SAMPLE_STRING, segment);
segment.adjustOffset(3);
assertCharSequenceEquals("radio 📻", segment);
@ -53,7 +55,7 @@ public class StringSegmentTest {
@Test
public void testGetCodePoint() {
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(0x1F4FB, segment.getCodePoint());
segment.setLength(1);
assertEquals(0xD83D, segment.getCodePoint());
@ -66,18 +68,20 @@ public class StringSegmentTest {
@Test
public void testCommonPrefixLength() {
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
StringSegment segment = new StringSegment(SAMPLE_STRING, true);
assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
assertEquals(4, segment.getCommonPrefixLength("📻 r"));
assertEquals(3, segment.getCommonPrefixLength("📻 x"));
assertEquals(0, segment.getCommonPrefixLength("x"));
assertEquals(0, segment.getCommonPrefixLength(""));
segment.adjustOffset(3);
assertEquals(0, segment.getCommonPrefixLength("RADiO"));
assertEquals(5, segment.getCommonPrefixLength("raDio"));
assertEquals(5, segment.getCommonPrefixLength("radio"));
assertEquals(2, segment.getCommonPrefixLength("rafio"));
assertEquals(0, segment.getCommonPrefixLength("fadio"));
assertEquals(0, segment.getCommonPrefixLength(""));
assertEquals(5, segment.getCaseSensitivePrefixLength("radio"));
assertEquals(2, segment.getCaseSensitivePrefixLength("raDio"));
segment.setLength(3);
assertEquals(3, segment.getCommonPrefixLength("radio"));
assertEquals(2, segment.getCommonPrefixLength("rafio"));

View File

@ -8,6 +8,7 @@ import static org.junit.Assert.assertTrue;
import org.junit.Test;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
@ -17,7 +18,6 @@ import com.ibm.icu.impl.number.parse.ParsingUtils;
import com.ibm.icu.impl.number.parse.PercentMatcher;
import com.ibm.icu.impl.number.parse.PlusSignMatcher;
import com.ibm.icu.impl.number.parse.SeriesMatcher;
import com.ibm.icu.impl.number.parse.StringSegment;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.text.DecimalFormatSymbols;
@ -192,7 +192,7 @@ public class NumberParserTest {
int expectedOffset = (Integer) cas[1];
boolean expectedMaybeMore = (Boolean) cas[2];
StringSegment segment = new StringSegment(input, 0);
StringSegment segment = new StringSegment(input, false);
ParsedNumber result = new ParsedNumber();
boolean actualMaybeMore = series.match(segment, result);
int actualOffset = segment.getOffset();

View File

@ -15,6 +15,7 @@ package com.ibm.icu.dev.test.util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.junit.Test;
@ -249,4 +250,45 @@ public class UtilityTest extends TestFmwk {
public String CheckSourceLocale() {
return TestFmwk.sourceLocation();
}
static final String RANDOM_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
static final Random RANDOM = new Random(2018);
/**
 * Checks Utility.charSequenceEquals and Utility.charSequenceHashCode against String.equals and
 * String.hashCode on short random alphanumeric sequences.
 */
@Test
public void TestCharSequenceEqualsAndHashCode() {
    for (int t = 0; t < 1000; t++) {
        int length = RANDOM.nextInt(5);
        CharSequence a = randomCharSequence(length);
        CharSequence b = randomCharSequence(length);
        // c is always 3 chars longer than a and b, so it can never be equal to either.
        CharSequence c = randomCharSequence(length + 3);
        String message = "a=" + a + "; b=" + b + "; c=" + c;

        assertTrue(message, Utility.charSequenceEquals(a, a));
        assertFalse(message, Utility.charSequenceEquals(a, c));
        assertTrue(message, Utility.charSequenceEquals(b, b));
        assertFalse(message, Utility.charSequenceEquals(b, c));
        assertFalse(message, Utility.charSequenceEquals(c, a));
        assertFalse(message, Utility.charSequenceEquals(c, b));
        assertTrue(message, Utility.charSequenceEquals(c, c));

        // a and b share a length, so String equality is the reference answer. This also covers
        // length == 0, where both are empty and therefore equal.
        if (a.toString().equals(b.toString())) {
            assertTrue(message, Utility.charSequenceEquals(a, b));
            assertTrue(message, Utility.charSequenceEquals(b, a));
        } else {
            assertFalse(message, Utility.charSequenceEquals(a, b));
            assertFalse(message, Utility.charSequenceEquals(b, a));
        }

        // Expected value (String.hashCode) goes first so a failure report labels the two
        // numbers correctly.
        assertEquals(message, a.toString().hashCode(), Utility.charSequenceHashCode(a));
        assertEquals(message, b.toString().hashCode(), Utility.charSequenceHashCode(b));
        assertEquals(message, c.toString().hashCode(), Utility.charSequenceHashCode(c));
    }
}
/**
 * Builds a sequence of the requested length, drawing each char from RANDOM_CHARS at an index
 * chosen by RANDOM.
 */
private CharSequence randomCharSequence(int length) {
    StringBuilder builder = new StringBuilder();
    int remaining = length;
    while (remaining-- > 0) {
        int pick = RANDOM.nextInt(RANDOM_CHARS.length());
        builder.append(RANDOM_CHARS.charAt(pick));
    }
    return builder;
}
}