ICU-7077 UTrie2 port to Java, cleanup and optimization of iteration
X-SVN-Rev: 26959
This commit is contained in:
parent
22efa333d0
commit
a7f460e761
@ -26,16 +26,7 @@ import java.util.NoSuchElementException;
|
||||
*/
|
||||
public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
|
||||
/**
|
||||
* Selectors for the width of a UTrie2 data value.
|
||||
* TODO: this can probably be removed. It's no longer used in the
|
||||
* primary API
|
||||
*/
|
||||
enum ValueWidth {
|
||||
BITS_16,
|
||||
BITS_32
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a Trie2 from its serialized form. Inverse of utrie2_serialize().
|
||||
* The serialized format is identical between ICU4C and ICU4J, so this function
|
||||
@ -215,16 +206,14 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
*
|
||||
* @param is an InputStream containing the serialized form
|
||||
* of a UTrie, version 1 or 2. The stream must support mark() and reset().
|
||||
* TODO: is requiring mark and reset ok?
|
||||
* The position of the input stream will be left unchanged.
|
||||
* @param anyEndianOk If FALSE, only big-endian (Java native) serialized forms are recognized.
|
||||
* @param littleEndianOk If FALSE, only big-endian (Java native) serialized forms are recognized.
|
||||
* If TRUE, little-endian serialized forms are recognized as well.
|
||||
* TODO: dump this option, always allow either endian? Or allow only big endian?
|
||||
* @return the Trie version of the serialized form, or 0 if it is not
|
||||
* recognized as a serialized UTrie
|
||||
* @throws IOException on errors in reading from the input stream.
|
||||
*/
|
||||
public static int getVersion(InputStream is, boolean anyEndianOk) throws IOException {
|
||||
public static int getVersion(InputStream is, boolean littleEndianOk) throws IOException {
|
||||
if (! is.markSupported()) {
|
||||
throw new IllegalArgumentException("Input stream must support mark().");
|
||||
}
|
||||
@ -239,7 +228,7 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
if (sig[0]=='T' && sig[1]=='r' && sig[2]=='i' && sig[3]=='2') {
|
||||
return 2;
|
||||
}
|
||||
if (anyEndianOk) {
|
||||
if (littleEndianOk) {
|
||||
if (sig[0]=='e' && sig[1]=='i' && sig[2]=='r' && sig[3]=='T') {
|
||||
return 1;
|
||||
}
|
||||
@ -525,7 +514,7 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
*
|
||||
* @param text A text string to be iterated over.
|
||||
* @param index The starting iteration position within the input text.
|
||||
* @return An iterator
|
||||
* @return the CharSequenceIterator
|
||||
*/
|
||||
public CharSequenceIterator charSequenceIterator(CharSequence text, int index) {
|
||||
return new CharSequenceIterator(text, index);
|
||||
@ -626,6 +615,14 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
|
||||
|
||||
/**
|
||||
* Selectors for the width of a UTrie2 data value.
|
||||
*/
|
||||
enum ValueWidth {
|
||||
BITS_16,
|
||||
BITS_32
|
||||
}
|
||||
|
||||
/**
|
||||
* Trie2 data structure in serialized form:
|
||||
*
|
||||
* UTrie2Header header;
|
||||
@ -899,7 +896,7 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
// Iteration over code point values.
|
||||
val = get(nextStart);
|
||||
mappedVal = mapper.map(val);
|
||||
endOfRange = rangeEnd(nextStart);
|
||||
endOfRange = rangeEnd(nextStart, limitCP, val);
|
||||
// Loop once for each range in the Trie2 with the same raw (unmapped) value.
|
||||
// Loop continues so long as the mapped values are the same.
|
||||
for (;;) {
|
||||
@ -910,7 +907,7 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
if (mapper.map(val) != mappedVal) {
|
||||
break;
|
||||
}
|
||||
endOfRange = rangeEnd(endOfRange+1);
|
||||
endOfRange = rangeEnd(endOfRange+1, limitCP, val);
|
||||
}
|
||||
} else {
|
||||
// Iteration over the alternate lead surrogate values.
|
||||
@ -949,31 +946,7 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the last character in a contiguous range of characters with the
|
||||
* same Trie2 value as the input character.
|
||||
*
|
||||
* @param c The character to begin with.
|
||||
* @return The last contiguous character with the same value.
|
||||
*/
|
||||
private int rangeEnd(int startingC) {
|
||||
// TODO: add optimizations
|
||||
int c;
|
||||
int val = get(startingC);
|
||||
int limit = Math.min(highStart, limitCP);
|
||||
|
||||
for (c = startingC+1; c < limit; c++) {
|
||||
if (get(c) != val) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (c >= highStart) {
|
||||
c = limitCP;
|
||||
}
|
||||
return c - 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the last lead surrogate in a contiguous range with the
|
||||
* same Trie2 value as the input character.
|
||||
@ -981,6 +954,11 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
* Use the alternate Lead Surrogate values from the Trie2,
|
||||
* not the code-point values.
|
||||
*
|
||||
* Note: Trie2_16 and Trie2_32 override this implementation with optimized versions,
|
||||
* meaning that the implementation here is only being used with
|
||||
* Trie2Writable. The code here is logically correct with any type
|
||||
* of Trie2, however.
|
||||
*
|
||||
* @param c The character to begin with.
|
||||
* @return The last contiguous character with the same value.
|
||||
*/
|
||||
@ -989,7 +967,6 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
return 0xdbff;
|
||||
}
|
||||
|
||||
// TODO: add optimizations
|
||||
int c;
|
||||
int val = getFromU16SingleLead(startingLS);
|
||||
for (c = startingLS+1; c <= 0x0dbff; c++) {
|
||||
@ -1020,6 +997,28 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
|
||||
private boolean doLeadSurrogates = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the last character in a contiguous range of characters with the
|
||||
* same Trie2 value as the input character.
|
||||
*
|
||||
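* @param start The character to begin with.
|
||||
* @param limitp The scan limit; code points at or above it are not examined.
|
||||
* @param val The value that the following code points are compared against.
|
||||
* @return The last contiguous character with the same value.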
|
||||
*/
|
||||
int rangeEnd(int start, int limitp, int val) {
|
||||
int c;
|
||||
int limit = Math.min(highStart, limitp);
|
||||
|
||||
for (c = start+1; c < limit; c++) {
|
||||
if (get(c) != val) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (c >= highStart) {
|
||||
c = limitp;
|
||||
}
|
||||
return c - 1;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Hashing implementation functions. FNV hash. Respected public domain algorithm.
|
||||
|
@ -17,7 +17,7 @@ public class Trie2Writable extends Trie2 {
|
||||
|
||||
|
||||
/**
|
||||
* Create a new, empty, writable Trie2. At build time, 32-bit data values are used.
|
||||
* Create a new, empty, writable Trie2. 32-bit data values are used.
|
||||
*
|
||||
* @param initialValueP the initial value that is set for all code points
|
||||
* @param errorValueP the value for out-of-range code points and illegal UTF-8
|
||||
@ -136,9 +136,9 @@ public class Trie2Writable extends Trie2 {
|
||||
|
||||
|
||||
/**
|
||||
* Create a new build time (modifiable) Trie2 whose contents are the same as the source Trie.
|
||||
* Create a new build time (modifiable) Trie2 whose contents are the same as the source Trie2.
|
||||
*
|
||||
* @param source the source Trie
|
||||
* @param source the source Trie2. Its contents will be copied into the new Trie2.
|
||||
*/
|
||||
public Trie2Writable(Trie2 source) {
|
||||
init(source.initialValue, source.errorValue);
|
||||
@ -524,7 +524,6 @@ public class Trie2Writable extends Trie2 {
|
||||
public Trie2Writable setRange(Trie2.Range range, boolean overwrite) {
|
||||
fHash = 0;
|
||||
if (range.leadSurrogate) {
|
||||
// TODO: optimize this.
|
||||
for (int c=range.startCodePoint; c<=range.endCodePoint; c++) {
|
||||
if (overwrite || getFromU16SingleLead((char)c) == this.initialValue) {
|
||||
setForLeadSurrogateCodeUnit((char)c, range.value);
|
||||
@ -549,11 +548,8 @@ public class Trie2Writable extends Trie2 {
|
||||
* For code units outside of the lead surrogate range, this function
|
||||
* behaves identically to set().
|
||||
*
|
||||
* TODO: ICU4C restricts this function to lead surrogates only.
|
||||
* Should ICU4J match, or should ICU4C be loosened?
|
||||
*
|
||||
* @param codeUnit A UTF-16 code unit.
|
||||
* @param value the value
|
||||
* @param value the value to be stored in the Trie2.
|
||||
*/
|
||||
public Trie2Writable setForLeadSurrogateCodeUnit(char codeUnit, int value) {
|
||||
fHash = 0;
|
||||
@ -563,7 +559,7 @@ public class Trie2Writable extends Trie2 {
|
||||
|
||||
|
||||
/**
|
||||
* Get the value for a code point as stored in the trie.
|
||||
* Get the value for a code point as stored in the Trie2.
|
||||
*
|
||||
* @param codePoint the code point
|
||||
* @return the value
|
||||
@ -978,13 +974,10 @@ public class Trie2Writable extends Trie2 {
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* serialization ------------------------------------------------------------ */
|
||||
|
||||
|
||||
/**
|
||||
* Produce an optimized, read-only Trie2_16 from this writable Trie.
|
||||
* The data values must all fit as an unsigned 16 bit value.
|
||||
* Data values that do not fit in an unsigned 16 bit
|
||||
* value will be truncated.
|
||||
*/
|
||||
public Trie2_16 toTrie2_16() {
|
||||
Trie2_16 frozenTrie = new Trie2_16();
|
||||
@ -1153,7 +1146,7 @@ public class Trie2Writable extends Trie2 {
|
||||
}
|
||||
break;
|
||||
}
|
||||
// The writable, but compressed, Trie stays around unless the caller drops its references to it.
|
||||
// The writable, but compressed, Trie2 stays around unless the caller drops its references to it.
|
||||
}
|
||||
|
||||
|
||||
|
@ -158,4 +158,92 @@ public final class Trie2_16 extends Trie2 {
|
||||
bytesWritten += dataLength*2;
|
||||
return bytesWritten;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a starting code point, find the last in a range of code points,
|
||||
* all with the same value.
|
||||
*
|
||||
* This function is part of the implementation of iterating over the
|
||||
* Trie2's contents.
|
||||
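* @param startingCP The code point at which to begin looking.
|
||||
* @param limit Exclusive upper bound for the search; the result never exceeds limit - 1.
|
||||
* @param value The value to match; the scan stops at the first data entry that differs from it.
|
||||
* @return The last code point with the same value as the starting code point.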
|
||||
*/
|
||||
@Override
|
||||
int rangeEnd(int startingCP, int limit, int value) {
|
||||
int cp = startingCP;
|
||||
int block = 0;
|
||||
int index2Block = 0;
|
||||
|
||||
// Loop runs once for each of
|
||||
// - a partial data block
|
||||
// - a reference to the null (default) data block.
|
||||
// - a reference to the index2 null block
|
||||
|
||||
outerLoop:
|
||||
for (;;) {
|
||||
if (cp >= limit) {
|
||||
break;
|
||||
}
|
||||
if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
|
||||
// Ordinary BMP code point, excluding leading surrogates.
|
||||
// BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index.
|
||||
// 16 bit data is stored in the index array itself.
|
||||
index2Block = 0;
|
||||
block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
|
||||
} else if (cp < 0xffff) {
|
||||
// Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
|
||||
index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
|
||||
block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
|
||||
} else if (cp < highStart) {
|
||||
// Supplemental code point, use two-level lookup.
|
||||
int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
|
||||
index2Block = index[ix];
|
||||
block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
|
||||
} else {
|
||||
// Code point above highStart.
|
||||
if (value == index[highValueIndex]) {
|
||||
cp = limit;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (index2Block == index2NullOffset) {
|
||||
if (value != initialValue) {
|
||||
break;
|
||||
}
|
||||
cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
|
||||
} else if (block == dataNullOffset) {
|
||||
// The block at dataNullOffset has all values == initialValue.
|
||||
// Because Trie2 iteration always proceeds in ascending order, we will always
|
||||
// encounter a null block at its beginning, and can skip over
|
||||
// a number of code points equal to the length of the block.
|
||||
if (value != initialValue) {
|
||||
break;
|
||||
}
|
||||
cp += UTRIE2_DATA_BLOCK_LENGTH;
|
||||
} else {
|
||||
// Current position refers to an ordinary data block.
|
||||
// Walk over the data entries, checking the values.
|
||||
int startIx = block + (cp & UTRIE2_DATA_MASK);
|
||||
int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
|
||||
for (int ix = startIx; ix<limitIx; ix++) {
|
||||
if (index[ix] != value) {
|
||||
// We came to an entry with a different value.
|
||||
// We are done.
|
||||
cp += (ix - startIx);
|
||||
break outerLoop;
|
||||
}
|
||||
}
|
||||
// The ordinary data block contained our value until its end.
|
||||
// Advance the current code point, and continue the outerloop.
|
||||
cp += limitIx - startIx;
|
||||
}
|
||||
}
|
||||
if (cp > limit) {
|
||||
cp = limit;
|
||||
}
|
||||
|
||||
return cp - 1;
|
||||
}
|
||||
}
|
||||
|
@ -155,5 +155,94 @@ public class Trie2_32 extends Trie2 {
|
||||
bytesWritten += dataLength*4;
|
||||
return bytesWritten;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a starting code point, find the last in a range of code points,
|
||||
* all with the same value.
|
||||
*
|
||||
* This function is part of the implementation of iterating over the
|
||||
* Trie2's contents.
|
||||
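* @param startingCP The code point at which to begin looking.
|
||||
* @param limit Exclusive upper bound for the search; the result never exceeds limit - 1.
|
||||
* @param value The value to match; the scan stops at the first data entry that differs from it.
|
||||
* @return The last code point with the same value as the starting code point.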
|
||||
*/
|
||||
@Override
|
||||
int rangeEnd(int startingCP, int limit, int value) {
|
||||
int cp = startingCP;
|
||||
int block = 0;
|
||||
int index2Block = 0;
|
||||
|
||||
// Loop runs once for each of
|
||||
// - a partial data block
|
||||
// - a reference to the null (default) data block.
|
||||
// - a reference to the index2 null block
|
||||
|
||||
outerLoop:
|
||||
for (;;) {
|
||||
if (cp >= limit) {
|
||||
break;
|
||||
}
|
||||
if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
|
||||
// Ordinary BMP code point, excluding leading surrogates.
|
||||
// BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index.
|
||||
// 32 bit data is stored in the separate data32 array; the index array only supplies the block offset.
|
||||
index2Block = 0;
|
||||
block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
|
||||
} else if (cp < 0xffff) {
|
||||
// Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
|
||||
index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
|
||||
block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
|
||||
} else if (cp < highStart) {
|
||||
// Supplemental code point, use two-level lookup.
|
||||
int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
|
||||
index2Block = index[ix];
|
||||
block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
|
||||
} else {
|
||||
// Code point above highStart.
|
||||
if (value == data32[highValueIndex]) {
|
||||
cp = limit;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (index2Block == index2NullOffset) {
|
||||
if (value != initialValue) {
|
||||
break;
|
||||
}
|
||||
cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
|
||||
} else if (block == dataNullOffset) {
|
||||
// The block at dataNullOffset has all values == initialValue.
|
||||
// Because Trie2 iteration always proceeds in ascending order, we will always
|
||||
// encounter a null block at its beginning, and can skip over
|
||||
// a number of code points equal to the length of the block.
|
||||
if (value != initialValue) {
|
||||
break;
|
||||
}
|
||||
cp += UTRIE2_DATA_BLOCK_LENGTH;
|
||||
} else {
|
||||
// Current position refers to an ordinary data block.
|
||||
// Walk over the data entries, checking the values.
|
||||
int startIx = block + (cp & UTRIE2_DATA_MASK);
|
||||
int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
|
||||
for (int ix = startIx; ix<limitIx; ix++) {
|
||||
if (data32[ix] != value) {
|
||||
// We came to an entry with a different value.
|
||||
// We are done.
|
||||
cp += (ix - startIx);
|
||||
break outerLoop;
|
||||
}
|
||||
}
|
||||
// The ordinary data block contained our value until its end.
|
||||
// Advance the current code point, and continue the outer loop.
|
||||
cp += limitIx - startIx;
|
||||
}
|
||||
}
|
||||
if (cp > limit) {
|
||||
cp = limit;
|
||||
}
|
||||
|
||||
return cp - 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ public class Trie2Test extends TestFmwk {
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
trie.toTrie2_16().serialize(os);
|
||||
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
|
||||
assertEquals(where(), 2, Trie2.getVersion(is, true));
|
||||
assertEquals(null, 2, Trie2.getVersion(is, true));
|
||||
} catch (IOException e) {
|
||||
errln(where() + e.toString());
|
||||
}
|
||||
@ -67,18 +67,18 @@ public class Trie2Test extends TestFmwk {
|
||||
Trie2Writable trieWB = new Trie2Writable(0,0);
|
||||
Trie2 trieA = trieWA;
|
||||
Trie2 trieB = trieWB;
|
||||
assertTrue(where(), trieA.equals(trieB));
|
||||
assertEquals(where(), trieA, trieB);
|
||||
assertEquals(where(), trieA.hashCode(), trieB.hashCode());
|
||||
assertTrue("", trieA.equals(trieB));
|
||||
assertEquals("", trieA, trieB);
|
||||
assertEquals("", trieA.hashCode(), trieB.hashCode());
|
||||
trieWA.set(500, 2);
|
||||
assertNotEquals(where(), trieA, trieB);
|
||||
assertNotEquals("", trieA, trieB);
|
||||
// Note that the hash codes do not strictly need to be different,
|
||||
// but it's highly likely that something is wrong if they are the same.
|
||||
assertNotEquals(where(), trieA.hashCode(), trieB.hashCode());
|
||||
assertNotEquals("", trieA.hashCode(), trieB.hashCode());
|
||||
trieWB.set(500, 2);
|
||||
trieA = trieWA.toTrie2_16();
|
||||
assertEquals(where(), trieA, trieB);
|
||||
assertEquals(where(), trieA.hashCode(), trieB.hashCode());
|
||||
assertEquals("", trieA, trieB);
|
||||
assertEquals("", trieA.hashCode(), trieB.hashCode());
|
||||
}
|
||||
|
||||
//
|
||||
@ -90,32 +90,32 @@ public class Trie2Test extends TestFmwk {
|
||||
it = trie.iterator();
|
||||
|
||||
Trie2.Range r = it.next();
|
||||
assertEquals(where(), 0, r.startCodePoint);
|
||||
assertEquals(where(), 0x10ffff, r.endCodePoint);
|
||||
assertEquals(where(), 17, r.value);
|
||||
assertEquals(where(), false, r.leadSurrogate);
|
||||
assertEquals("", 0, r.startCodePoint);
|
||||
assertEquals("", 0x10ffff, r.endCodePoint);
|
||||
assertEquals("", 17, r.value);
|
||||
assertEquals("", false, r.leadSurrogate);
|
||||
|
||||
r = it.next();
|
||||
assertEquals(where(), 0xd800, r.startCodePoint);
|
||||
assertEquals(where(), 0xdbff, r.endCodePoint);
|
||||
assertEquals(where(), 17, r.value);
|
||||
assertEquals(where(), true, r.leadSurrogate);
|
||||
assertEquals("", 0xd800, r.startCodePoint);
|
||||
assertEquals("", 0xdbff, r.endCodePoint);
|
||||
assertEquals("", 17, r.value);
|
||||
assertEquals("", true, r.leadSurrogate);
|
||||
|
||||
|
||||
int i = 0;
|
||||
for (Trie2.Range rr: trie) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
assertEquals(where(), 0, rr.startCodePoint);
|
||||
assertEquals(where(), 0x10ffff, rr.endCodePoint);
|
||||
assertEquals(where(), 17, rr.value);
|
||||
assertEquals(where(), false, rr.leadSurrogate);
|
||||
assertEquals("", 0, rr.startCodePoint);
|
||||
assertEquals("", 0x10ffff, rr.endCodePoint);
|
||||
assertEquals("", 17, rr.value);
|
||||
assertEquals("", false, rr.leadSurrogate);
|
||||
break;
|
||||
case 1:
|
||||
assertEquals(where(), 0xd800, rr.startCodePoint);
|
||||
assertEquals(where(), 0xdbff, rr.endCodePoint);
|
||||
assertEquals(where(), 17, rr.value);
|
||||
assertEquals(where(), true, rr.leadSurrogate);
|
||||
assertEquals("", 0xd800, rr.startCodePoint);
|
||||
assertEquals("", 0xdbff, rr.endCodePoint);
|
||||
assertEquals("", 17, rr.value);
|
||||
assertEquals("", true, rr.leadSurrogate);
|
||||
break;
|
||||
default:
|
||||
errln(where() + " Unexpected iteration result");
|
||||
@ -140,10 +140,10 @@ public class Trie2Test extends TestFmwk {
|
||||
};
|
||||
Iterator<Trie2.Range> it = trie.iterator(vm);
|
||||
Trie2.Range r = it.next();
|
||||
assertEquals(where(), 0, r.startCodePoint);
|
||||
assertEquals(where(), 0x10ffff, r.endCodePoint);
|
||||
assertEquals(where(), 42, r.value);
|
||||
assertEquals(where(), false, r.leadSurrogate);
|
||||
assertEquals("", 0, r.startCodePoint);
|
||||
assertEquals("", 0x10ffff, r.endCodePoint);
|
||||
assertEquals("", 42, r.value);
|
||||
assertEquals("", false, r.leadSurrogate);
|
||||
}
|
||||
|
||||
|
||||
@ -154,24 +154,24 @@ public class Trie2Test extends TestFmwk {
|
||||
trie.set(0x2f810, 10);
|
||||
Iterator<Trie2.Range> it = trie.iteratorForLeadSurrogate((char)0xd87e);
|
||||
Trie2.Range r = it.next();
|
||||
assertEquals(where(), 0x2f800, r.startCodePoint);
|
||||
assertEquals(where(), 0x2f80f, r.endCodePoint);
|
||||
assertEquals(where(), 0xdefa17, r.value);
|
||||
assertEquals(where(), false, r.leadSurrogate);
|
||||
assertEquals("", 0x2f800, r.startCodePoint);
|
||||
assertEquals("", 0x2f80f, r.endCodePoint);
|
||||
assertEquals("", 0xdefa17, r.value);
|
||||
assertEquals("", false, r.leadSurrogate);
|
||||
|
||||
r = it.next();
|
||||
assertEquals(where(), 0x2f810, r.startCodePoint);
|
||||
assertEquals(where(), 0x2f810, r.endCodePoint);
|
||||
assertEquals(where(), 10, r.value);
|
||||
assertEquals(where(), false, r.leadSurrogate);
|
||||
assertEquals("", 0x2f810, r.startCodePoint);
|
||||
assertEquals("", 0x2f810, r.endCodePoint);
|
||||
assertEquals("", 10, r.value);
|
||||
assertEquals("", false, r.leadSurrogate);
|
||||
|
||||
r = it.next();
|
||||
assertEquals(where(), 0x2f811, r.startCodePoint);
|
||||
assertEquals(where(), 0x2fbff, r.endCodePoint);
|
||||
assertEquals(where(), 0xdefa17, r.value);
|
||||
assertEquals(where(), false, r.leadSurrogate);
|
||||
assertEquals("", 0x2f811, r.startCodePoint);
|
||||
assertEquals("", 0x2fbff, r.endCodePoint);
|
||||
assertEquals("", 0xdefa17, r.value);
|
||||
assertEquals("", false, r.leadSurrogate);
|
||||
|
||||
assertFalse(where(), it.hasNext());
|
||||
assertFalse("", it.hasNext());
|
||||
}
|
||||
|
||||
// Iteration over a leading surrogate range with a ValueMapper.
|
||||
@ -189,12 +189,12 @@ public class Trie2Test extends TestFmwk {
|
||||
};
|
||||
Iterator<Trie2.Range> it = trie.iteratorForLeadSurrogate((char)0xd87e, m);
|
||||
Trie2.Range r = it.next();
|
||||
assertEquals(where(), 0x2f800, r.startCodePoint);
|
||||
assertEquals(where(), 0x2fbff, r.endCodePoint);
|
||||
assertEquals(where(), 0xdefa17, r.value);
|
||||
assertEquals(where(), false, r.leadSurrogate);
|
||||
assertEquals("", 0x2f800, r.startCodePoint);
|
||||
assertEquals("", 0x2fbff, r.endCodePoint);
|
||||
assertEquals("", 0xdefa17, r.value);
|
||||
assertEquals("", false, r.leadSurrogate);
|
||||
|
||||
assertFalse(where(), it.hasNext());
|
||||
assertFalse("", it.hasNext());
|
||||
}
|
||||
|
||||
// Trie2.serialize()
|
||||
@ -206,23 +206,23 @@ public class Trie2Test extends TestFmwk {
|
||||
trie.set(0xffee, 300);
|
||||
Trie2_16 frozen16 = trie.toTrie2_16();
|
||||
Trie2_32 frozen32 = trie.toTrie2_32();
|
||||
assertEquals(where(), trie, frozen16);
|
||||
assertEquals(where(), trie, frozen32);
|
||||
assertEquals(where(), frozen16, frozen32);
|
||||
assertEquals("", trie, frozen16);
|
||||
assertEquals("", trie, frozen32);
|
||||
assertEquals("", frozen16, frozen32);
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
try {
|
||||
frozen16.serialize(os);
|
||||
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
|
||||
Trie2 unserialized16 = Trie2.createFromSerialized(is);
|
||||
assertEquals(where(), trie, unserialized16);
|
||||
assertEquals(where(), Trie2_16.class, unserialized16.getClass());
|
||||
assertEquals("", trie, unserialized16);
|
||||
assertEquals("", Trie2_16.class, unserialized16.getClass());
|
||||
|
||||
os.reset();
|
||||
frozen32.serialize(os);
|
||||
is = new ByteArrayInputStream(os.toByteArray());
|
||||
Trie2 unserialized32 = Trie2.createFromSerialized(is);
|
||||
assertEquals(where(), trie, unserialized32);
|
||||
assertEquals(where(), Trie2_32.class, unserialized32.getClass());
|
||||
assertEquals("", trie, unserialized32);
|
||||
assertEquals("", Trie2_32.class, unserialized32.getClass());
|
||||
} catch (IOException e) {
|
||||
errln(where() + " Unexpected exception: " + e);
|
||||
}
|
||||
@ -243,28 +243,28 @@ public class Trie2Test extends TestFmwk {
|
||||
|
||||
// Constructor from another Trie2
|
||||
Trie2 t2 = new Trie2Writable(t1);
|
||||
assertTrue(where(), t1.equals(t2));
|
||||
assertTrue("", t1.equals(t2));
|
||||
|
||||
// Set / Get
|
||||
Trie2Writable t1w = new Trie2Writable(10, 666);
|
||||
t1w.set(0x4567, 99);
|
||||
assertEquals(where(), 10, t1w.get(0x4566));
|
||||
assertEquals(where(), 99, t1w.get(0x4567));
|
||||
assertEquals(where(), 666, t1w.get(-1));
|
||||
assertEquals(where(), 666, t1w.get(0x110000));
|
||||
assertEquals("", 10, t1w.get(0x4566));
|
||||
assertEquals("", 99, t1w.get(0x4567));
|
||||
assertEquals("", 666, t1w.get(-1));
|
||||
assertEquals("", 666, t1w.get(0x110000));
|
||||
|
||||
|
||||
// SetRange
|
||||
t1w = new Trie2Writable(10, 666);
|
||||
t1w.setRange(13 /*start*/, 6666 /*end*/, 7788 /*value*/, false /*overwrite */);
|
||||
t1w.setRange(6000, 7000, 9900, true);
|
||||
assertEquals(where(), 10, t1w.get(12));
|
||||
assertEquals(where(), 7788, t1w.get(13));
|
||||
assertEquals(where(), 7788, t1w.get(5999));
|
||||
assertEquals(where(), 9900, t1w.get(6000));
|
||||
assertEquals(where(), 9900, t1w.get(7000));
|
||||
assertEquals(where(), 10, t1w.get(7001));
|
||||
assertEquals(where(), 666, t1w.get(0x110000));
|
||||
assertEquals("", 10, t1w.get(12));
|
||||
assertEquals("", 7788, t1w.get(13));
|
||||
assertEquals("", 7788, t1w.get(5999));
|
||||
assertEquals("", 9900, t1w.get(6000));
|
||||
assertEquals("", 9900, t1w.get(7000));
|
||||
assertEquals("", 10, t1w.get(7001));
|
||||
assertEquals("", 666, t1w.get(0x110000));
|
||||
|
||||
// setRange from a Trie2.Range
|
||||
// (Ranges are more commonly created by iterating over a Trie2,
|
||||
@ -276,19 +276,19 @@ public class Trie2Test extends TestFmwk {
|
||||
r.leadSurrogate = false;
|
||||
t1w = new Trie2Writable(0, 0xbad);
|
||||
t1w.setRange(r, true);
|
||||
assertEquals(where(), 0, t1w.get(49));
|
||||
assertEquals(where(), 0x12345678, t1w.get(50));
|
||||
assertEquals(where(), 0x12345678, t1w.get(52));
|
||||
assertEquals(where(), 0, t1w.get(53));
|
||||
assertEquals(null, 0, t1w.get(49));
|
||||
assertEquals("", 0x12345678, t1w.get(50));
|
||||
assertEquals("", 0x12345678, t1w.get(52));
|
||||
assertEquals("", 0, t1w.get(53));
|
||||
|
||||
|
||||
// setForLeadSurrogateCodeUnit / getFromU16SingleLead
|
||||
t1w = new Trie2Writable(10, 0xbad);
|
||||
assertEquals(where(), 10, t1w.getFromU16SingleLead((char)0x0d801));
|
||||
assertEquals("", 10, t1w.getFromU16SingleLead((char)0x0d801));
|
||||
t1w.setForLeadSurrogateCodeUnit((char)0xd801, 5000);
|
||||
t1w.set(0xd801, 6000);
|
||||
assertEquals(where(), 5000, t1w.getFromU16SingleLead((char)0x0d801));
|
||||
assertEquals(where(), 6000, t1w.get(0x0d801));
|
||||
assertEquals("", 5000, t1w.getFromU16SingleLead((char)0x0d801));
|
||||
assertEquals("", 6000, t1w.get(0x0d801));
|
||||
|
||||
// get(). Is covered by nearly every other test.
|
||||
|
||||
@ -298,12 +298,12 @@ public class Trie2Test extends TestFmwk {
|
||||
t1w.set(42, 5555);
|
||||
t1w.set(0x1ff00, 224);
|
||||
Trie2_16 t1_16 = t1w.toTrie2_16();
|
||||
assertTrue(where(), t1w.equals(t1_16));
|
||||
assertTrue("", t1w.equals(t1_16));
|
||||
// alter the writable Trie2 and then re-freeze.
|
||||
t1w.set(152, 129);
|
||||
t1_16 = t1w.toTrie2_16();
|
||||
assertTrue(where(), t1w.equals(t1_16));
|
||||
assertEquals(where(), 129, t1w.get(152));
|
||||
assertTrue("", t1w.equals(t1_16));
|
||||
assertEquals("", 129, t1w.get(152));
|
||||
|
||||
// Trie2_32 getAsFrozen_32()
|
||||
//
|
||||
@ -311,13 +311,13 @@ public class Trie2Test extends TestFmwk {
|
||||
t1w.set(42, 5555);
|
||||
t1w.set(0x1ff00, 224);
|
||||
Trie2_32 t1_32 = t1w.toTrie2_32();
|
||||
assertTrue(where(), t1w.equals(t1_32));
|
||||
assertTrue("", t1w.equals(t1_32));
|
||||
// alter the writable Trie2 and then re-freeze.
|
||||
t1w.set(152, 129);
|
||||
assertNotEquals(where(), t1_32, t1w);
|
||||
assertNotEquals("", t1_32, t1w);
|
||||
t1_32 = t1w.toTrie2_32();
|
||||
assertTrue(where(), t1w.equals(t1_32));
|
||||
assertEquals(where(), 129, t1w.get(152));
|
||||
assertTrue("", t1w.equals(t1_32));
|
||||
assertEquals("", 129, t1w.get(152));
|
||||
|
||||
|
||||
// serialize(OutputStream os, ValueWidth width)
|
||||
@ -336,22 +336,22 @@ public class Trie2Test extends TestFmwk {
|
||||
int serializedLen = t1w.toTrie2_16().serialize(os);
|
||||
// Fragile test. Serialized length could change with changes to compaction.
|
||||
// But it should not change unexpectedly.
|
||||
assertEquals(where(), 3508, serializedLen);
|
||||
assertEquals("", 3508, serializedLen);
|
||||
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
|
||||
Trie2 t1ws16 = Trie2.createFromSerialized(is);
|
||||
assertEquals(where(), t1ws16.getClass(), Trie2_16.class);
|
||||
assertEquals(where(), t1w, t1ws16);
|
||||
assertEquals("", t1ws16.getClass(), Trie2_16.class);
|
||||
assertEquals("", t1w, t1ws16);
|
||||
|
||||
// Serialize to 32 bits
|
||||
os.reset();
|
||||
serializedLen = t1w.toTrie2_32().serialize(os);
|
||||
// Fragile test. Serialized length could change with changes to compaction.
|
||||
// But it should not change unexpectedly.
|
||||
assertEquals(where(), 4332, serializedLen);
|
||||
assertEquals("", 4332, serializedLen);
|
||||
is = new ByteArrayInputStream(os.toByteArray());
|
||||
Trie2 t1ws32 = Trie2.createFromSerialized(is);
|
||||
assertEquals(where(), t1ws32.getClass(), Trie2_32.class);
|
||||
assertEquals(where(), t1w, t1ws32);
|
||||
assertEquals("", t1ws32.getClass(), Trie2_32.class);
|
||||
assertEquals("", t1w, t1ws32);
|
||||
} catch (IOException e) {
|
||||
errln(where() + e.toString());
|
||||
}
|
||||
@ -377,14 +377,14 @@ public class Trie2Test extends TestFmwk {
|
||||
for (i=0; it.hasNext(); i++) {
|
||||
ir = it.next();
|
||||
int expectedCP = Character.codePointAt(text, i);
|
||||
assertEquals(where() + " i="+i, expectedCP, ir.codePoint);
|
||||
assertEquals(where() + " i="+i, i, ir.index);
|
||||
assertEquals(where() + " i="+i, vals.charAt(i), ir.value);
|
||||
assertEquals("" + " i="+i, expectedCP, ir.codePoint);
|
||||
assertEquals("" + " i="+i, i, ir.index);
|
||||
assertEquals("" + " i="+i, vals.charAt(i), ir.value);
|
||||
if (expectedCP >= 0x10000) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
assertEquals(where(), text.length(), i);
|
||||
assertEquals("", text.length(), i);
|
||||
|
||||
// Check reverse iteration, starting at an intermediate point.
|
||||
it.set(5);
|
||||
@ -392,11 +392,11 @@ public class Trie2Test extends TestFmwk {
|
||||
ir = it.previous();
|
||||
int expectedCP = Character.codePointBefore(text, i);
|
||||
i -= (expectedCP < 0x10000? 1 : 2);
|
||||
assertEquals(where() + " i="+i, expectedCP, ir.codePoint);
|
||||
assertEquals(where() + " i="+i, i, ir.index);
|
||||
assertEquals(where() + " i="+i, vals.charAt(i), ir.value);
|
||||
assertEquals("" + " i="+i, expectedCP, ir.codePoint);
|
||||
assertEquals("" + " i="+i, i, ir.index);
|
||||
assertEquals("" + " i="+i, vals.charAt(i), ir.value);
|
||||
}
|
||||
assertEquals(where(), 0, i);
|
||||
assertEquals("", 0, i);
|
||||
|
||||
}
|
||||
|
||||
@ -628,10 +628,6 @@ public class Trie2Test extends TestFmwk {
|
||||
int start, limit;
|
||||
int i, countSpecials;
|
||||
|
||||
boolean isFrozen = trie instanceof Trie2_16 || trie instanceof Trie2_32;
|
||||
|
||||
String typeName= isFrozen ? "frozen trie" : "newTrie";
|
||||
|
||||
countSpecials=0; /*getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);*/
|
||||
errorValue = 0x0bad;
|
||||
initialValue = 0;
|
||||
@ -647,9 +643,10 @@ public class Trie2Test extends TestFmwk {
|
||||
|
||||
while(start<limit) {
|
||||
value2=trie.get(start);
|
||||
if(value!=value2) {
|
||||
errln("error: " + typeName + "(" + testName + ").get(" + Integer.toHexString(start) +") == " +
|
||||
Integer.toHexString(value2) + " instead of " + Integer.toHexString(value));
|
||||
if (value != value2) {
|
||||
// The redundant if, outside of the assert, is for speed.
|
||||
// It makes a significant difference for this test.
|
||||
assertEquals("wrong value for " + testName + " of " + Integer.toHexString(start), value, value2);
|
||||
}
|
||||
++start;
|
||||
}
|
||||
@ -699,25 +696,15 @@ public class Trie2Test extends TestFmwk {
|
||||
|
||||
// Check that Trie enumeration produces the same contents as simple get()
|
||||
for (Trie2.Range range: trie) {
|
||||
if (false) {
|
||||
System.out.println("(start, end, value) = (" + Integer.toHexString(range.startCodePoint) +
|
||||
", " + Integer.toHexString(range.endCodePoint) +
|
||||
", " + Integer.toHexString(range.value) + ")");
|
||||
}
|
||||
String wa = where() + "a"; // TODO: fix asserts to do where only on error.
|
||||
String wb = where() + "b";
|
||||
String wc = where() + "c";
|
||||
for (int cp=range.startCodePoint; cp<=range.endCodePoint; cp++) {
|
||||
if (range.leadSurrogate) {
|
||||
assertTrue(wa, cp>=(char)0xd800 && cp<(char)0xdc00);
|
||||
assertEquals(wb, range.value, trie.getFromU16SingleLead((char)cp));
|
||||
assertTrue(testName, cp>=(char)0xd800 && cp<(char)0xdc00);
|
||||
assertEquals(testName, range.value, trie.getFromU16SingleLead((char)cp));
|
||||
} else {
|
||||
assertEquals(wc, range.value, trie.get(cp));
|
||||
assertEquals(testName, range.value, trie.get(cp));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (false) System.out.println("\n\n");
|
||||
|
||||
}
|
||||
|
||||
// Was testTrieRanges in ICU4C. Renamed to not conflict with ICU4J test framework.
|
||||
@ -742,8 +729,8 @@ public class Trie2Test extends TestFmwk {
|
||||
// Run the same tests against locally constructed Tries.
|
||||
Trie2Writable trieW = genTrieFromSetRanges(setRanges);
|
||||
trieGettersTest(testName, trieW, checkRanges);
|
||||
assertEquals(where(), trieW, trie16); // Locally built tries must be
|
||||
assertEquals(where(), trieW, trie32); // the same as those imported from ICU4C
|
||||
assertEquals("", trieW, trie16); // Locally built tries must be
|
||||
assertEquals("", trieW, trie32); // the same as those imported from ICU4C
|
||||
|
||||
|
||||
Trie2_32 trie32a = trieW.toTrie2_32();
|
||||
@ -751,6 +738,7 @@ public class Trie2Test extends TestFmwk {
|
||||
|
||||
Trie2_16 trie16a = trieW.toTrie2_16();
|
||||
trieGettersTest(testName, trie16a, checkRanges);
|
||||
|
||||
}
|
||||
|
||||
// Was "TrieTest" in trie2test.c
|
||||
@ -765,8 +753,6 @@ public class Trie2Test extends TestFmwk {
|
||||
}
|
||||
|
||||
|
||||
|
||||
// TODO: push this where() function up into the test framework implementation of assert
|
||||
private String where() {
|
||||
StackTraceElement[] st = new Throwable().getStackTrace();
|
||||
String w = "File: " + st[1].getFileName() + ", Line " + st[1].getLineNumber();
|
||||
|
Loading…
Reference in New Issue
Block a user