From 339ca27b17ebbfd1c36b32c1177cec95448a8a52 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 10 Jan 2011 23:53:47 +0000 Subject: [PATCH] ICU-8167 port UCharsTrieTest to Java X-SVN-Rev: 29281 --- .../core/src/com/ibm/icu/impl/CharsTrie.java | 4 +- .../com/ibm/icu/impl/CharsTrieBuilder.java | 2 +- .../ibm/icu/dev/test/util/BytesTrieTest.java | 14 +- .../ibm/icu/dev/test/util/CharsTrieTest.java | 899 ++++++++++++++++++ .../com/ibm/icu/dev/test/util/TestAll.java | 4 +- .../src/com/ibm/icu/dev/test/TestFmwk.java | 23 +- 6 files changed, 920 insertions(+), 26 deletions(-) create mode 100644 icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CharsTrieTest.java diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrie.java index cdcd07df63..c788ffab85 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrie.java @@ -361,7 +361,7 @@ public final class CharsTrie implements Cloneable, Iterable { * @return A new CharsTrie.Iterator. */ public static Iterator iterator(CharSequence trieChars, int offset, int maxStringLength) { - return new Iterator(trieChars, offset, -1, 0); + return new Iterator(trieChars, offset, -1, maxStringLength); } /** @@ -559,7 +559,7 @@ public final class CharsTrie implements Cloneable, Iterable { private int initialRemainingMatchLength_; private boolean skipValue_; // Skip intermediate value which was already delivered. - private StringBuilder str_; + private StringBuilder str_=new StringBuilder(); private int maxLength_; private Entry entry_=new Entry(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrieBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrieBuilder.java index eacfc5effe..cb3422ff58 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrieBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharsTrieBuilder.java @@ -367,7 +367,7 @@ public final class CharsTrieBuilder extends StringTrieBuilder { return hash; } - private StringBuilder strings; + private StringBuilder strings=new StringBuilder(); private ArrayList elements=new ArrayList(); // char serialization of the trie. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/BytesTrieTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/BytesTrieTest.java index 8a2a1f086e..ef68bc9cc1 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/BytesTrieTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/BytesTrieTest.java @@ -40,7 +40,7 @@ public class BytesTrieTest extends TestFmwk { } try { byte[] equal=new byte[] { 0x3d }; // "=" - builder.add(equal, 0, 0).add(equal, 0, 1).build(StringTrieBuilder.Option.FAST); + builder.add(equal, 1, 0).add(equal, 1, 1).build(StringTrieBuilder.Option.FAST); errln("BytesTrieBuilder.build() did not detect duplicates"); return; } catch(IllegalArgumentException e) { @@ -257,7 +257,7 @@ public class BytesTrieTest extends TestFmwk { trie.next('j'); trie.next('a'); trie.next('n'); - // hasUniqueValue() directly after next() + // getUniqueValue() directly after next() if((uniqueValue=trie.getUniqueValue())!=((1<<1)|1)) { errln("not unique value 1 after \"jan\": instead "+uniqueValue); } @@ -269,11 +269,11 @@ public class BytesTrieTest extends TestFmwk { if(trie.next('n')!=BytesTrie.Result.INTERMEDIATE_VALUE || 6!=trie.getValue()) { errln("not normal value 6 after \"jun\""); } - // hasUniqueValue() after getValue() + // getUniqueValue() after getValue() if((uniqueValue=trie.getUniqueValue())!=((6<<1)|1)) { errln("not unique value 6 after \"jun\""); } - // hasUniqueValue() from within a linear-match node + // getUniqueValue() from within a linear-match node trie.first('a'); trie.next('u'); if((uniqueValue=trie.getUniqueValue())!=((8<<1)|1)) { @@ -549,12 +549,12 @@ public class BytesTrieTest extends TestFmwk { int c=data[i].s.charAt(0); BytesTrie.Result firstResult=trie.first(c); int firstValue=firstResult.hasValue() ? trie.getValue() : -1; - int c1= data[i].s.length()>1 ? data[i].s.charAt(1) : 0; - BytesTrie.Result nextResult=trie.next(c1); + int nextC=data[i].s.length()>1 ? data[i].s.charAt(1) : 0; + BytesTrie.Result nextResult=trie.next(nextC); if(firstResult!=trie.reset().next(c) || firstResult!=trie.current() || firstValue!=(firstResult.hasValue() ? trie.getValue() : -1) || - nextResult!=trie.next(c1) + nextResult!=trie.next(nextC) ) { errln(String.format("trie.first(%c)!=trie.reset().next(same) for %s", c, data[i].s)); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CharsTrieTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CharsTrieTest.java new file mode 100644 index 0000000000..7f43d09c10 --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CharsTrieTest.java @@ -0,0 +1,899 @@ +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* created on: 2011jan10 +* created by: Markus W. Scherer +* ported from ICU4C ucharstrietest.h/.cpp +*/ + +package com.ibm.icu.dev.test.util; + +import java.nio.CharBuffer; +import java.util.NoSuchElementException; + +import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.impl.BytesTrie; +import com.ibm.icu.impl.CharsTrie; +import com.ibm.icu.impl.CharsTrieBuilder; +import com.ibm.icu.impl.StringTrieBuilder; +import com.ibm.icu.text.UnicodeSet; + +public class CharsTrieTest extends TestFmwk { + public static void main(String[] args) throws Exception { + new CharsTrieTest().run(args); + } + public CharsTrieTest() {} + + // All test functions have a TestNN prefix where NN is a double-digit number. + // This is so that when tests are run in sorted order + // the simpler ones are run first. + // If there is a problem, the simpler ones are easier to step through. + + public void Test00Builder() { + CharsTrieBuilder builder=new CharsTrieBuilder(); + try { + builder.build(StringTrieBuilder.Option.FAST); + errln("CharsTrieBuilder().build() did not throw IndexOutOfBoundsException"); + return; + } catch(IndexOutOfBoundsException e) { + // good + } + try { + builder.add("=", 0).add("=", 1).build(StringTrieBuilder.Option.FAST); + errln("CharsTrieBuilder.build() did not detect duplicates"); + return; + } catch(IllegalArgumentException e) { + // good + } + } + + private static final class StringAndValue { + public StringAndValue(String str, int val) { + s=str; + value=val; + } + + public String s; + public int value; + } + // Note: C++ StringAndValue initializers converted to Java syntax + // with Eclipse Find/Replace regular expressions: + // Find: \{ (".*", [-0-9xa-fA-F]+) \} + // Replace with: new StringAndValue($1) + + public void Test10Empty() { + final StringAndValue[] data={ + new StringAndValue("", 0) + }; + checkData(data); + } + + public void Test11_a() { + final StringAndValue[] data={ + new StringAndValue("a", 1) + }; + checkData(data); + } + + public void Test12_a_ab() { + final StringAndValue[] data={ + new StringAndValue("a", 1), + new StringAndValue("ab", 100) + }; + checkData(data); + } + + public void Test20ShortestBranch() { + final StringAndValue[] data={ + new StringAndValue("a", 1000), + new StringAndValue("b", 2000) + }; + checkData(data); + } + + public void Test21Branches() { + final StringAndValue[] data={ + new StringAndValue("a", 0x10), + new StringAndValue("cc", 0x40), + new StringAndValue("e", 0x100), + new StringAndValue("ggg", 0x400), + new StringAndValue("i", 0x1000), + new StringAndValue("kkkk", 0x4000), + new StringAndValue("n", 0x10000), + new StringAndValue("ppppp", 0x40000), + new StringAndValue("r", 0x100000), + new StringAndValue("sss", 0x200000), + new StringAndValue("t", 0x400000), + new StringAndValue("uu", 0x800000), + new StringAndValue("vv", 0x7fffffff), + new StringAndValue("zz", 0x80000000) + }; + for(int length=2; length<=data.length; ++length) { + logln("TestBranches length="+length); + checkData(data, length); + } + } + + public void Test22LongSequence() { + final StringAndValue[] data={ + new StringAndValue("a", -1), + // sequence of linear-match nodes + new StringAndValue("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2), + // more than 256 units + new StringAndValue( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3) + }; + checkData(data); + } + + public void Test23LongBranch() { + // Split-branch and interesting compact-integer values. + final StringAndValue[] data={ + new StringAndValue("a", -2), + new StringAndValue("b", -1), + new StringAndValue("c", 0), + new StringAndValue("d2", 1), + new StringAndValue("f", 0x3f), + new StringAndValue("g", 0x40), + new StringAndValue("h", 0x41), + new StringAndValue("j23", 0x1900), + new StringAndValue("j24", 0x19ff), + new StringAndValue("j25", 0x1a00), + new StringAndValue("k2", 0x1a80), + new StringAndValue("k3", 0x1aff), + new StringAndValue("l234567890", 0x1b00), + new StringAndValue("l234567890123", 0x1b01), + new StringAndValue("nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff), + new StringAndValue("oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000), + new StringAndValue("pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000), + new StringAndValue("r", 0x333333), + new StringAndValue("s2345", 0x4444444), + new StringAndValue("t234567890", 0x77777777), + new StringAndValue("z", 0x80000001) + }; + checkData(data); + } + + public void Test24ValuesForState() { + // Check that saveState() and resetToState() interact properly + // with next() and current(). + final StringAndValue[] data={ + new StringAndValue("a", -1), + new StringAndValue("ab", -2), + new StringAndValue("abc", -3), + new StringAndValue("abcd", -4), + new StringAndValue("abcde", -5), + new StringAndValue("abcdef", -6) + }; + checkData(data); + } + + public void Test30Compact() { + // Duplicate trailing strings and values provide opportunities for compacting. + final StringAndValue[] data={ + new StringAndValue("+", 0), + new StringAndValue("+august", 8), + new StringAndValue("+december", 12), + new StringAndValue("+july", 7), + new StringAndValue("+june", 6), + new StringAndValue("+november", 11), + new StringAndValue("+october", 10), + new StringAndValue("+september", 9), + new StringAndValue("-", 0), + new StringAndValue("-august", 8), + new StringAndValue("-december", 12), + new StringAndValue("-july", 7), + new StringAndValue("-june", 6), + new StringAndValue("-november", 11), + new StringAndValue("-october", 10), + new StringAndValue("-september", 9), + // The l+n branch (with its sub-nodes) is a duplicate but will be written + // both times because each time it follows a different linear-match node. + new StringAndValue("xjuly", 7), + new StringAndValue("xjune", 6) + }; + checkData(data); + } + + public void Test31FirstForCodePoint() { + final StringAndValue[] data={ + new StringAndValue("a", 1), + new StringAndValue("a\ud800", 2), + new StringAndValue("a\ud800\udc00", 3), // "a\\U00010000" + new StringAndValue("\ud840", 4), + new StringAndValue("\ud840\udc00\udbff", 5), // "\\U00020000\udbff" + new StringAndValue("\ud840\udc00\udbff\udfff", 6), // "\\U00020000\\U0010ffff" + new StringAndValue("\ud840\udc00\udbff\udfffz", 7), // "\\U00020000\\U0010ffffz" + new StringAndValue("\ud900\udc00xy", 8), // "\\U00050000xy" + new StringAndValue("\ud900\udc00xyz", 9) // "\\U00050000xyz" + }; + checkData(data); + } + + public void Test32NextForCodePoint() { + final StringAndValue[] data={ + // "\u4dff\\U00010000\u9999\\U00020000\udfff\\U0010ffff" + new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc00\udfff\udbff\udfff", 2000000000), + // "\u4dff\\U00010000\u9999\\U00020002" + new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc02", 44444), + // "\u4dff\\U000103ff" + new StringAndValue("\u4dff\ud800\udfff", 99999) + }; + CharsTrieBuilder builder=new CharsTrieBuilder(); + CharBuffer trieChars=buildTrie(data, data.length, builder, StringTrieBuilder.Option.FAST); + CharsTrie trie=new CharsTrie(trieChars, 0); + BytesTrie.Result result; + if( (result=trie.nextForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x9999))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x20000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0xdfff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10ffff))!=BytesTrie.Result.FINAL_VALUE || result!=trie.current() || + trie.getValue()!=2000000000 + ) { + errln("CharsTrie.nextForCodePoint() fails for "+data[0].s); + } + if( (result=trie.firstForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x9999))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x20002))!=BytesTrie.Result.FINAL_VALUE || result!=trie.current() || + trie.getValue()!=44444 + ) { + errln("CharsTrie.nextForCodePoint() fails for "+data[1].s); + } + if( (result=trie.reset().nextForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x9999))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x20222))!=BytesTrie.Result.NO_MATCH || result!=trie.current() // no match for trail surrogate + ) { + errln("CharsTrie.nextForCodePoint() fails for \u4dff\\U00010000\u9999\\U00020222"); + } + if( (result=trie.reset().nextForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x103ff))!=BytesTrie.Result.FINAL_VALUE || result!=trie.current() || + trie.getValue()!=99999 + ) { + errln("CharsTrie.nextForCodePoint() fails for "+data[2].s); + } + } + + // Generate (string, value) pairs. + // The first string (before next()) will be empty. + private static final class Generator { + public Generator() { + value=4711; + num=0; + } + public void next() { + char c; + s.setLength(0); + s.append(c=(char)(value>>16)); + s.append((char)(value>>4)); + if((value&1)!=0) { + s.append((char)value); + } + set.add(c); + value+=((value>>5)&0x7ff)*3+1; + ++num; + } + public CharSequence getString() { return s; } + public int getValue() { return value; } + public int countUniqueFirstChars() { return set.size(); } + public int getIndex() { return num; } + + private StringBuilder s=new StringBuilder(); + private UnicodeSet set=new UnicodeSet(); + private int value; + private int num; + }; + + private CharBuffer buildLargeTrie(CharsTrieBuilder builder, int numUniqueFirst) { + Generator gen=new Generator(); + builder.clear(); + while(gen.countUniqueFirstChars()1 ? expectedString.charAt(1) : 0; + BytesTrie.Result firstResult=trie.first(c); + int firstValue=firstResult.hasValue() ? trie.getValue() : -1; + BytesTrie.Result nextResult=trie.next(nextCp); + if(firstResult!=trie.reset().next(c) || + firstResult!=trie.current() || + firstValue!=(firstResult.hasValue() ? trie.getValue() : -1) || + nextResult!=trie.next(nextCp) + ) { + errln(String.format("trie.first(U+%04X)!=trie.reset().next(same) for %s", + c, data[i].s)); + } + c=expectedString.codePointAt(0); + int cLength=Character.charCount(c); + nextCp=expectedString.length()>cLength ? expectedString.codePointAt(cLength) : 0; + firstResult=trie.firstForCodePoint(c); + firstValue=firstResult.hasValue() ? trie.getValue() : -1; + nextResult=trie.nextForCodePoint(nextCp); + if(firstResult!=trie.reset().nextForCodePoint(c) || + firstResult!=trie.current() || + firstValue!=(firstResult.hasValue() ? trie.getValue() : -1) || + nextResult!=trie.nextForCodePoint(nextCp) + ) { + errln(String.format("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", + c, data[i].s)); + } + } + } + + private void checkNext(CharSequence trieChars, StringAndValue[] data, int dataLength) { + CharsTrie trie=new CharsTrie(trieChars, 0); + CharsTrie.State state=new CharsTrie.State(); + for(int i=0; i 0xfffff) { result.append("\\U00"); result.append(hex(ch)); } else if (ch > 0xffff) { result.append("\\U000"); result.append(hex(ch)); - } else if (ch > 0x7f) { + } else if (ch < 0x20 || 0x7e < ch) { result.append("\\u"); result.append(hex(ch)); } else { @@ -957,9 +953,6 @@ public class TestFmwk extends AbstractTestLog { } return result.toString(); } - public static String prettify(StringBuffer s) { - return prettify(s.toString()); - } private static java.util.GregorianCalendar cal;