/*
*******************************************************************************
*   Copyright (C) 2010-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucharstrietest.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010nov16
*   created by: Markus W. Scherer
*/
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/uniset.h"
|
2011-01-05 21:05:47 +00:00
|
|
|
#include "ucharstrie.h"
|
|
|
|
#include "ucharstriebuilder.h"
|
2010-12-31 18:21:36 +00:00
|
|
|
#include "intltest.h"
|
|
|
|
|
|
|
|
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that it behaves as a single
// primary expression wherever the macro is used.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
|
|
|
|
|
|
|
// One (key, value) test entry for a trie.
// Kept as a plain aggregate so that tables of entries can be brace-initialized.
struct StringAndValue {
    // Key text as a C string; buildTrie() converts it with US_INV and unescape(),
    // so it may contain \\uXXXX / \\UXXXXXXXX escape sequences.
    const char *s;
    // Value stored in the trie for the key.
    int32_t value;
};
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
class UCharsTrieTest : public IntlTest {
|
2010-12-31 18:21:36 +00:00
|
|
|
public:
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrieTest() {}
|
|
|
|
virtual ~UCharsTrieTest();
|
2010-12-31 18:21:36 +00:00
|
|
|
|
|
|
|
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
|
|
|
void TestBuilder();
|
|
|
|
void TestEmpty();
|
|
|
|
void Test_a();
|
|
|
|
void Test_a_ab();
|
|
|
|
void TestShortestBranch();
|
|
|
|
void TestBranches();
|
|
|
|
void TestLongSequence();
|
|
|
|
void TestLongBranch();
|
|
|
|
void TestValuesForState();
|
|
|
|
void TestCompact();
|
|
|
|
void TestNextForCodePoint();
|
|
|
|
void TestFirstForCodePoint();
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
UBool buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst);
|
2010-12-31 18:21:36 +00:00
|
|
|
void TestLargeTrie();
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
UBool buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString &result);
|
|
|
|
void TestHasUniqueValue();
|
|
|
|
void TestGetNextUChars();
|
|
|
|
void TestIteratorFromBranch();
|
|
|
|
void TestIteratorFromLinearMatch();
|
|
|
|
void TestTruncatingIteratorFromRoot();
|
|
|
|
void TestTruncatingIteratorFromLinearMatchShort();
|
|
|
|
void TestTruncatingIteratorFromLinearMatchLong();
|
|
|
|
|
|
|
|
void checkData(const StringAndValue data[], int32_t dataLength);
|
2011-01-05 21:05:47 +00:00
|
|
|
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
|
2010-12-31 18:21:36 +00:00
|
|
|
UBool buildTrie(const StringAndValue data[], int32_t dataLength,
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result);
|
2010-12-31 18:21:36 +00:00
|
|
|
void checkFirst(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
|
|
|
void checkNext(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
|
|
|
void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
|
|
|
void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
|
|
|
void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
2011-01-06 18:40:26 +00:00
|
|
|
void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
2010-12-31 18:21:36 +00:00
|
|
|
};
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
// Factory function: returns a newly allocated UCharsTrieTest as an IntlTest.
// NOTE(review): the caller presumably takes ownership of the object -- confirm.
extern IntlTest *createUCharsTrieTest() {
    IntlTest *test=new UCharsTrieTest();
    return test;
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
// Out-of-line destructor; the test object has nothing of its own to release.
UCharsTrieTest::~UCharsTrieTest() {}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
// Dispatches to the test case selected by index.
// The order of the TESTCASE_AUTO() entries is significant and must not change.
// The par argument is not used.
void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        // Announce the suite when a test is actually being executed.
        logln("TestSuite UCharsTrieTest: ");
    }

    TESTCASE_AUTO_BEGIN;

    // Basic building and lookup.
    TESTCASE_AUTO(TestBuilder);
    TESTCASE_AUTO(TestEmpty);
    TESTCASE_AUTO(Test_a);
    TESTCASE_AUTO(Test_a_ab);
    TESTCASE_AUTO(TestShortestBranch);
    TESTCASE_AUTO(TestBranches);
    TESTCASE_AUTO(TestLongSequence);
    TESTCASE_AUTO(TestLongBranch);
    TESTCASE_AUTO(TestValuesForState);
    TESTCASE_AUTO(TestCompact);

    // Code point API and large tries.
    TESTCASE_AUTO(TestNextForCodePoint);
    TESTCASE_AUTO(TestFirstForCodePoint);
    TESTCASE_AUTO(TestLargeTrie);

    // Queries on the months trie and iterators.
    TESTCASE_AUTO(TestHasUniqueValue);
    TESTCASE_AUTO(TestGetNextUChars);
    TESTCASE_AUTO(TestIteratorFromBranch);
    TESTCASE_AUTO(TestIteratorFromLinearMatch);
    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);

    TESTCASE_AUTO_END;
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestBuilder() {
|
2010-12-31 18:21:36 +00:00
|
|
|
IcuTestErrorCode errorCode(*this, "TestBuilder()");
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
builder.build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
2010-12-31 18:21:36 +00:00
|
|
|
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
2010-12-31 18:21:36 +00:00
|
|
|
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("UCharsTrieBuilder.build() did not detect duplicates");
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestEmpty() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "", 0 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::Test_a() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", 1 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::Test_a_ab() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", 1 },
|
|
|
|
{ "ab", 100 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestShortestBranch() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", 1000 },
|
|
|
|
{ "b", 2000 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestBranches() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", 0x10 },
|
|
|
|
{ "cc", 0x40 },
|
|
|
|
{ "e", 0x100 },
|
|
|
|
{ "ggg", 0x400 },
|
|
|
|
{ "i", 0x1000 },
|
|
|
|
{ "kkkk", 0x4000 },
|
|
|
|
{ "n", 0x10000 },
|
|
|
|
{ "ppppp", 0x40000 },
|
|
|
|
{ "r", 0x100000 },
|
|
|
|
{ "sss", 0x200000 },
|
|
|
|
{ "t", 0x400000 },
|
|
|
|
{ "uu", 0x800000 },
|
|
|
|
{ "vv", 0x7fffffff },
|
|
|
|
{ "zz", 0x80000000 }
|
|
|
|
};
|
|
|
|
for(int32_t length=2; length<=LENGTHOF(data); ++length) {
|
|
|
|
infoln("TestBranches length=%d", (int)length);
|
|
|
|
checkData(data, length);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestLongSequence() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", -1 },
|
|
|
|
// sequence of linear-match nodes
|
|
|
|
{ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
|
|
|
|
// more than 256 units
|
|
|
|
{ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestLongBranch() {
|
2010-12-31 18:21:36 +00:00
|
|
|
// Split-branch and interesting compact-integer values.
|
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", -2 },
|
|
|
|
{ "b", -1 },
|
|
|
|
{ "c", 0 },
|
|
|
|
{ "d2", 1 },
|
|
|
|
{ "f", 0x3f },
|
|
|
|
{ "g", 0x40 },
|
|
|
|
{ "h", 0x41 },
|
|
|
|
{ "j23", 0x1900 },
|
|
|
|
{ "j24", 0x19ff },
|
|
|
|
{ "j25", 0x1a00 },
|
|
|
|
{ "k2", 0x1a80 },
|
|
|
|
{ "k3", 0x1aff },
|
|
|
|
{ "l234567890", 0x1b00 },
|
|
|
|
{ "l234567890123", 0x1b01 },
|
|
|
|
{ "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
|
|
|
|
{ "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
|
|
|
|
{ "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
|
|
|
|
{ "r", 0x333333 },
|
|
|
|
{ "s2345", 0x4444444 },
|
|
|
|
{ "t234567890", 0x77777777 },
|
|
|
|
{ "z", 0x80000001 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestValuesForState() {
|
2010-12-31 18:21:36 +00:00
|
|
|
// Check that saveState() and resetToState() interact properly
|
|
|
|
// with next() and current().
|
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", -1 },
|
|
|
|
{ "ab", -2 },
|
|
|
|
{ "abc", -3 },
|
|
|
|
{ "abcd", -4 },
|
|
|
|
{ "abcde", -5 },
|
|
|
|
{ "abcdef", -6 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestCompact() {
|
2010-12-31 18:21:36 +00:00
|
|
|
// Duplicate trailing strings and values provide opportunities for compacting.
|
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "+", 0 },
|
|
|
|
{ "+august", 8 },
|
|
|
|
{ "+december", 12 },
|
|
|
|
{ "+july", 7 },
|
|
|
|
{ "+june", 6 },
|
|
|
|
{ "+november", 11 },
|
|
|
|
{ "+october", 10 },
|
|
|
|
{ "+september", 9 },
|
|
|
|
{ "-", 0 },
|
|
|
|
{ "-august", 8 },
|
|
|
|
{ "-december", 12 },
|
|
|
|
{ "-july", 7 },
|
|
|
|
{ "-june", 6 },
|
|
|
|
{ "-november", 11 },
|
|
|
|
{ "-october", 10 },
|
|
|
|
{ "-september", 9 },
|
|
|
|
// The l+n branch (with its sub-nodes) is a duplicate but will be written
|
|
|
|
// both times because each time it follows a different linear-match node.
|
|
|
|
{ "xjuly", 7 },
|
|
|
|
{ "xjune", 6 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestFirstForCodePoint() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "a", 1 },
|
|
|
|
{ "a\\uD800", 2 },
|
|
|
|
{ "a\\U00010000", 3 },
|
|
|
|
{ "\\uD840", 4 },
|
|
|
|
{ "\\U00020000\\udbff", 5 },
|
|
|
|
{ "\\U00020000\\U0010ffff", 6 },
|
|
|
|
{ "\\U00020000\\U0010ffffz", 7 },
|
|
|
|
{ "\\U00050000xy", 8 },
|
|
|
|
{ "\\U00050000xyz", 9 }
|
|
|
|
};
|
|
|
|
checkData(data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestNextForCodePoint() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
|
|
|
|
{ "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
|
|
|
|
{ "\\u4dff\\U000103ff", 99999 }
|
|
|
|
};
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
|
|
|
UStringTrieResult result;
|
|
|
|
if( (result=trie.nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
2010-12-31 18:21:36 +00:00
|
|
|
trie.getValue()!=2000000000
|
|
|
|
) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if( (result=trie.firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
2010-12-31 18:21:36 +00:00
|
|
|
trie.getValue()!=44444
|
|
|
|
) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate
|
2010-12-31 18:21:36 +00:00
|
|
|
) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
|
|
|
(result=trie.nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
2010-12-31 18:21:36 +00:00
|
|
|
trie.getValue()!=99999
|
|
|
|
) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Definitions in the anonymous namespace are invisible outside this file.
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Generate (string, value) pairs.
|
|
|
|
// The first string (before next()) will be empty.
|
|
|
|
class Generator {
|
|
|
|
public:
|
|
|
|
Generator() : value(4711), num(0) {}
|
|
|
|
void next() {
|
|
|
|
UChar c;
|
|
|
|
s.truncate(0);
|
|
|
|
s.append(c=(UChar)(value>>16));
|
|
|
|
s.append((UChar)(value>>4));
|
|
|
|
if(value&1) {
|
|
|
|
s.append((UChar)value);
|
|
|
|
}
|
|
|
|
set.add(c);
|
|
|
|
value+=((value>>5)&0x7ff)*3+1;
|
|
|
|
++num;
|
|
|
|
}
|
|
|
|
const UnicodeString &getString() const { return s; }
|
|
|
|
int32_t getValue() const { return value; }
|
|
|
|
int32_t countUniqueFirstChars() const { return set.size(); }
|
|
|
|
int32_t getIndex() const { return num; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
UnicodeString s;
|
|
|
|
UnicodeSet set;
|
|
|
|
int32_t value;
|
|
|
|
int32_t num;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end namespace
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
// Fills the builder with Generator output until numUniqueFirst distinct first
// units have been produced, then builds the trie into result.
// Returns TRUE if errorCode indicates success after building.
UBool UCharsTrieTest::buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result,
                                     int32_t numUniqueFirst) {
    IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
    Generator source;
    builder.clear();
    // The pair available before the first next() call is added as well.
    for(; source.countUniqueFirstChars()<numUniqueFirst; source.next()) {
        builder.add(source.getString(), source.getValue(), errorCode);
    }
    infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)source.getIndex());
    builder.build(USTRINGTRIE_BUILD_FAST, result, errorCode);
    logln("serialized trie size: %ld UChars\n", (long)result.length());
    const UBool succeeded=errorCode.isSuccess();
    return succeeded;
}
|
|
|
|
|
|
|
|
// Exercise a large branch node.
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestLargeTrie() {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
|
|
|
if(!buildLargeTrie(builder, trieUChars, 1111)) {
|
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
Generator gen;
|
|
|
|
while(gen.countUniqueFirstChars()<1111) {
|
|
|
|
UnicodeString x(gen.getString());
|
|
|
|
int32_t value=gen.getValue();
|
|
|
|
if(!x.isEmpty()) {
|
2011-01-05 21:05:47 +00:00
|
|
|
if(trie.first(x[0])==USTRINGTRIE_NO_MATCH) {
|
|
|
|
errln("next(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
|
2010-12-31 18:21:36 +00:00
|
|
|
x[0], (long)gen.getIndex());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
x.remove(0, 1);
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UStringTrieResult result=trie.next(x.getBuffer(), x.length());
|
|
|
|
if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) {
|
2010-12-31 18:21:36 +00:00
|
|
|
errln("next(%d chars U+%04X U+%04X)!=hasValue or "
|
|
|
|
"next()!=current() or getValue() wrong "
|
|
|
|
"for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
gen.next();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Numeric constants for the lowercase letters that the tests feed to the trie.
enum {
    u_a=0x61,  // a
    u_b=0x62,  // b
    u_c=0x63,  // c
    u_j=0x6a,  // j
    u_n=0x6e,  // n
    u_r=0x72,  // r
    u_u=0x75,  // u
    u_y=0x79   // y
};
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
// Builds the "months" trie that several tests share.
// Returns what buildTrie() returns.
UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
                                      UnicodeString &result) {
    // All types of nodes leading to the same value,
    // for code coverage of recursive functions.
    // In particular, we need a lot of branches on some single level
    // to exercise a split-branch node.
    static const StringAndValue months[]={
        { "august", 8 },
        { "jan", 1 },
        { "jan.", 1 },
        { "jana", 1 },
        { "janbb", 1 },
        { "janc", 1 },
        { "janddd", 1 },
        { "janee", 1 },
        { "janef", 1 },
        { "janf", 1 },
        { "jangg", 1 },
        { "janh", 1 },
        { "janiiii", 1 },
        { "janj", 1 },
        { "jankk", 1 },
        { "jankl", 1 },
        { "jankmm", 1 },
        { "janl", 1 },
        { "janm", 1 },
        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
        { "jano", 1 },
        { "janpp", 1 },
        { "janqqq", 1 },
        { "janr", 1 },
        { "januar", 1 },
        { "january", 1 },
        { "july", 7 },
        { "jun", 6 },
        { "jun.", 6 },
        { "june", 6 }
    };
    const int32_t count=LENGTHOF(months);
    return buildTrie(months, count, builder, buildOption, result);
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestHasUniqueValue() {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
int32_t uniqueValue;
|
|
|
|
if(trie.hasUniqueValue(uniqueValue)) {
|
|
|
|
errln("unique value at root");
|
|
|
|
}
|
|
|
|
trie.next(u_j);
|
|
|
|
trie.next(u_a);
|
|
|
|
trie.next(u_n);
|
|
|
|
// hasUniqueValue() directly after next()
|
|
|
|
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=1) {
|
|
|
|
errln("not unique value 1 after \"jan\"");
|
|
|
|
}
|
|
|
|
trie.first(u_j);
|
|
|
|
trie.next(u_u);
|
|
|
|
if(trie.hasUniqueValue(uniqueValue)) {
|
|
|
|
errln("unique value after \"ju\"");
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if(trie.next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
|
2010-12-31 18:21:36 +00:00
|
|
|
errln("not normal value 6 after \"jun\"");
|
|
|
|
}
|
|
|
|
// hasUniqueValue() after getValue()
|
|
|
|
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=6) {
|
|
|
|
errln("not unique value 6 after \"jun\"");
|
|
|
|
}
|
|
|
|
// hasUniqueValue() from within a linear-match node
|
|
|
|
trie.first(u_a);
|
|
|
|
trie.next(u_u);
|
|
|
|
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=8) {
|
|
|
|
errln("not unique value 8 after \"au\"");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
class UnicodeStringAppendable : public Appendable {
|
|
|
|
public:
|
|
|
|
UnicodeStringAppendable(UnicodeString &dest) : str(dest) {}
|
|
|
|
virtual Appendable &append(UChar c) { str.append(c); return *this; }
|
|
|
|
UnicodeStringAppendable &reset() { str.remove(); return *this; }
|
|
|
|
private:
|
|
|
|
UnicodeString &str;
|
|
|
|
};
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestGetNextUChars() {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString buffer;
|
|
|
|
UnicodeStringAppendable app(buffer);
|
|
|
|
int32_t count=trie.getNextUChars(app);
|
|
|
|
if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
|
|
|
|
errln("months getNextUChars()!=[aj] at root");
|
|
|
|
}
|
|
|
|
trie.next(u_j);
|
|
|
|
trie.next(u_a);
|
|
|
|
trie.next(u_n);
|
|
|
|
// getNextUChars() directly after next()
|
|
|
|
count=trie.getNextUChars(app.reset());
|
|
|
|
if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
|
|
|
|
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
|
|
|
|
}
|
|
|
|
// getNextUChars() after getValue()
|
2011-01-05 21:05:47 +00:00
|
|
|
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
2010-12-31 18:21:36 +00:00
|
|
|
count=trie.getNextUChars(app.reset());
|
|
|
|
if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
|
|
|
|
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
|
|
|
|
}
|
|
|
|
// getNextUChars() from a linear-match node
|
|
|
|
trie.next(u_u);
|
|
|
|
count=trie.getNextUChars(app.reset());
|
|
|
|
if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
|
|
|
|
errln("months getNextUChars()!=[a] after \"janu\"");
|
|
|
|
}
|
|
|
|
trie.next(u_a);
|
|
|
|
count=trie.getNextUChars(app.reset());
|
|
|
|
if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
|
|
|
|
errln("months getNextUChars()!=[r] after \"janua\"");
|
|
|
|
}
|
|
|
|
trie.next(u_r);
|
|
|
|
trie.next(u_y);
|
|
|
|
// getNextUChars() after a final match
|
|
|
|
count=trie.getNextUChars(app.reset());
|
|
|
|
if(count!=0 || buffer.length()!=0) {
|
|
|
|
errln("months getNextUChars()!=[] after \"january\"");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestIteratorFromBranch() {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
// Go to a branch node.
|
|
|
|
trie.next(u_j);
|
|
|
|
trie.next(u_a);
|
|
|
|
trie.next(u_n);
|
|
|
|
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
2011-01-06 18:40:26 +00:00
|
|
|
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
|
|
|
// following "jan".
|
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "", 1 },
|
|
|
|
{ ".", 1 },
|
|
|
|
{ "a", 1 },
|
|
|
|
{ "bb", 1 },
|
|
|
|
{ "c", 1 },
|
|
|
|
{ "ddd", 1 },
|
|
|
|
{ "ee", 1 },
|
|
|
|
{ "ef", 1 },
|
|
|
|
{ "f", 1 },
|
|
|
|
{ "gg", 1 },
|
|
|
|
{ "h", 1 },
|
|
|
|
{ "iiii", 1 },
|
|
|
|
{ "j", 1 },
|
|
|
|
{ "kk", 1 },
|
|
|
|
{ "kl", 1 },
|
|
|
|
{ "kmm", 1 },
|
|
|
|
{ "l", 1 },
|
|
|
|
{ "m", 1 },
|
|
|
|
{ "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
|
|
|
|
{ "o", 1 },
|
|
|
|
{ "pp", 1 },
|
|
|
|
{ "qqq", 1 },
|
|
|
|
{ "r", 1 },
|
|
|
|
{ "uar", 1 },
|
|
|
|
{ "uary", 1 }
|
|
|
|
};
|
|
|
|
checkIterator(iter, data, LENGTHOF(data));
|
|
|
|
// Reset, and we should get the same result.
|
|
|
|
logln("after iter.reset()");
|
|
|
|
checkIterator(iter.reset(), data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestIteratorFromLinearMatch() {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
// Go into a linear-match node.
|
|
|
|
trie.next(u_j);
|
|
|
|
trie.next(u_a);
|
|
|
|
trie.next(u_n);
|
|
|
|
trie.next(u_u);
|
|
|
|
trie.next(u_a);
|
|
|
|
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
2011-01-06 18:40:26 +00:00
|
|
|
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
|
|
|
// following "janua".
|
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "r", 1 },
|
|
|
|
{ "ry", 1 }
|
|
|
|
};
|
|
|
|
checkIterator(iter, data, LENGTHOF(data));
|
|
|
|
// Reset, and we should get the same result.
|
|
|
|
logln("after iter.reset()");
|
|
|
|
checkIterator(iter.reset(), data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
|
|
|
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
2011-01-06 18:40:26 +00:00
|
|
|
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 4, errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
|
|
|
|
// of each string, and no string duplicates from the truncation.
|
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "augu", -1 },
|
|
|
|
{ "jan", 1 },
|
|
|
|
{ "jan.", 1 },
|
|
|
|
{ "jana", 1 },
|
|
|
|
{ "janb", -1 },
|
|
|
|
{ "janc", 1 },
|
|
|
|
{ "jand", -1 },
|
|
|
|
{ "jane", -1 },
|
|
|
|
{ "janf", 1 },
|
|
|
|
{ "jang", -1 },
|
|
|
|
{ "janh", 1 },
|
|
|
|
{ "jani", -1 },
|
|
|
|
{ "janj", 1 },
|
|
|
|
{ "jank", -1 },
|
|
|
|
{ "janl", 1 },
|
|
|
|
{ "janm", 1 },
|
|
|
|
{ "jann", -1 },
|
|
|
|
{ "jano", 1 },
|
|
|
|
{ "janp", -1 },
|
|
|
|
{ "janq", -1 },
|
|
|
|
{ "janr", 1 },
|
|
|
|
{ "janu", -1 },
|
|
|
|
{ "july", 7 },
|
|
|
|
{ "jun", 6 },
|
|
|
|
{ "jun.", 6 },
|
|
|
|
{ "june", 6 }
|
|
|
|
};
|
|
|
|
checkIterator(iter, data, LENGTHOF(data));
|
|
|
|
// Reset, and we should get the same result.
|
|
|
|
logln("after iter.reset()");
|
|
|
|
checkIterator(iter.reset(), data, LENGTHOF(data));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "abcdef", 10 },
|
|
|
|
{ "abcdepq", 200 },
|
|
|
|
{ "abcdeyz", 3000 }
|
|
|
|
};
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
// Go into a linear-match node.
|
|
|
|
trie.next(u_a);
|
|
|
|
trie.next(u_b);
|
|
|
|
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
|
|
|
// Truncate within the linear-match node.
|
2011-01-06 18:40:26 +00:00
|
|
|
UCharsTrie::Iterator iter(trie, 2, errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
static const StringAndValue expected[]={
|
|
|
|
{ "cd", -1 }
|
|
|
|
};
|
|
|
|
checkIterator(iter, expected, LENGTHOF(expected));
|
|
|
|
// Reset, and we should get the same result.
|
|
|
|
logln("after iter.reset()");
|
|
|
|
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
2010-12-31 18:21:36 +00:00
|
|
|
static const StringAndValue data[]={
|
|
|
|
{ "abcdef", 10 },
|
|
|
|
{ "abcdepq", 200 },
|
|
|
|
{ "abcdeyz", 3000 }
|
|
|
|
};
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
// Go into a linear-match node.
|
|
|
|
trie.next(u_a);
|
|
|
|
trie.next(u_b);
|
|
|
|
trie.next(u_c);
|
|
|
|
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
|
|
|
// Truncate after the linear-match node.
|
2011-01-06 18:40:26 +00:00
|
|
|
UCharsTrie::Iterator iter(trie, 3, errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
static const StringAndValue expected[]={
|
|
|
|
{ "def", 10 },
|
|
|
|
{ "dep", -1 },
|
|
|
|
{ "dey", -1 }
|
|
|
|
};
|
|
|
|
checkIterator(iter, expected, LENGTHOF(expected));
|
|
|
|
// Reset, and we should get the same result.
|
|
|
|
logln("after iter.reset()");
|
|
|
|
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
2010-12-31 18:21:36 +00:00
|
|
|
logln("checkData(dataLength=%d, fast)", (int)dataLength);
|
2011-01-05 21:05:47 +00:00
|
|
|
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
|
2010-12-31 18:21:36 +00:00
|
|
|
logln("checkData(dataLength=%d, small)", (int)dataLength);
|
2011-01-05 21:05:47 +00:00
|
|
|
checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
|
|
|
|
UCharsTrieBuilder builder;
|
2010-12-31 18:21:36 +00:00
|
|
|
UnicodeString trieUChars;
|
|
|
|
if(!buildTrie(data, dataLength, builder, buildOption, trieUChars)) {
|
|
|
|
return; // buildTrie() reported an error
|
|
|
|
}
|
|
|
|
checkFirst(trieUChars, data, dataLength);
|
|
|
|
checkNext(trieUChars, data, dataLength);
|
|
|
|
checkNextWithState(trieUChars, data, dataLength);
|
|
|
|
checkNextString(trieUChars, data, dataLength);
|
|
|
|
checkIterator(trieUChars, data, dataLength);
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
// Adds all (string, value) items to the builder and builds the trie into result.
//
// data/dataLength: the items; each string is treated as invariant characters
//                  and unescaped before it is added.
// builder:         cleared first, then filled and built.
// buildOption:     passed through to builder.build().
// result:          receives the built trie.
//
// Returns TRUE if add()/build() succeeded, FALSE if they set a failure code
// (which is logged as a test error here).
// After a successful build this also verifies that a further add() is
// rejected with U_NO_WRITE_PERMISSION.
UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
                                UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result) {
    IcuTestErrorCode errorCode(*this, "buildTrie()");
    // Add the items to the trie builder in an interesting (not trivial, not random) order.
    // The step is chosen to be coprime with dataLength (or -1),
    // so that dataLength steps visit every index exactly once.
    int32_t index, step;
    if(dataLength&1) {
        // Odd number of items.
        index=dataLength/2;
        step=2;
    } else if((dataLength%3)!=0) {
        // Not a multiple of 3.
        index=dataLength/5;
        step=3;
    } else {
        // Walk backwards from the last item.
        index=dataLength-1;
        step=-1;
    }
    builder.clear();
    for(int32_t i=0; i<dataLength; ++i) {
        builder.add(UnicodeString(data[index].s, -1, US_INV).unescape(),
                    data[index].value, errorCode);
        // With step=-1 this becomes negative after the last item,
        // but then the loop ends and the index is not used any more.
        index=(index+step)%dataLength;
    }
    builder.build(buildOption, result, errorCode);
    // Remember the outcome now: logIfFailureAndReset() (as its name says) and
    // the reset() below leave errorCode cleared, so querying errorCode at the
    // end of the function would always report success.
    const UBool buildFailed=errorCode.logIfFailureAndReset("add()/build()");
    if(!buildFailed) {
        // The builder must refuse further items once the trie has been built.
        builder.add("zzz", 999, errorCode);
        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
        }
    }
    logln("serialized trie size: %ld UChars\n", (long)result.length());
    return !buildFailed;
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars,
|
|
|
|
const StringAndValue data[], int32_t dataLength) {
|
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
for(int32_t i=0; i<dataLength; ++i) {
|
|
|
|
if(*data[i].s==0) {
|
|
|
|
continue; // skip empty string
|
|
|
|
}
|
|
|
|
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
|
|
|
UChar32 c=expectedString[0];
|
|
|
|
UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
|
2011-01-05 21:05:47 +00:00
|
|
|
UStringTrieResult firstResult=trie.first(c);
|
|
|
|
int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
|
|
|
UStringTrieResult nextResult=trie.next(nextCp);
|
2010-12-31 18:21:36 +00:00
|
|
|
if(firstResult!=trie.reset().next(c) ||
|
|
|
|
firstResult!=trie.current() ||
|
2011-01-05 21:05:47 +00:00
|
|
|
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
2010-12-31 18:21:36 +00:00
|
|
|
nextResult!=trie.next(nextCp)
|
|
|
|
) {
|
|
|
|
errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
|
|
|
|
c, data[i].s);
|
|
|
|
}
|
|
|
|
c=expectedString.char32At(0);
|
|
|
|
int32_t cLength=U16_LENGTH(c);
|
|
|
|
nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
|
|
|
|
firstResult=trie.firstForCodePoint(c);
|
2011-01-05 21:05:47 +00:00
|
|
|
firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
2010-12-31 18:21:36 +00:00
|
|
|
nextResult=trie.nextForCodePoint(nextCp);
|
|
|
|
if(firstResult!=trie.reset().nextForCodePoint(c) ||
|
|
|
|
firstResult!=trie.current() ||
|
2011-01-05 21:05:47 +00:00
|
|
|
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
2010-12-31 18:21:36 +00:00
|
|
|
nextResult!=trie.nextForCodePoint(nextCp)
|
|
|
|
) {
|
|
|
|
errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
|
|
|
|
c, data[i].s);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkNext(const UnicodeString &trieUChars,
|
|
|
|
const StringAndValue data[], int32_t dataLength) {
|
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
|
|
|
UCharsTrie::State state;
|
2010-12-31 18:21:36 +00:00
|
|
|
for(int32_t i=0; i<dataLength; ++i) {
|
|
|
|
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
|
|
|
int32_t stringLength= (i&1) ? -1 : expectedString.length();
|
2011-01-05 21:05:47 +00:00
|
|
|
UStringTrieResult result;
|
|
|
|
if( !USTRINGTRIE_HAS_VALUE(
|
2010-12-31 18:21:36 +00:00
|
|
|
result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
|
|
|
|
result!=trie.current()
|
|
|
|
) {
|
|
|
|
errln("trie does not seem to contain %s", data[i].s);
|
|
|
|
} else if(trie.getValue()!=data[i].value) {
|
|
|
|
errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
|
|
|
|
data[i].s,
|
|
|
|
(long)trie.getValue(), (long)trie.getValue(),
|
|
|
|
(long)data[i].value, (long)data[i].value);
|
|
|
|
} else if(result!=trie.current() || trie.getValue()!=data[i].value) {
|
|
|
|
errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
|
|
|
|
}
|
|
|
|
trie.reset();
|
|
|
|
stringLength=expectedString.length();
|
|
|
|
result=trie.current();
|
|
|
|
for(int32_t j=0; j<stringLength; ++j) {
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!USTRINGTRIE_HAS_NEXT(result)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
|
|
|
|
break;
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
|
2010-12-31 18:21:36 +00:00
|
|
|
trie.getValue();
|
2011-01-05 21:05:47 +00:00
|
|
|
if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
|
|
|
|
errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
|
2010-12-31 18:21:36 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result=trie.next(expectedString[j]);
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!USTRINGTRIE_MATCHES(result)) {
|
|
|
|
errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
|
2010-12-31 18:21:36 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(result!=trie.current()) {
|
|
|
|
errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!USTRINGTRIE_HAS_VALUE(result)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
errln("trie.next()!=hasValue at the end of %s", data[i].s);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
trie.getValue();
|
|
|
|
if(result!=trie.current()) {
|
|
|
|
errln("trie.current() != current()+getValue()+current() after end of %s",
|
|
|
|
data[i].s);
|
|
|
|
}
|
|
|
|
// Compare the final current() with whether next() can actually continue.
|
|
|
|
trie.saveState(state);
|
|
|
|
UBool nextContinues=FALSE;
|
|
|
|
for(int32_t c=0x20; c<0xe000; ++c) {
|
|
|
|
if(c==0x80) {
|
|
|
|
c=0xd800; // Check for ASCII and surrogates but not all of the BMP.
|
|
|
|
}
|
|
|
|
if(trie.resetToState(state).next(c)) {
|
|
|
|
nextContinues=TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2011-01-05 21:05:47 +00:00
|
|
|
if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
|
|
|
|
errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
|
|
|
|
"(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
|
|
|
trie.reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
|
|
|
const StringAndValue data[], int32_t dataLength) {
|
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
|
|
|
UCharsTrie::State noState, state;
|
2010-12-31 18:21:36 +00:00
|
|
|
for(int32_t i=0; i<dataLength; ++i) {
|
|
|
|
if((i&1)==0) {
|
|
|
|
// This should have no effect.
|
|
|
|
trie.resetToState(noState);
|
|
|
|
}
|
|
|
|
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
|
|
|
int32_t stringLength=expectedString.length();
|
|
|
|
int32_t partialLength=stringLength/3;
|
|
|
|
for(int32_t j=0; j<partialLength; ++j) {
|
2011-01-05 21:05:47 +00:00
|
|
|
if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
|
|
|
|
errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
trie.saveState(state);
|
2011-01-05 21:05:47 +00:00
|
|
|
UStringTrieResult resultAtState=trie.current();
|
|
|
|
UStringTrieResult result;
|
2010-12-31 18:21:36 +00:00
|
|
|
int32_t valueAtState=-99;
|
2011-01-05 21:05:47 +00:00
|
|
|
if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
|
2010-12-31 18:21:36 +00:00
|
|
|
valueAtState=trie.getValue();
|
|
|
|
}
|
|
|
|
result=trie.next(0); // mismatch
|
2011-01-05 21:05:47 +00:00
|
|
|
if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
|
2010-12-31 18:21:36 +00:00
|
|
|
errln("trie.next(0) matched after part of %s", data[i].s);
|
|
|
|
}
|
|
|
|
if( resultAtState!=trie.resetToState(state).current() ||
|
2011-01-05 21:05:47 +00:00
|
|
|
(USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
|
2010-12-31 18:21:36 +00:00
|
|
|
) {
|
|
|
|
errln("trie.next(part of %s) changes current()/getValue() after "
|
|
|
|
"saveState/next(0)/resetToState",
|
|
|
|
data[i].s);
|
2011-01-05 21:05:47 +00:00
|
|
|
} else if(!USTRINGTRIE_HAS_VALUE(
|
2010-12-31 18:21:36 +00:00
|
|
|
result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
|
|
|
|
stringLength-partialLength)) ||
|
|
|
|
result!=trie.current()) {
|
|
|
|
errln("trie.next(rest of %s) does not seem to contain %s after "
|
|
|
|
"saveState/next(0)/resetToState",
|
|
|
|
data[i].s);
|
2011-01-05 21:05:47 +00:00
|
|
|
} else if(!USTRINGTRIE_HAS_VALUE(
|
2010-12-31 18:21:36 +00:00
|
|
|
result=trie.resetToState(state).
|
|
|
|
next(expectedString.getTerminatedBuffer()+partialLength,
|
|
|
|
stringLength-partialLength)) ||
|
|
|
|
result!=trie.current()) {
|
|
|
|
errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
|
|
|
|
data[i].s);
|
|
|
|
} else if(trie.getValue()!=data[i].value) {
|
|
|
|
errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
|
|
|
|
data[i].s,
|
|
|
|
(long)trie.getValue(), (long)trie.getValue(),
|
|
|
|
(long)data[i].value, (long)data[i].value);
|
|
|
|
}
|
|
|
|
trie.reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// next(string) is also tested in other functions,
|
|
|
|
// but here we try to go partway through the string, and then beyond it.
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkNextString(const UnicodeString &trieUChars,
|
|
|
|
const StringAndValue data[], int32_t dataLength) {
|
|
|
|
UCharsTrie trie(trieUChars.getBuffer());
|
2010-12-31 18:21:36 +00:00
|
|
|
for(int32_t i=0; i<dataLength; ++i) {
|
|
|
|
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
|
|
|
int32_t stringLength=expectedString.length();
|
|
|
|
if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Test that we stop properly at the end of the string.
|
|
|
|
if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
|
|
|
|
stringLength+1-stringLength/2)) {
|
2011-01-05 21:05:47 +00:00
|
|
|
errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
|
2010-12-31 18:21:36 +00:00
|
|
|
}
|
|
|
|
trie.reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-05 21:05:47 +00:00
|
|
|
void UCharsTrieTest::checkIterator(const UnicodeString &trieUChars,
|
|
|
|
const StringAndValue data[], int32_t dataLength) {
|
2010-12-31 18:21:36 +00:00
|
|
|
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
2011-01-06 18:40:26 +00:00
|
|
|
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
|
2010-12-31 18:21:36 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
checkIterator(iter, data, dataLength);
|
|
|
|
}
|
|
|
|
|
2011-01-06 18:40:26 +00:00
|
|
|
void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
|
2011-01-05 21:05:47 +00:00
|
|
|
const StringAndValue data[], int32_t dataLength) {
|
2010-12-31 18:21:36 +00:00
|
|
|
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
|
|
|
for(int32_t i=0; i<dataLength; ++i) {
|
|
|
|
if(!iter.hasNext()) {
|
|
|
|
errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
UBool hasNext=iter.next(errorCode);
|
|
|
|
if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(!hasNext) {
|
|
|
|
errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
|
|
|
if(iter.getString()!=expectedString) {
|
|
|
|
char buffer[1000];
|
|
|
|
UnicodeString invString(prettify(iter.getString()));
|
|
|
|
invString.extract(0, invString.length(), buffer, LENGTHOF(buffer), US_INV);
|
|
|
|
errln("trie iterator next().getString()=%s but expected %s for item %d",
|
|
|
|
buffer, data[i].s, (int)i);
|
|
|
|
}
|
|
|
|
if(iter.getValue()!=data[i].value) {
|
|
|
|
errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
|
|
|
|
(long)iter.getValue(), (long)iter.getValue(),
|
|
|
|
(long)data[i].value, (long)data[i].value,
|
|
|
|
(int)i, data[i].s);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(iter.hasNext()) {
|
|
|
|
errln("trie iterator hasNext()=TRUE after all items");
|
|
|
|
}
|
|
|
|
UBool hasNext=iter.next(errorCode);
|
|
|
|
errorCode.logIfFailureAndReset("trie iterator next() after all items");
|
|
|
|
if(hasNext) {
|
|
|
|
errln("trie iterator next()=TRUE after all items");
|
|
|
|
}
|
|
|
|
}
|