U+%04lx): 0x%lx instead of 0x%lx\n",
+ testName, (unsigned long)bytes, (long)c, (long)value, (long)values[i]);
+ }
+ if(i8!=(p-s)) {
+ log_err("error: wrong end index from UTRIE2_U8_NEXT(%s)(%lx->U+%04lx): %ld != %ld\n",
+ testName, (unsigned long)bytes, (long)c, (long)(p-s), (long)i8);
+ continue;
+ }
+ ++i;
+ }
+
+ /* try backward */
+ p=limit;
+ i=countValues;
+ while(sU+%04lx): 0x%lx instead of 0x%lx\n",
+ testName, (unsigned long)bytes, (long)c, (long)value, (long)values[i]);
+ }
+ bytes=0;
+ if(value!=values[i] || i8!=(p-s)) {
+ int32_t k=i8;
+ while(kU+%04lx): %ld != %ld\n",
+ testName, (unsigned long)bytes, (long)c, (long)(p-s), (long)i8);
+ continue;
+ }
+ }
+}
+
+/*
+ * Checks a frozen trie: runs the getter, enumeration, UTF-16 and UTF-8
+ * checks against checkRanges[], then verifies that each setter
+ * (utrie2_set32, utrie2_setRange32, utrie2_set32ForLeadSurrogateCodeUnit)
+ * sets U_NO_WRITE_PERMISSION and leaves the stored value unchanged.
+ * Logs an error and returns at the first failed setter check.
+ */
+static void
+testFrozenTrie(const char *testName,
+               UTrie2 *trie, UTrie2ValueBits valueBits,
+               const CheckRange checkRanges[], int32_t countCheckRanges) {
+    UErrorCode status=U_ZERO_ERROR;
+    uint32_t before, after;
+
+    if(!utrie2_isFrozen(trie)) {
+        log_err("error: utrie2_isFrozen(frozen %s) returned FALSE (not frozen)\n",
+                testName);
+        return;
+    }
+
+    /* read-only checks */
+    testTrieGetters(testName, trie, valueBits, checkRanges, countCheckRanges);
+    testTrieEnum(testName, trie, checkRanges, countCheckRanges);
+    testTrieUTF16(testName, trie, valueBits, checkRanges, countCheckRanges);
+    testTrieUTF8(testName, trie, valueBits, checkRanges, countCheckRanges);
+
+    /* single code point setter must be rejected */
+    before=utrie2_get32(trie, 1);
+    utrie2_set32(trie, 1, 234, &status);
+    after=utrie2_get32(trie, 1);
+    if(!(status==U_NO_WRITE_PERMISSION && after==before)) {
+        log_err("error: utrie2_set32(frozen %s) failed: it set %s != U_NO_WRITE_PERMISSION\n",
+                testName, u_errorName(status));
+        return;
+    }
+
+    /* range setter must be rejected; compare against the same "before" value */
+    status=U_ZERO_ERROR;
+    utrie2_setRange32(trie, 1, 5, 234, TRUE, &status);
+    after=utrie2_get32(trie, 1);
+    if(!(status==U_NO_WRITE_PERMISSION && after==before)) {
+        log_err("error: utrie2_setRange32(frozen %s) failed: it set %s != U_NO_WRITE_PERMISSION\n",
+                testName, u_errorName(status));
+        return;
+    }
+
+    /* lead surrogate code unit setter must be rejected */
+    status=U_ZERO_ERROR;
+    before=utrie2_get32FromLeadSurrogateCodeUnit(trie, 0xd801);
+    utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd801, 234, &status);
+    after=utrie2_get32FromLeadSurrogateCodeUnit(trie, 0xd801);
+    if(!(status==U_NO_WRITE_PERMISSION && after==before)) {
+        log_err("error: utrie2_set32ForLeadSurrogateCodeUnit(frozen %s) failed: "
+                "it set %s != U_NO_WRITE_PERMISSION\n",
+                testName, u_errorName(status));
+        return;
+    }
+}
+
+/*
+ * Checks an unfrozen trie against checkRanges[] with the getter and
+ * enumeration checks only.
+ */
+static void
+testNewTrie(const char *testName, const UTrie2 *trie,
+            const CheckRange checkRanges[], int32_t countCheckRanges) {
+    /* The valueBits are ignored for an unfrozen trie, so any placeholder will do. */
+    const UTrie2ValueBits ignoredValueBits=UTRIE2_COUNT_VALUE_BITS;
+
+    testTrieGetters(testName, trie, ignoredValueBits, checkRanges, countCheckRanges);
+    testTrieEnum(testName, trie, checkRanges, countCheckRanges);
+}
+
+/*
+ * Round-trips a clone of the given unfrozen trie through
+ * freeze -> serialize -> (optional endianness swap there and back)
+ * -> unserialize -> clone-as-thawed, and checks error codes, lengths
+ * and values after each step.
+ *
+ * testName          label used in all error messages
+ * trie              unfrozen trie; only a clone of it is modified/closed here
+ * valueBits         value width used for freezing and unserializing
+ * withSwap          if TRUE, also clone the frozen and the unserialized trie,
+ *                   and swap the serialized form to opposite-endian and back
+ * checkRanges,
+ * countCheckRanges  expected values, passed through to the checker functions
+ */
+static void
+testTrieSerialize(const char *testName,
+                  UTrie2 *trie, UTrie2ValueBits valueBits,
+                  UBool withSwap,
+                  const CheckRange checkRanges[], int32_t countCheckRanges) {
+    uint32_t storage[10000];
+    int32_t length1, length2, length3;
+    UTrie2ValueBits otherValueBits;
+    UErrorCode errorCode;
+
+    /* clone the trie so that the caller can reuse the original */
+    errorCode=U_ZERO_ERROR;
+    trie=utrie2_clone(trie, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_err("error: utrie2_clone(unfrozen %s) failed - %s\n",
+                testName, u_errorName(errorCode));
+        return;
+    }
+
+    /*
+     * This is not a loop, but simply a block that we can exit with "break"
+     * when something goes wrong.
+     */
+    do {
+        /* serializing before freezing must be rejected */
+        errorCode=U_ZERO_ERROR;
+        utrie2_serialize(trie, storage, sizeof(storage), &errorCode);
+        if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+            log_err("error: utrie2_serialize(unfrozen %s) set %s != U_ILLEGAL_ARGUMENT_ERROR\n",
+                    testName, u_errorName(errorCode));
+            break;
+        }
+        errorCode=U_ZERO_ERROR;
+        utrie2_freeze(trie, valueBits, &errorCode);
+        if(U_FAILURE(errorCode) || !utrie2_isFrozen(trie)) {
+            log_err("error: utrie2_freeze(%s) failed: %s isFrozen: %d\n",
+                    testName, u_errorName(errorCode), utrie2_isFrozen(trie));
+            break;
+        }
+        /* freezing again with the other value width must be rejected */
+        otherValueBits= valueBits==UTRIE2_16_VALUE_BITS ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS;
+        utrie2_freeze(trie, otherValueBits, &errorCode);
+        if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+            log_err("error: utrie2_freeze(already-frozen with other valueBits %s) "
+                    "set %s != U_ILLEGAL_ARGUMENT_ERROR\n",
+                    testName, u_errorName(errorCode));
+            break;
+        }
+        errorCode=U_ZERO_ERROR;
+        if(withSwap) {
+            /* clone a frozen trie */
+            UTrie2 *clone=utrie2_clone(trie, &errorCode);
+            if(U_FAILURE(errorCode)) {
+                log_err("error: cloning a frozen UTrie2 failed (%s) - %s\n",
+                        testName, u_errorName(errorCode));
+                errorCode=U_ZERO_ERROR;  /* continue with the original */
+            } else {
+                utrie2_close(trie);
+                trie=clone;
+            }
+        }
+        /* preflight with no buffer, then serialize for real and compare lengths */
+        length1=utrie2_serialize(trie, NULL, 0, &errorCode);
+        if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
+            log_err("error: utrie2_serialize(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n",
+                    testName, u_errorName(errorCode));
+            break;
+        }
+        errorCode=U_ZERO_ERROR;
+        length2=utrie2_serialize(trie, storage, sizeof(storage), &errorCode);
+        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+            log_err("error: utrie2_serialize(%s) needs more memory\n", testName);
+            break;
+        }
+        if(U_FAILURE(errorCode)) {
+            log_err("error: utrie2_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
+            break;
+        }
+        if(length1!=length2) {
+            log_err("error: trie serialization (%s) lengths different: "
+                    "preflight vs. serialize\n", testName);
+            break;
+        }
+
+        testFrozenTrie(testName, trie, valueBits, checkRanges, countCheckRanges);
+        utrie2_close(trie);
+        trie=NULL;  /* the final utrie2_close(trie) must not close it twice */
+
+        if(withSwap) {
+            uint32_t swapped[10000];
+            int32_t swappedLength;
+
+            UDataSwapper *ds;
+
+            /* swap to opposite-endian */
+            uprv_memset(swapped, 0x55, length2);
+            ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
+                                 !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+            swappedLength=utrie2_swap(ds, storage, -1, NULL, &errorCode);
+            if(U_FAILURE(errorCode) || swappedLength!=length2) {
+                log_err("error: utrie2_swap(%s to OE preflighting) failed (%s) "
+                        "or before/after lengths different\n",
+                        testName, u_errorName(errorCode));
+                udata_closeSwapper(ds);
+                break;
+            }
+            swappedLength=utrie2_swap(ds, storage, length2, swapped, &errorCode);
+            udata_closeSwapper(ds);
+            if(U_FAILURE(errorCode) || swappedLength!=length2) {
+                log_err("error: utrie2_swap(%s to OE) failed (%s) or before/after lengths different\n",
+                        testName, u_errorName(errorCode));
+                break;
+            }
+
+            /* swap back to platform-endian */
+            uprv_memset(storage, 0xaa, length2);
+            ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
+                                 U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+            swappedLength=utrie2_swap(ds, swapped, -1, NULL, &errorCode);
+            if(U_FAILURE(errorCode) || swappedLength!=length2) {
+                log_err("error: utrie2_swap(%s to PE preflighting) failed (%s) "
+                        "or before/after lengths different\n",
+                        testName, u_errorName(errorCode));
+                udata_closeSwapper(ds);
+                break;
+            }
+            swappedLength=utrie2_swap(ds, swapped, length2, storage, &errorCode);
+            udata_closeSwapper(ds);
+            if(U_FAILURE(errorCode) || swappedLength!=length2) {
+                log_err("error: utrie2_swap(%s to PE) failed (%s) or before/after lengths different\n",
+                        testName, u_errorName(errorCode));
+                break;
+            }
+        }
+
+        trie=utrie2_openFromSerialized(valueBits, storage, length2, &length3, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: utrie2_openFromSerialized(%s) failed, %s\n", testName, u_errorName(errorCode));
+            break;
+        }
+        if((valueBits==UTRIE2_16_VALUE_BITS)!=(trie->data32==NULL)) {
+            log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
+            break;
+        }
+        if(length2!=length3) {
+            log_err("error: trie serialization (%s) lengths different: "
+                    "serialize vs. unserialize\n", testName);
+            break;
+        }
+        /* overwrite the storage that is not supposed to be needed */
+        uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3));
+
+        /* the unserialized trie must accept a freeze with the same width... */
+        utrie2_freeze(trie, valueBits, &errorCode);
+        if(U_FAILURE(errorCode) || !utrie2_isFrozen(trie)) {
+            log_err("error: utrie2_freeze(unserialized %s) failed: %s isFrozen: %d\n",
+                    testName, u_errorName(errorCode), utrie2_isFrozen(trie));
+            break;
+        }
+        /* ...and reject one with the other width */
+        utrie2_freeze(trie, otherValueBits, &errorCode);
+        if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+            log_err("error: utrie2_freeze(unserialized with other valueBits %s) "
+                    "set %s != U_ILLEGAL_ARGUMENT_ERROR\n",
+                    testName, u_errorName(errorCode));
+            break;
+        }
+        errorCode=U_ZERO_ERROR;
+        if(withSwap) {
+            /* clone an unserialized trie */
+            UTrie2 *clone=utrie2_clone(trie, &errorCode);
+            if(U_FAILURE(errorCode)) {
+                log_err("error: utrie2_clone(unserialized %s) failed - %s\n",
+                        testName, u_errorName(errorCode));
+                errorCode=U_ZERO_ERROR;
+                /* no need to break: just test the original trie */
+            } else {
+                utrie2_close(trie);
+                trie=clone;
+                /* wipe the serialized bytes before testing the clone */
+                uprv_memset(storage, 0, sizeof(storage));
+            }
+        }
+        testFrozenTrie(testName, trie, valueBits, checkRanges, countCheckRanges);
+        {
+            /* clone-as-thawed an unserialized trie */
+            UTrie2 *clone=utrie2_cloneAsThawed(trie, &errorCode);
+            if(U_FAILURE(errorCode) || utrie2_isFrozen(clone)) {
+                /* report the state of the clone, not of the frozen source trie */
+                log_err("error: utrie2_cloneAsThawed(unserialized %s) failed - "
+                        "%s (isFrozen: %d)\n",
+                        testName, u_errorName(errorCode), clone!=NULL && utrie2_isFrozen(clone));
+                /* do not leak a clone that was created but is unusable */
+                utrie2_close(clone);
+                break;
+            } else {
+                utrie2_close(trie);
+                trie=clone;
+            }
+        }
+        {
+            uint32_t value, value2;
+
+            /* modify one value in the thawed clone, read it back, then restore it */
+            value=utrie2_get32(trie, 0xa1);
+            utrie2_set32(trie, 0xa1, 789, &errorCode);
+            value2=utrie2_get32(trie, 0xa1);
+            utrie2_set32(trie, 0xa1, value, &errorCode);
+            if(U_FAILURE(errorCode) || value2!=789) {
+                log_err("error: modifying a cloneAsThawed UTrie2 (%s) failed - %s\n",
+                        testName, u_errorName(errorCode));
+            }
+        }
+        testNewTrie(testName, trie, checkRanges, countCheckRanges);
+    } while(0);
+
+    /* trie may be NULL here if a step after the first close failed */
+    utrie2_close(trie);
+}
+
+/*
+ * Checks the unfrozen trie, then runs testTrieSerialize() once with
+ * 16-bit and once with 32-bit values, under the names "<testName>.16"
+ * and "<testName>.32".
+ *
+ * withClone  passed to testTrieSerialize() as its withSwap flag; if TRUE the
+ *            trie is also replaced by a thawed clone between the two runs.
+ *
+ * Returns the trie that the caller must close: either the one passed in
+ * or, when cloning succeeded, the clone (the original is then closed here).
+ */
+static UTrie2 *
+testTrieSerializeAllValueBits(const char *testName,
+                              UTrie2 *trie, UBool withClone,
+                              const CheckRange checkRanges[], int32_t countCheckRanges) {
+    char name[40];
+    /* leave room for the 3-character suffix and the terminating NUL */
+    const int32_t maxBaseLength=(int32_t)sizeof(name)-4;
+
+    /* verify that all the expected values are in the unfrozen trie */
+    testNewTrie(testName, trie, checkRanges, countCheckRanges);
+
+    /*
+     * Test with both valueBits serializations,
+     * and that utrie2_serialize() can be called multiple times.
+     */
+    /* bounded copy: an overlong testName is truncated instead of overflowing name[] */
+    uprv_strncpy(name, testName, maxBaseLength);
+    name[maxBaseLength]=0;
+    uprv_strcat(name, ".16");
+    testTrieSerialize(name, trie,
+                      UTRIE2_16_VALUE_BITS, withClone,
+                      checkRanges, countCheckRanges);
+
+    if(withClone) {
+        /*
+         * try cloning after the first serialization;
+         * clone-as-thawed just to sometimes try it on an unfrozen trie
+         */
+        UErrorCode errorCode=U_ZERO_ERROR;
+        UTrie2 *clone=utrie2_cloneAsThawed(trie, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: utrie2_cloneAsThawed(%s) after serialization failed - %s\n",
+                    testName, u_errorName(errorCode));
+        } else {
+            utrie2_close(trie);
+            trie=clone;
+
+            testNewTrie(testName, trie, checkRanges, countCheckRanges);
+        }
+    }
+
+    uprv_strncpy(name, testName, maxBaseLength);
+    name[maxBaseLength]=0;
+    uprv_strcat(name, ".32");
+    testTrieSerialize(name, trie,
+                      UTRIE2_32_VALUE_BITS, withClone,
+                      checkRanges, countCheckRanges);
+
+    return trie; /* could be the clone */
+}
+
+static UTrie2 *
+makeTrieWithRanges(const char *testName, UBool withClone,
+ const SetRange setRanges[], int32_t countSetRanges,
+ const CheckRange checkRanges[], int32_t countCheckRanges) {
+ UTrie2 *trie;
+ uint32_t initialValue, errorValue;
+ uint32_t value;
+ UChar32 start, limit;
+ int32_t i;
+ UErrorCode errorCode;
+ UBool overwrite;
+
+ log_verbose("\ntesting Trie '%s'\n", testName);
+ errorCode=U_ZERO_ERROR;
+ getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
+ trie=utrie2_open(initialValue, errorValue, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_err("error: utrie2_open(%s) failed: %s\n", testName, u_errorName(errorCode));
+ return NULL;
+ }
+
+ /* set values from setRanges[] */
+ for(i=0; i>UTRIE2_SHIFT_2)/2; ++i) {
+ utrie2_setRange32(trie, 0x740, 0x840-1, 1, TRUE, &errorCode);
+ utrie2_setRange32(trie, 0x780, 0x880-1, 1, TRUE, &errorCode);
+ utrie2_setRange32(trie, 0x740, 0x840-1, 2, TRUE, &errorCode);
+ utrie2_setRange32(trie, 0x780, 0x880-1, 3, TRUE, &errorCode);
+ }
+ /* make blocks that will be free during compaction */
+ utrie2_setRange32(trie, 0x1000, 0x3000-1, 2, TRUE, &errorCode);
+ utrie2_setRange32(trie, 0x2000, 0x4000-1, 3, TRUE, &errorCode);
+ utrie2_setRange32(trie, 0x1000, 0x4000-1, 1, TRUE, &errorCode);
+ /* set some values for lead surrogate code units */
+ utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd800, 90, &errorCode);
+ utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd999, 94, &errorCode);
+ utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xdbff, 99, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ log_err("error: setting lots of ranges into a trie (%s) failed - %s\n",
+ testName, u_errorName(errorCode));
+ utrie2_close(trie);
+ return;
+ }
+
+ trie=testTrieSerializeAllValueBits(testName, trie, FALSE,
+ checkRanges, LENGTHOF(checkRanges));
+ utrie2_close(trie);
+}
+
+/*
+ * Fills a trie one code point at a time with utrie2_set32() so that
+ * the data array has to grow, then runs the serialization checks on it.
+ */
+static void
+GrowDataArrayTest(void) {
+    static const CheckRange
+    checkRanges[]={
+        { 0, 1 },
+        { 0x720, 2 },
+        { 0x7a0, 3 },
+        { 0x8a0, 4 },
+        { 0x110000, 5 }
+    };
+    /* runs of single-code-point writes, applied in this order: [start, limit) gets value */
+    static const struct {
+        UChar32 start, limit;
+        uint32_t value;
+    } fills[]={
+        { 0,     0x1000,   2 },
+        { 0x720, 0x1100,   3 },  /* some overlap */
+        { 0x7a0, 0x900,    4 },
+        { 0x8a0, 0x110000, 5 }
+    };
+    static const char *const testName="grow-data";
+
+    UTrie2 *trie;
+    UChar32 c;
+    int32_t run;
+    UErrorCode errorCode=U_ZERO_ERROR;
+
+    trie=utrie2_open(1, 0xbad, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_err("error: utrie2_open(%s) failed: %s\n", testName, u_errorName(errorCode));
+        return;
+    }
+
+    /*
+     * Use utrie2_set32() not utrie2_setRange32() to write non-initialValue-data.
+     * Should grow/reallocate the data array to a sufficient length.
+     */
+    for(run=0; run<LENGTHOF(fills); ++run) {
+        for(c=fills[run].start; c<fills[run].limit; ++c) {
+            utrie2_set32(trie, c, fills[run].value, &errorCode);
+        }
+    }
+    /* give every lead surrogate code unit the value 1 ... */
+    for(c=0xd800; c<0xdc00; ++c) {
+        utrie2_set32ForLeadSurrogateCodeUnit(trie, c, 1, &errorCode);
+    }
+    /* ... and then set some distinct values for a few of them */
+    utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd800, 90, &errorCode);
+    utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd999, 94, &errorCode);
+    utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xdbff, 99, &errorCode);
+    /* errorCode is checked once, after all of the writes above */
+    if(U_FAILURE(errorCode)) {
+        log_err("error: setting lots of values into a trie (%s) failed - %s\n",
+                testName, u_errorName(errorCode));
+        utrie2_close(trie);
+        return;
+    }
+
+    trie=testTrieSerializeAllValueBits(testName, trie, FALSE,
+                                       checkRanges, LENGTHOF(checkRanges));
+    utrie2_close(trie);
+}
+
+/* versions 1 and 2 --------------------------------------------------------- */
+
+/*
+ * Helper for GetVersionTest(): stores signature in data[0], calls
+ * utrie2_getVersion(p, length, anyEndianOk) and returns TRUE if the
+ * result equals expected.
+ */
+static UBool
+versionIs(uint32_t *data, uint32_t signature,
+          const void *p, int32_t length, UBool anyEndianOk,
+          int32_t expected) {
+    data[0]=signature;
+    return (UBool)(utrie2_getVersion(p, length, anyEndianOk)==expected);
+}
+
+/*
+ * Checks utrie2_getVersion() with the version 1 and version 2 signatures,
+ * their byte-reversed forms (with and without anyEndianOk), illegal
+ * arguments, and unknown signature values.
+ * Evaluation stops at the first mismatch; one error is logged.
+ */
+static void
+GetVersionTest(void) {
+    uint32_t data[4];
+    const int32_t size=(int32_t)sizeof(data);
+    UBool ok=(UBool)(
+        /* version 1 */
+        versionIs(data, 0x54726965, data, size, FALSE, 1) &&
+        versionIs(data, 0x54726965, data, size, TRUE, 1) &&
+        versionIs(data, 0x65697254, data, size, FALSE, 0) &&
+        versionIs(data, 0x65697254, data, size, TRUE, 1) &&
+        /* version 2 */
+        versionIs(data, 0x54726932, data, size, FALSE, 2) &&
+        versionIs(data, 0x54726932, data, size, TRUE, 2) &&
+        versionIs(data, 0x32697254, data, size, FALSE, 0) &&
+        versionIs(data, 0x32697254, data, size, TRUE, 2) &&
+        /* illegal arguments: NULL pointer, too-short length, offset-by-one pointer */
+        versionIs(data, 0x54726932, NULL, size, FALSE, 0) &&
+        versionIs(data, 0x54726932, data, 3, FALSE, 0) &&
+        versionIs(data, 0x54726932, (char *)data+1, size, FALSE, 0) &&
+        /* unknown signature values */
+        versionIs(data, 0x11223344, data, size, FALSE, 0) &&
+        versionIs(data, 0x54726933, data, size, FALSE, 0));
+
+    if(!ok) {
+        log_err("error: utrie2_getVersion() is not working as expected\n");
+    }
+}
+
+static UNewTrie *
+makeNewTrie1WithRanges(const char *testName,
+ const SetRange setRanges[], int32_t countSetRanges,
+ const CheckRange checkRanges[], int32_t countCheckRanges) {
+ UNewTrie *newTrie;
+ uint32_t initialValue, errorValue;
+ uint32_t value;
+ UChar32 start, limit;
+ int32_t i;
+ UErrorCode errorCode;
+ UBool overwrite, ok;
+
+ log_verbose("\ntesting Trie '%s'\n", testName);
+ errorCode=U_ZERO_ERROR;
+ getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
+ newTrie=utrie_open(NULL, NULL, 2000,
+ initialValue, initialValue,
+ FALSE);
+ if(U_FAILURE(errorCode)) {
+ log_err("error: utrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
+ return NULL;
+ }
+
+ /* set values from setRanges[] */
+ ok=TRUE;
+ for(i=0; itrie2",
+ setRanges2, LENGTHOF(setRanges2),
+ checkRanges2, LENGTHOF(checkRanges2));
+}
+
+void
+addTrie2Test(TestNode** root) {
+ addTest(root, &TrieTest, "tsutil/trie2test/TrieTest");
+ addTest(root, &EnumNewTrieForLeadSurrogateTest,
+ "tsutil/trie2test/EnumNewTrieForLeadSurrogateTest");
+ addTest(root, &DummyTrieTest, "tsutil/trie2test/DummyTrieTest");
+ addTest(root, &FreeBlocksTest, "tsutil/trie2test/FreeBlocksTest");
+ addTest(root, &GrowDataArrayTest, "tsutil/trie2test/GrowDataArrayTest");
+ addTest(root, &GetVersionTest, "tsutil/trie2test/GetVersionTest");
+ addTest(root, &Trie12ConversionTest, "tsutil/trie2test/Trie12ConversionTest");
+}
diff --git a/icu4c/source/test/cintltst/ucnvseltst.c b/icu4c/source/test/cintltst/ucnvseltst.c
index 97d161f76f..2fa5ac30b2 100644
--- a/icu4c/source/test/cintltst/ucnvseltst.c
+++ b/icu4c/source/test/cintltst/ucnvseltst.c
@@ -765,15 +765,41 @@ static void TestSerializationAndUnserialization()
/* first time */
status = U_ZERO_ERROR;
sel = ucnvsel_open((const char**)encodings, testCaseIdx-prev, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_SET, &status);
+ if (U_FAILURE(status)) {
+ log_err("ucnvsel_open(test case %d) failed: %s\n", curCase, u_errorName(status));
+ uprv_free(encodings);
+ uprv_free(names);
+ return;
+ }
buffer = NULL;
ser_len = ucnvsel_serialize(sel, NULL, 0, &status);
- status = U_ZERO_ERROR;
+ if (status != U_BUFFER_OVERFLOW_ERROR) {
+ log_err("ucnvsel_serialize(test case %d preflighting) failed: %s\n", curCase, u_errorName(status));
+ ucnvsel_close(sel);
+ uprv_free(encodings);
+ uprv_free(names);
+ return;
+ }
buffer = uprv_malloc(ser_len);
+ status = U_ZERO_ERROR;
ucnvsel_serialize(sel, buffer, ser_len, &status);
-
ucnvsel_close(sel);
+ if (U_FAILURE(status)) {
+ log_err("ucnvsel_serialize(test case %d) failed: %s\n", curCase, u_errorName(status));
+ uprv_free(encodings);
+ uprv_free(names);
+ uprv_free(buffer);
+ return;
+ }
sel = ucnvsel_unserialize( buffer, ser_len,&status);
+ if (U_FAILURE(status)) {
+ log_err("ucnvsel_unserialize(test case %d) failed: %s\n", curCase, u_errorName(status));
+ uprv_free(encodings);
+ uprv_free(names);
+ uprv_free(buffer);
+ return;
+ }
/* count how many bytes (Is there a portable function that is more efficient than this?) */
f1 = fopenOrError("ConverterSelectorTestUTF16.txt");
@@ -805,11 +831,21 @@ static void TestSerializationAndUnserialization()
break;
/* test, both with length, and NULL terminated */
res1 = ucnvsel_selectForString(sel, text+i, -1, &status);
+ if (U_FAILURE(status)) {
+ log_err("ucnvsel_selectForString(test case %d, string %d with NUL) failed: %s\n",
+ curCase, curTestCase, u_errorName(status));
+ continue;
+ }
/* make sure result is correct! */
verifyResultUTF16(text+i, (const char**) encodings, num_rndm_encodings, res1, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_SET);
uenum_close(res1);
res1 = ucnvsel_selectForString(sel, text+i, u_strlen(text+i), &status);
+ if (U_FAILURE(status)) {
+ log_err("ucnvsel_selectForString(test case %d, string %d with length) failed: %s\n",
+ curCase, curTestCase, u_errorName(status));
+ continue;
+ }
/* make sure result is correct! */
verifyResultUTF16(text+i, (const char**)encodings, num_rndm_encodings, res1, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_SET);
uenum_close(res1);
diff --git a/icu4c/source/test/perf/Makefile.in b/icu4c/source/test/perf/Makefile.in
index 1d255a766e..c422c5007f 100644
--- a/icu4c/source/test/perf/Makefile.in
+++ b/icu4c/source/test/perf/Makefile.in
@@ -18,7 +18,7 @@ subdir = test
## Files to remove for 'make clean'
CLEANFILES = *~
-SUBDIRS = collationperf charperf normperf ubrkperf unisetperf usetperf ustrperf utfperf
+SUBDIRS = collationperf charperf normperf ubrkperf unisetperf usetperf ustrperf utfperf utrie2perf
## List of phony targets
.PHONY : everything all all-local all-recursive install install-local \
diff --git a/icu4c/source/test/perf/perf.sln b/icu4c/source/test/perf/perf.sln
index 99043babc5..868fcf6fcd 100644
--- a/icu4c/source/test/perf/perf.sln
+++ b/icu4c/source/test/perf/perf.sln
@@ -1,5 +1,5 @@
Microsoft Visual Studio Solution File, Format Version 9.00
-# Visual Studio 2005
+# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "charperf", "charperf\charperf.vcproj", "{D850A4B6-7D94-476E-9392-E9272DA4EAAF}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "normperf", "normperf\normperf.vcproj", "{56CCC661-8D33-4F0A-B62F-C619CE843C68}"
@@ -20,6 +20,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "unisetperf", "unisetperf\un
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "strsrchperf", "strsrchperf\strsrchperf.vcproj", "{241DED26-1635-45E6-9564-7742AC8043B5}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "utrie2perf", "utrie2perf\utrie2perf.vcproj", "{B9458CB3-9B09-402A-8C4C-43B6D0EA9691}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
@@ -108,6 +110,12 @@ Global
{241DED26-1635-45E6-9564-7742AC8043B5}.Release|Win32.Build.0 = Release|Win32
{241DED26-1635-45E6-9564-7742AC8043B5}.Release|x64.ActiveCfg = Release|x64
{241DED26-1635-45E6-9564-7742AC8043B5}.Release|x64.Build.0 = Release|x64
+ {B9458CB3-9B09-402A-8C4C-43B6D0EA9691}.Debug|Win32.ActiveCfg = Debug|Win32
+ {B9458CB3-9B09-402A-8C4C-43B6D0EA9691}.Debug|Win32.Build.0 = Debug|Win32
+ {B9458CB3-9B09-402A-8C4C-43B6D0EA9691}.Debug|x64.ActiveCfg = Debug|Win32
+ {B9458CB3-9B09-402A-8C4C-43B6D0EA9691}.Release|Win32.ActiveCfg = Release|Win32
+ {B9458CB3-9B09-402A-8C4C-43B6D0EA9691}.Release|Win32.Build.0 = Release|Win32
+ {B9458CB3-9B09-402A-8C4C-43B6D0EA9691}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/icu4c/source/test/perf/utrie2perf/Makefile.in b/icu4c/source/test/perf/utrie2perf/Makefile.in
new file mode 100644
index 0000000000..e9098ebe56
--- /dev/null
+++ b/icu4c/source/test/perf/utrie2perf/Makefile.in
@@ -0,0 +1,79 @@
+## Makefile.in for ICU - test/perf/utrie2perf
+## Copyright (c) 2001-2008, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = test/perf/utrie2perf
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = utrie2perf
+
+CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw
+LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = utrie2perf.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+clean-local:
+ test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+ $(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+ $(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+ $(LINK.cc) -o $@ $^ $(LIBS)
+ $(POST_BUILD_STEP)
+
+invoke:
+ ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+ifneq ($(patsubst %install,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+endif
+
diff --git a/icu4c/source/test/perf/utrie2perf/utrie2perf.bat b/icu4c/source/test/perf/utrie2perf/utrie2perf.bat
new file mode 100755
index 0000000000..fa3883c769
--- /dev/null
+++ b/icu4c/source/test/perf/utrie2perf/utrie2perf.bat
@@ -0,0 +1,19 @@
+rem Copyright (C) 2008, International Business Machines Corporation and others.
+rem All Rights Reserved.
+
+set PERF=c:\svn\icuproj\icu\utf8\source\test\perf\utrie2perf\x86\Release\utrie2perf
+
+for %%f in (udhr_eng.txt
+ udhr_deu.txt
+ udhr_fra.txt
+ udhr_rus.txt
+ udhr_tha.txt
+ udhr_jpn.txt
+ udhr_cmn.txt
+ udhr_jpn.html) do (
+ %PERF% CheckFCD -f \temp\udhr\%%f -v -e UTF-8 --passes 3 --iterations 30000
+rem %PERF% CheckFCDAlwaysGet -f \temp\udhr\%%f -v -e UTF-8 --passes 3 --iterations 30000
+rem %PERF% CheckFCDUTF8 -f \temp\udhr\%%f -v -e UTF-8 --passes 3 --iterations 30000
+ %PERF% ToNFC -f \temp\udhr\%%f -v -e UTF-8 --passes 3 --iterations 30000
+ %PERF% GetBiDiClass -f \temp\udhr\%%f -v -e UTF-8 --passes 3 --iterations 30000
+)
diff --git a/icu4c/source/test/perf/utrie2perf/utrie2perf.cpp b/icu4c/source/test/perf/utrie2perf/utrie2perf.cpp
new file mode 100644
index 0000000000..18c386c756
--- /dev/null
+++ b/icu4c/source/test/perf/utrie2perf/utrie2perf.cpp
@@ -0,0 +1,261 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2002-2008, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * file name: utrie2perf.cpp
+ * encoding: US-ASCII
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2008sep07
+ * created by: Markus W. Scherer
+ *
+ * Performance test program for UTrie2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/uchar.h"
+#include "unicode/unorm.h"
+#include "unicode/uperf.h"
+#include "uoptions.h"
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+#if 0
+// Left over from when icu/branches/markus/utf8 could use both old UTrie
+// and new UTrie2, switched with #if in unorm.cpp and ubidi_props.c.
+// Comparative benchmarks were done in that branch on revision r24630
+// and earlier.
+U_CAPI void U_EXPORT2
+unorm_initUTrie2(UErrorCode *pErrorCode);
+
+U_CAPI void U_EXPORT2
+ubidi_initUTrie2(UErrorCode *pErrorCode);
+#endif
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+
+U_NAMESPACE_END
+
+// Test object.
+// Loads the input text through UPerfTest, counts its code points, and
+// keeps a UTF-8 copy of it (utf8/utf8Length) for the test functions.
+class UTrie2PerfTest : public UPerfTest {
+public:
+    // On success, status is unchanged or a warning; it is set to
+    // U_MEMORY_ALLOCATION_ERROR if the UTF-8 copy cannot be allocated.
+    UTrie2PerfTest(int32_t argc, const char *argv[], UErrorCode &status)
+            : UPerfTest(argc, argv, NULL, 0, "", status),
+              utf8(NULL), utf8Length(0), countInputCodePoints(0) {
+        if (U_SUCCESS(status)) {
+#if 0  // See comment at unorm_initUTrie2() forward declaration.
+            unorm_initUTrie2(&status);
+            ubidi_initUTrie2(&status);
+#endif
+            int32_t inputLength;
+            UPerfTest::getBuffer(inputLength, status);
+            if(U_SUCCESS(status) && inputLength>0) {
+                countInputCodePoints = u_countChar32(buffer, bufferLen);
+
+                // Preflight the UTF-8 length and allocate utf8.
+                u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
+                if(status==U_BUFFER_OVERFLOW_ERROR) {
+                    utf8=(char *)malloc(utf8Length);
+                    if(utf8!=NULL) {
+                        status=U_ZERO_ERROR;
+                        u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
+                    } else {
+                        status=U_MEMORY_ALLOCATION_ERROR;
+                    }
+                }
+
+                if(verbose) {
+                    printf("code points:%ld  len16:%ld  len8:%ld  "
+                           "B/cp:%.3g\n",
+                           (long)countInputCodePoints, (long)bufferLen, (long)utf8Length,
+                           (double)utf8Length/countInputCodePoints);
+                }
+            }
+        }
+    }
+
+    // Releases the UTF-8 copy that the constructor allocated with malloc();
+    // free(NULL) is a no-op, so this is safe when no copy was made.
+    virtual ~UTrie2PerfTest() {
+        free(utf8);
+    }
+
+    virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
+
+    // Read-only access to the UTF-16 input text held by the base class.
+    const UChar *getBuffer() const { return buffer; }
+    int32_t getBufferLen() const { return bufferLen; }
+
+    // UTF-8 copy of the input text; NULL if it was not created.
+    // Owned by this object.
+    char *utf8;
+    int32_t utf8Length;
+
+    // Number of code points in the input text.
+    int32_t countInputCodePoints;
+};
+
+// Performance test function object.
+// Common base of the test functions in this file: it remembers the
+// test object and reports one operation per input code point.
+class Command : public UPerfFunction {
+protected:
+    Command(const UTrie2PerfTest &test) : testcase(test) {}
+
+public:
+    virtual ~Command() {}
+
+    // call(UErrorCode *) is implemented by each subclass.
+
+    virtual long getOperationsPerIteration() {
+        // Number of code points tested.
+        const long codePoints=testcase.countInputCodePoints;
+        return codePoints;
+    }
+
+    // getEventsPerIteration() is not overridden here.
+
+    // The test object that supplies the input text.
+    const UTrie2PerfTest &testcase;
+    // Result of the last quick check; written by subclasses that run one.
+    UNormalizationCheckResult qcResult;
+};
+
+// Runs unorm_quickCheck() in UNORM_FCD mode over the UTF-16 input text.
+class CheckFCD : public Command {
+protected:
+    CheckFCD(const UTrie2PerfTest &testcase) : Command(testcase) {}
+public:
+    // Factory: returns a new heap-allocated CheckFCD.
+    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
+        UPerfFunction *function=new CheckFCD(testcase);
+        return function;
+    }
+    // Stores the result in qcResult. A failure is printed to stderr;
+    // pErrorCode is not written.
+    virtual void call(UErrorCode* pErrorCode) {
+        const UChar *text=testcase.getBuffer();
+        int32_t textLength=testcase.getBufferLen();
+        UErrorCode status=U_ZERO_ERROR;
+
+        qcResult=unorm_quickCheck(text, textLength, UNORM_FCD, &status);
+        if(U_SUCCESS(status)) {
+            return;
+        }
+        fprintf(stderr, "error: unorm_quickCheck(UNORM_FCD) failed: %s\n",
+                u_errorName(status));
+    }
+};
+
+#if 0 // See comment at unorm_initUTrie2() forward declaration.
+
+// Runs unorm_quickCheck() in UNORM_FCD_ALWAYS_GET mode over the UTF-16
+// input text.
+class CheckFCDAlwaysGet : public Command {
+protected:
+    CheckFCDAlwaysGet(const UTrie2PerfTest &testcase) : Command(testcase) {}
+public:
+    // Factory: returns a new heap-allocated CheckFCDAlwaysGet.
+    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
+        return new CheckFCDAlwaysGet(testcase);
+    }
+    // Stores the result in qcResult. A failure is printed to stderr;
+    // pErrorCode is not written.
+    virtual void call(UErrorCode* pErrorCode) {
+        UErrorCode errorCode=U_ZERO_ERROR;
+        qcResult=unorm_quickCheck(testcase.getBuffer(), testcase.getBufferLen(),
+                                  UNORM_FCD_ALWAYS_GET, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            // name the mode that was actually used, so that this message
+            // can be told apart from the one printed by CheckFCD
+            fprintf(stderr, "error: unorm_quickCheck(UNORM_FCD_ALWAYS_GET) failed: %s\n",
+                    u_errorName(errorCode));
+        }
+    }
+};
+
+U_CAPI UBool U_EXPORT2
+unorm_checkFCDUTF8(const uint8_t *src, int32_t srcLength, const UnicodeSet *nx);
+
+// Calls unorm_checkFCDUTF8() on the UTF-8 copy of the input text
+// (testcase.utf8 / testcase.utf8Length).
+class CheckFCDUTF8 : public Command {
+protected:
+ CheckFCDUTF8(const UTrie2PerfTest &testcase) : Command(testcase) {}
+public:
+ // Factory: returns a new heap-allocated CheckFCDUTF8.
+ static UPerfFunction* get(const UTrie2PerfTest &testcase) {
+ return new CheckFCDUTF8(testcase);
+ }
+ // pErrorCode is not used; the only check is on the returned value.
+ virtual void call(UErrorCode* pErrorCode) {
+ UBool isFCD=unorm_checkFCDUTF8((const uint8_t *)testcase.utf8, testcase.utf8Length, NULL);
+ // Any value above 1 is reported as bogus; this also makes use of the result.
+ if(isFCD>1) {
+ fprintf(stderr, "error: bogus result from unorm_checkFCDUTF8()\n");
+ }
+ }
+};
+
+#endif
+
+// Normalizes the UTF-16 input text to NFC into a buffer that is sized
+// once, in the constructor.
+class ToNFC : public Command {
+protected:
+    ToNFC(const UTrie2PerfTest &test) : Command(test) {
+        // Call unorm_normalize() without a destination buffer and use
+        // its return value as the capacity of dest.
+        UErrorCode preflightStatus=U_ZERO_ERROR;
+        const UChar *src=testcase.getBuffer();
+        int32_t srcLength=testcase.getBufferLen();
+
+        destCapacity=unorm_normalize(src, srcLength,
+                                     UNORM_NFC, 0,
+                                     NULL, 0,
+                                     &preflightStatus);
+        dest=new UChar[destCapacity];
+    }
+    ~ToNFC() {
+        delete [] dest;
+    }
+public:
+    // Factory: returns a new heap-allocated ToNFC.
+    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
+        UPerfFunction *function=new ToNFC(testcase);
+        return function;
+    }
+    // Prints to stderr on failure or if the output length differs from the
+    // preflighted one; pErrorCode is not written.
+    virtual void call(UErrorCode* pErrorCode) {
+        const UChar *src=testcase.getBuffer();
+        int32_t srcLength=testcase.getBufferLen();
+        UErrorCode status=U_ZERO_ERROR;
+
+        int32_t written=unorm_normalize(src, srcLength,
+                                        UNORM_NFC, 0,
+                                        dest, destCapacity,
+                                        &status);
+        if(U_SUCCESS(status) && written==destCapacity) {
+            return;
+        }
+        fprintf(stderr, "error: unorm_normalize(UNORM_NFC) failed: %s\n",
+                u_errorName(status));
+    }
+
+private:
+    UChar *dest;             // output buffer with destCapacity UChars, owned
+    int32_t destCapacity;    // return value of the preflight call
+};
+
+// Walks the UTF-16 input text code point by code point and collects
+// one bit per value seen in bitSet.
+class GetBiDiClass : public Command {
+protected:
+    GetBiDiClass(const UTrie2PerfTest &testcase) : Command(testcase) {}
+public:
+    // Factory: returns a new heap-allocated GetBiDiClass.
+    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
+        return new GetBiDiClass(testcase);
+    }
+    // Prints to stderr if non-empty input produced no bits;
+    // pErrorCode is not written.
+    virtual void call(UErrorCode* pErrorCode) {
+        const UChar *buffer=testcase.getBuffer();
+        int32_t length=testcase.getBufferLen();
+        UChar32 c;
+        int32_t i;
+        uint32_t bitSet=0;
+        // NOTE(review): in this copy of the file the text between the '<'
+        // of the loop condition and the '>' of the final check was lost,
+        // so the statement did not compile. The loop below is a
+        // reconstruction from the remaining declarations, the class name
+        // and the error message; confirm it against the upstream file.
+        // U16_NEXT advances i itself, hence the empty increment clause.
+        for(i=0; i<length;) {
+            U16_NEXT(buffer, i, length, c);
+            bitSet|=(uint32_t)1<<u_charDirection(c);
+        }
+        if(length>0 && bitSet==0) {
+            fprintf(stderr, "error: GetBiDiClass() did not collect bits\n");
+        }
+    }
+};
+
+// Maps a test index to its name and, if exec is set, to a new function
+// object for it. An unknown index yields the name "" and NULL.
+// par is not used.
+UPerfFunction* UTrie2PerfTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
+    UPerfFunction *function = NULL;
+
+    switch (index) {
+    case 0:
+        name = "CheckFCD";
+        if (exec) {
+            function = CheckFCD::get(*this);
+        }
+        break;
+    case 1:
+        name = "ToNFC";
+        if (exec) {
+            function = ToNFC::get(*this);
+        }
+        break;
+    case 2:
+        name = "GetBiDiClass";
+        if (exec) {
+            function = GetBiDiClass::get(*this);
+        }
+        break;
+#if 0  // See comment at unorm_initUTrie2() forward declaration.
+    case 3:
+        name = "CheckFCDAlwaysGet";
+        if (exec) {
+            function = CheckFCDAlwaysGet::get(*this);
+        }
+        break;
+    case 4:
+        name = "CheckFCDUTF8";
+        if (exec) {
+            function = CheckFCDUTF8::get(*this);
+        }
+        break;
+#endif
+    default:
+        name = "";
+        break;
+    }
+    return function;
+}
+
+// Entry point: builds the test object from the command line and runs it.
+// Returns 0 on success, the UErrorCode value if construction failed,
+// or -1 if the tests could not be run.
+int main(int argc, const char *argv[]) {
+    UErrorCode status = U_ZERO_ERROR;
+    UTrie2PerfTest test(argc, argv, status);
+
+    if (U_SUCCESS(status)) {
+        if (test.run()) {
+            return 0;
+        }
+        fprintf(stderr, "FAILED: Tests could not be run please check the "
+                        "arguments.\n");
+        return -1;
+    }
+
+    // Construction failed: report the error and show the usage text.
+    printf("The error is %s\n", u_errorName(status));
+    test.usage();
+    return status;
+}
diff --git a/icu4c/source/test/perf/utrie2perf/utrie2perf.pl b/icu4c/source/test/perf/utrie2perf/utrie2perf.pl
new file mode 100755
index 0000000000..5c70742384
--- /dev/null
+++ b/icu4c/source/test/perf/utrie2perf/utrie2perf.pl
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+# ********************************************************************
+# * COPYRIGHT:
+# * Copyright (c) 2005-2008, International Business Machines Corporation and
+# * others. All Rights Reserved.
+# ********************************************************************
+# Perf driver: runs the same tests against a "previous" and a "latest" ICU build via runTests().
+#use strict;
+# NOTE(review): the programs and tests below are utfperf's (gb18030 Roundtrip/FromUnicode/FromUTF8), not utrie2perf's - looks like an unadapted copy; confirm.
+require "../perldriver/Common.pl";
+# Common.pl presumably defines $ICUPathPrevious, $ICUPathLatest, $OnWindows etc. used below - confirm.
+use lib '../perldriver';
+
+use PerfFramework;
+
+my $options = {
+ "title"=>"UTF performance: ICU (".$ICUPreviousVersion." and ".$ICULatestVersion.")",
+ "headers"=>"ICU".$ICUPreviousVersion." ICU".$ICULatestVersion,
+ "operationIs"=>"gb18030 encoding string",
+ "passes"=>"1",
+ "time"=>"2",
+ #"outputType"=>"HTML",
+ "dataDir"=>$ConversionDataPath,
+ "outputDir"=>"../results"
+ };
+
+# Programs under test: $p1 is the previous build, $p2 the latest build.
+# Every test is run with both programs; the results are stored and combined.
+my $p1;
+my $p2;
+# The executable location differs between Windows and other platforms.
+if ($OnWindows) {
+ $p1 = $ICUPathPrevious."/utfperf/$WindowsPlatform/Release/utfperf.exe -e gb18030"; # Previous
+ $p2 = $ICUPathLatest."/utfperf/$WindowsPlatform/Release/utfperf.exe -e gb18030"; # Latest
+} else {
+ $p1 = $ICUPathPrevious."/utfperf/utfperf -e gb18030"; # Previous
+ $p2 = $ICUPathLatest."/utfperf/utfperf -e gb18030"; # Latest
+}
+# Test name => [command line for the previous build, command line for the latest build]
+my $tests = {
+ "Roundtrip", ["$p1 Roundtrip", "$p2 Roundtrip"],
+ "FromUnicode", ["$p1 FromUnicode", "$p2 FromUnicode"],
+ "FromUTF8", ["$p1 FromUTF8", "$p2 FromUTF8"],
+ #"UTF-8", ["$p UTF_8"],
+ #"UTF-8 small buffer", ["$p UTF_8_SB"],
+ #"SCSU", ["$p SCSU"],
+ #"SCSU small buffer", ["$p SCSU_SB"],
+ #"BOCU_1", ["$p BOCU_1"],
+ #"BOCU_1 small buffer", ["$p BOCU_1_SB"],
+ };
+# Data files, keyed by an (empty) group name; a single input file is used.
+my $dataFiles = {
+ "",
+ [
+ "xuzhimo.txt"
+ ]
+ };
+
+runTests($options, $tests, $dataFiles);
diff --git a/icu4c/source/test/perf/utrie2perf/utrie2perf.sh b/icu4c/source/test/perf/utrie2perf/utrie2perf.sh
new file mode 100755
index 0000000000..92691a036a
--- /dev/null
+++ b/icu4c/source/test/perf/utrie2perf/utrie2perf.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# Copyright (C) 2008, International Business Machines Corporation and others.
+# All Rights Reserved.
+# Runs the utrie2perf tests CheckFCD, ToNFC and GetBiDiClass over a list of UDHR text files.
+# export LD_LIBRARY_PATH=/home/mscherer/svn.icu/utf8-dev/lib:/home/mscherer/svn.icu/utf8-dev/tools/ctestfw
+
+# Echo shell script commands (-x) and stop at the first failing command (-e).
+set -ex
+# NOTE: hard-coded path into a developer checkout under the home directory; adjust for your build tree.
+PERF=~/svn.icu/utf8-dev/test/perf/utrie2perf/utrie2perf
+# Input files are read from ~/udhr; every run passes -v -e UTF-8 --passes 3 --iterations 30000.
+for file in udhr_eng.txt \
+ udhr_deu.txt \
+ udhr_fra.txt \
+ udhr_rus.txt \
+ udhr_tha.txt \
+ udhr_jpn.txt \
+ udhr_cmn.txt \
+ udhr_jpn.html; do
+ $PERF CheckFCD -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 30000
+# $PERF CheckFCDAlwaysGet -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 30000
+# $PERF CheckFCDUTF8 -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 30000
+ $PERF ToNFC -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 30000
+ $PERF GetBiDiClass -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 30000
+done
diff --git a/icu4c/source/test/perf/utrie2perf/utrie2perf.vcproj b/icu4c/source/test/perf/utrie2perf/utrie2perf.vcproj
new file mode 100644
index 0000000000..7d71f14842
--- /dev/null
+++ b/icu4c/source/test/perf/utrie2perf/utrie2perf.vcproj
@@ -0,0 +1,395 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/icu4c/source/tools/genbidi/genbidi.c b/icu4c/source/tools/genbidi/genbidi.c
index 4e21925fb5..c05d617631 100644
--- a/icu4c/source/tools/genbidi/genbidi.c
+++ b/icu4c/source/tools/genbidi/genbidi.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2004-2006, International Business Machines
+* Copyright (C) 2004-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -131,17 +131,16 @@ singleEnumLineFn(void *context,
UErrorCode *pErrorCode) {
const SingleEnum *sen;
char *s;
- uint32_t start, limit, uv;
+ uint32_t start, end, uv;
int32_t value;
sen=(const SingleEnum *)context;
- u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
+ u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
exit(*pErrorCode);
}
- ++limit;
/* parse property alias */
s=trimTerminateField(fields[1][0], fields[1][1]);
@@ -170,7 +169,7 @@ singleEnumLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
- if(!upvec_setValue(pv, start, limit, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
+ if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
fprintf(stderr, "genbidi error: unable to set %s code: %s\n",
sen->propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@@ -232,17 +231,16 @@ binariesLineFn(void *context,
UErrorCode *pErrorCode) {
const Binaries *bin;
char *s;
- uint32_t start, limit;
+ uint32_t start, end;
int32_t i;
bin=(const Binaries *)context;
- u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
+ u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
exit(*pErrorCode);
}
- ++limit;
/* parse binary property name */
s=(char *)u_skipWhitespace(fields[1][0]);
@@ -262,7 +260,7 @@ binariesLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
- if(!upvec_setValue(pv, start, limit, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
+ if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
bin->binaries[i].propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@@ -524,7 +522,7 @@ unicodeDataLineFn(void *context,
/* get Mirrored flag, field 9 */
if(*fields[9][0]=='Y') {
- if(!upvec_setValue(pv, c, c+1, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) {
+ if(!upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) {
fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
(long)c, u_errorName(errorCode));
exit(errorCode);
@@ -578,7 +576,7 @@ parseDB(const char *filename, UErrorCode *pErrorCode) {
for(i=0; iinitialValue, &errorCode);
+ }
+ utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(
+ stderr,
+ "genbidi error: deleting lead surrogate code unit values failed - %s\n",
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
}
f=usrc_create(dataDir, "ubidi_props_data.c");
@@ -400,9 +431,9 @@ generateData(const char *dataDir, UBool csource) {
"static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={",
indexes, 32, UBIDI_IX_TOP,
"};\n\n");
- usrc_writeUTrieArrays(f,
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
- &trie,
+ trie2,
"\n};\n\n");
usrc_writeArray(f,
"static const uint32_t ubidi_props_mirrors[%ld]={\n",
@@ -419,14 +450,15 @@ generateData(const char *dataDir, UBool csource) {
" ubidi_props_mirrors,\n"
" ubidi_props_jgArray,\n",
f);
- usrc_writeUTrieStruct(f,
+ usrc_writeUTrie2Struct(f,
" {\n",
- &trie, "ubidi_props_trieIndex", NULL, NULL,
+ trie2, "ubidi_props_trieIndex", NULL,
" },\n");
usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
fputs("};\n", f);
fclose(f);
}
+ utrie2_close(trie2);
} else {
/* write the data */
pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
diff --git a/icu4c/source/tools/gencase/gencase.c b/icu4c/source/tools/gencase/gencase.c
index d7f9820e49..0a6a25ece8 100644
--- a/icu4c/source/tools/gencase/gencase.c
+++ b/icu4c/source/tools/gencase/gencase.c
@@ -126,17 +126,16 @@ binariesLineFn(void *context,
UErrorCode *pErrorCode) {
const Binaries *bin;
char *s;
- uint32_t start, limit;
+ uint32_t start, end;
int32_t i;
bin=(const Binaries *)context;
- u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
+ u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "gencase: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
exit(*pErrorCode);
}
- ++limit;
/* parse binary property name */
s=(char *)u_skipWhitespace(fields[1][0]);
@@ -156,7 +155,7 @@ binariesLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
- if(!upvec_setValue(pv, start, limit, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
+ if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
fprintf(stderr, "gencase error: unable to set %s, code: %s\n",
bin->binaries[i].propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
diff --git a/icu4c/source/tools/gencase/store.c b/icu4c/source/tools/gencase/store.c
index f5555698fb..57c2abf028 100644
--- a/icu4c/source/tools/gencase/store.c
+++ b/icu4c/source/tools/gencase/store.c
@@ -26,6 +26,7 @@
#include "cstring.h"
#include "filestrm.h"
#include "utrie.h"
+#include "utrie2.h"
#include "uarrsort.h"
#include "unicode/udata.h"
#include "unewdata.h"
@@ -408,7 +409,7 @@ setProps(Props *p) {
errorCode=U_ZERO_ERROR;
if( value!=oldValue &&
- !upvec_setValue(pv, p->code, p->code+1, 0, value, 0xffffffff, &errorCode)
+ !upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode)
) {
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
u_errorName(errorCode));
@@ -427,7 +428,7 @@ setProps(Props *p) {
extern void
addCaseSensitive(UChar32 first, UChar32 last) {
UErrorCode errorCode=U_ZERO_ERROR;
- if(!upvec_setValue(pv, first, last+1, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) {
+ if(!upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) {
fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
u_errorName(errorCode));
exit(errorCode);
@@ -572,7 +573,7 @@ addClosureMapping(UChar32 src, UChar32 dest) {
}
errorCode=U_ZERO_ERROR;
- if(!upvec_setValue(pv, src, src+1, 0, value, 0xffffffff, &errorCode)) {
+ if(!upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode)) {
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
u_errorName(errorCode));
exit(errorCode);
@@ -717,7 +718,7 @@ makeCaseClosure() {
UChar *p;
uint32_t *row;
uint32_t value;
- UChar32 start, limit, c, c2;
+ UChar32 start, end, c, c2;
int32_t i, j;
UBool someMappingsAdded;
@@ -751,10 +752,10 @@ makeCaseClosure() {
someMappingsAdded=FALSE;
i=0;
- while((row=upvec_getRow(pv, i, &start, &limit))!=NULL) {
+ while((row=upvec_getRow(pv, i, &start, &end))!=NULL && start=limit) {
+ while((row=upvec_getRow(pv, i, NULL, &end))!=NULL && start>end) {
++i;
}
row=NULL; /* signal to continue with outer loop, without further ++i */
@@ -1038,7 +1039,7 @@ generateData(const char *dataDir, UBool csource) {
static uint8_t trieBlock[40000];
const uint32_t *row;
- UChar32 start, limit;
+ UChar32 start, end;
int32_t i;
UNewDataMemory *pData;
@@ -1053,8 +1054,8 @@ generateData(const char *dataDir, UBool csource) {
exit(U_MEMORY_ALLOCATION_ERROR);
}
- for(i=0; (row=upvec_getRow(pv, i, &start, &limit))!=NULL; ++i) {
- if(!utrie_setRange32(pTrie, start, limit, *row, TRUE)) {
+ for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL; ++i) {
+ if(startinitialValue, &errorCode);
+ }
+ utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(
+ stderr,
+ "gencase error: deleting lead surrogate code unit values failed - %s\n",
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
}
f=usrc_create(dataDir, "ucase_props_data.c");
@@ -1105,9 +1136,9 @@ generateData(const char *dataDir, UBool csource) {
"static const int32_t ucase_props_indexes[UCASE_IX_TOP]={",
indexes, 32, UCASE_IX_TOP,
"};\n\n");
- usrc_writeUTrieArrays(f,
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL,
- &trie,
+ trie2,
"\n};\n\n");
usrc_writeArray(f,
"static const uint16_t ucase_props_exceptions[%ld]={\n",
@@ -1124,14 +1155,15 @@ generateData(const char *dataDir, UBool csource) {
" ucase_props_exceptions,\n"
" ucase_props_unfold,\n",
f);
- usrc_writeUTrieStruct(f,
+ usrc_writeUTrie2Struct(f,
" {\n",
- &trie, "ucase_props_trieIndex", NULL, NULL,
+ trie2, "ucase_props_trieIndex", NULL,
" },\n");
usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
fputs("};\n", f);
fclose(f);
}
+ utrie2_close(trie2);
} else {
/* write the data */
pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
diff --git a/icu4c/source/tools/gennorm/store.c b/icu4c/source/tools/gennorm/store.c
index 91a50346ab..581a447334 100644
--- a/icu4c/source/tools/gennorm/store.c
+++ b/icu4c/source/tools/gennorm/store.c
@@ -26,6 +26,7 @@
#include "filestrm.h"
#include "unicode/udata.h"
#include "utrie.h"
+#include "utrie2.h"
#include "unicode/uset.h"
#include "toolutil.h"
#include "unewdata.h"
@@ -1787,6 +1788,31 @@ processData() {
}
}
+/* is this a norm32 with a special index for a lead surrogate? */
+static U_INLINE UBool
+isNorm32LeadSurrogate(uint32_t norm32) {
+ return _NORM_MIN_SPECIAL<=norm32 && norm32<_NORM_SURROGATES_TOP;
+}
+
+/* normTrie: 32-bit trie result may contain a special extraData index with the folding offset */
+static int32_t U_CALLCONV
+getFoldingNormOffset(uint32_t norm32) {
+ if(isNorm32LeadSurrogate(norm32)) {
+ return
+ UTRIE_BMP_INDEX_LENGTH+
+ (((int32_t)norm32>>(_NORM_EXTRA_SHIFT-UTRIE_SURROGATE_BLOCK_BITS))&
+ (0x3ff<0) {
- utrie_unserialize(&fcdTrie2, fcdTrieBlock, fcdTrieSize, &errorCode);
+ utrie_unserialize(&fcdRuntimeTrie, fcdTrieBlock, fcdTrieSize, &errorCode);
}
if(auxTrieSize>0) {
- utrie_unserialize(&auxTrie2, auxTrieBlock, auxTrieSize, &errorCode);
+ utrie_unserialize(&auxRuntimeTrie, auxTrieBlock, auxTrieSize, &errorCode);
+ auxRuntimeTrie.getFoldingOffset=getFoldingAuxOffset;
}
if(U_FAILURE(errorCode)) {
fprintf(
@@ -1976,6 +2005,41 @@ generateData(const char *dataDir, UBool csource) {
exit(errorCode);
}
+ /* use UTrie2 */
+ dataInfo.formatVersion[0]=3;
+ dataInfo.formatVersion[2]=0;
+ dataInfo.formatVersion[3]=0;
+ normRuntimeTrie2=utrie2_fromUTrie(&normRuntimeTrie, 0, &errorCode);
+ if(fcdTrieSize>0) {
+ fcdRuntimeTrie2=utrie2_fromUTrie(&fcdRuntimeTrie, 0, &errorCode);
+ }
+ if(auxTrieSize>0) {
+ auxRuntimeTrie2=utrie2_fromUTrie(&auxRuntimeTrie, 0, &errorCode);
+ }
+ if(U_FAILURE(errorCode)) {
+ fprintf(
+ stderr,
+ "gennorm error: utrie2_fromUTrie() failed - %s\n",
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
+ if(auxTrieSize>0) {
+ /* delete lead surrogate code unit values */
+ UChar lead;
+ auxRuntimeTrie2=utrie2_cloneAsThawed(auxRuntimeTrie2, &errorCode);
+ for(lead=0xd800; lead<0xdc00; ++lead) {
+ utrie2_set32ForLeadSurrogateCodeUnit(auxRuntimeTrie2, lead, auxRuntimeTrie2->initialValue, &errorCode);
+ }
+ utrie2_freeze(auxRuntimeTrie2, UTRIE2_16_VALUE_BITS, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(
+ stderr,
+ "gennorm error: deleting lead surrogate code unit values failed - %s\n",
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
+ }
+
f=usrc_create(dataDir, "unorm_props_data.c");
if(f!=NULL) {
usrc_writeArray(f,
@@ -1990,14 +2054,14 @@ generateData(const char *dataDir, UBool csource) {
"static const int32_t indexes[_NORM_INDEX_TOP]={\n",
indexes, 32, _NORM_INDEX_TOP,
"\n};\n\n");
- usrc_writeUTrieArrays(f,
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t normTrie_index[%ld]={\n",
"static const uint32_t normTrie_data32[%ld]={\n",
- &normTrie2,
+ normRuntimeTrie2,
"\n};\n\n");
- usrc_writeUTrieStruct(f,
- "static const UTrie normTrie={\n",
- &normTrie2, "normTrie_index", "normTrie_data32", "getFoldingNormOffset",
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 normTrie={\n",
+ normRuntimeTrie2, "normTrie_index", "normTrie_data32",
"};\n\n");
usrc_writeArray(f,
"static const uint16_t extraData[%ld]={\n",
@@ -2008,28 +2072,28 @@ generateData(const char *dataDir, UBool csource) {
combiningTable, 16, combiningTableTop,
"\n};\n\n");
if(fcdTrieSize>0) {
- usrc_writeUTrieArrays(f,
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t fcdTrie_index[%ld]={\n", NULL,
- &fcdTrie2,
+ fcdRuntimeTrie2,
"\n};\n\n");
- usrc_writeUTrieStruct(f,
- "static const UTrie fcdTrie={\n",
- &fcdTrie2, "fcdTrie_index", NULL, NULL,
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 fcdTrie={\n",
+ fcdRuntimeTrie2, "fcdTrie_index", NULL,
"};\n\n");
} else {
- fputs( "static const UTrie fcdTrie={ NULL };\n\n", f);
+ fputs( "static const UTrie2 fcdTrie={ NULL };\n\n", f);
}
if(auxTrieSize>0) {
- usrc_writeUTrieArrays(f,
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t auxTrie_index[%ld]={\n", NULL,
- &auxTrie2,
+ auxRuntimeTrie2,
"\n};\n\n");
- usrc_writeUTrieStruct(f,
- "static const UTrie auxTrie={\n",
- &auxTrie2, "auxTrie_index", NULL, "getFoldingAuxOffset",
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 auxTrie={\n",
+ auxRuntimeTrie2, "auxTrie_index", NULL,
"};\n\n");
} else {
- fputs( "static const UTrie auxTrie={ NULL };\n\n", f);
+ fputs( "static const UTrie2 auxTrie={ NULL };\n\n", f);
}
usrc_writeArray(f,
"static const uint16_t canonStartSets[%ld]={\n",
@@ -2037,6 +2101,9 @@ generateData(const char *dataDir, UBool csource) {
"\n};\n\n");
fclose(f);
}
+ utrie2_close(normRuntimeTrie2);
+ utrie2_close(fcdRuntimeTrie2);
+ utrie2_close(auxRuntimeTrie2);
#endif
} else {
/* write the data */
diff --git a/icu4c/source/tools/genprops/genprops.c b/icu4c/source/tools/genprops/genprops.c
index bb38812687..616f0687f4 100644
--- a/icu4c/source/tools/genprops/genprops.c
+++ b/icu4c/source/tools/genprops/genprops.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2005, International Business Machines
+* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -339,7 +339,7 @@ unicodeDataLineFn(void *context,
exit(U_PARSE_ERROR);
}
}
- if(!upvec_setValue(pv, p.code, p.code+1, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode)) {
+ if(!upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode)) {
fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(*pErrorCode));
exit(*pErrorCode);
}
@@ -544,7 +544,7 @@ repeatAreaProps() {
/* Hangul have canonical decompositions */
errorCode=U_ZERO_ERROR;
- if(!upvec_setValue(pv, 0xac00, 0xd7a4, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode)) {
+ if(!upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode)) {
fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(errorCode));
exit(errorCode);
}
diff --git a/icu4c/source/tools/genprops/props2.c b/icu4c/source/tools/genprops/props2.c
index ac34aa071f..75273eafb6 100644
--- a/icu4c/source/tools/genprops/props2.c
+++ b/icu4c/source/tools/genprops/props2.c
@@ -34,7 +34,7 @@
/* data --------------------------------------------------------------------- */
-static UNewTrie *trie;
+static UNewTrie *newTrie;
uint32_t *pv;
static int32_t pvCount;
@@ -166,17 +166,16 @@ singleEnumLineFn(void *context,
UErrorCode *pErrorCode) {
const SingleEnum *sen;
char *s;
- uint32_t start, limit, uv;
+ uint32_t start, end, uv;
int32_t value;
sen=(const SingleEnum *)context;
- u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
+ u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
exit(*pErrorCode);
}
- ++limit;
/* parse property alias */
s=trimTerminateField(fields[1][0], fields[1][1]);
@@ -205,7 +204,11 @@ singleEnumLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
- if(!upvec_setValue(pv, start, limit, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
+ if(start==0 && end==0x10ffff) {
+ /* Also set bits for initialValue and errorValue. */
+ end=UPVEC_MAX_CP;
+ }
+ if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
fprintf(stderr, "genprops error: unable to set %s code: %s\n",
sen->propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@@ -330,17 +333,16 @@ binariesLineFn(void *context,
UErrorCode *pErrorCode) {
const Binaries *bin;
char *s;
- uint32_t start, limit, uv;
+ uint32_t start, end, uv;
int32_t i;
bin=(const Binaries *)context;
- u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
+ u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
exit(*pErrorCode);
}
- ++limit;
/* parse binary property name */
s=(char *)u_skipWhitespace(fields[1][0]);
@@ -364,7 +366,11 @@ binariesLineFn(void *context,
}
uv=U_MASK(bin->binaries[i].vecShift);
- if(!upvec_setValue(pv, start, limit, bin->binaries[i].vecWord, uv, uv, pErrorCode)) {
+ if(start==0 && end==0x10ffff) {
+ /* Also set bits for initialValue and errorValue. */
+ end=UPVEC_MAX_CP;
+ }
+ if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode)) {
fprintf(stderr, "genprops error: unable to set %s code: %s\n",
bin->binaries[i].propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@@ -407,7 +413,7 @@ initAdditionalProperties() {
U_CFUNC void
exitAdditionalProperties() {
- utrie_close(trie);
+ utrie_close(newTrie);
upvec_close(pv);
}
@@ -478,10 +484,10 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
* W for plane 2
*/
*pErrorCode=U_ZERO_ERROR;
- if( !upvec_setValue(pv, 0xe000, 0xf900, 0, (uint32_t)(U_EA_AMBIGUOUS<15 || (*end!='.' && *end!=' ' && *end!='\t' && *end!=0)) {
+ value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
+ if(s==numberLimit || value==0 || value>15 || (*numberLimit!='.' && *numberLimit!=' ' && *numberLimit!='\t' && *numberLimit!=0)) {
fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
*pErrorCode=U_PARSE_ERROR;
exit(U_PARSE_ERROR);
@@ -536,10 +539,10 @@ ageLineFn(void *context,
version=value<<4;
/* parse minor version number */
- if(*end=='.') {
- s=(char *)u_skipWhitespace(end+1);
- value=(uint32_t)uprv_strtoul(s, &end, 10);
- if(s==end || value>15 || (*end!=' ' && *end!='\t' && *end!=0)) {
+ if(*numberLimit=='.') {
+ s=(char *)u_skipWhitespace(numberLimit+1);
+ value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
+ if(s==numberLimit || value>15 || (*numberLimit!=' ' && *numberLimit!='\t' && *numberLimit!=0)) {
fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
*pErrorCode=U_PARSE_ERROR;
exit(U_PARSE_ERROR);
@@ -547,7 +550,11 @@ ageLineFn(void *context,
version|=value;
}
- if(!upvec_setValue(pv, start, limit, 0, version<=0x80000000) {
+ if(numberLimit<=s || (*numberLimit!='.' && u_skipWhitespace(numberLimit)!=fields[1][1]) || value>=0x80000000) {
fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
exit(U_PARSE_ERROR);
}
@@ -641,7 +647,7 @@ numericLineFn(void *context,
/* the exponent may have been set above */
value=makeProps(&newProps);
- for(; startinitialValue, &errorCode);
+ }
+ utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
+ if(U_FAILURE(errorCode)) { /* fatal for the tool: report the failure and exit */
+ fprintf(
+ stderr,
+ "genprops error: deleting lead surrogate code unit values failed - %s\n", /* tool prefix matches this file's other diagnostics */
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
+ }
+
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
- &trie2,
+ trie2,
"\n};\n\n");
- usrc_writeUTrieStruct(f,
- "static const UTrie propsVectorsTrie={\n",
- &trie2, "propsVectorsTrie_index", NULL, NULL,
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 propsVectorsTrie={\n",
+ trie2, "propsVectorsTrie_index", NULL,
"};\n\n");
+
+ utrie2_close(trie2);
}
p+=length;
diff --git a/icu4c/source/tools/genprops/store.c b/icu4c/source/tools/genprops/store.c
index 4e642d4d3b..804649526c 100644
--- a/icu4c/source/tools/genprops/store.c
+++ b/icu4c/source/tools/genprops/store.c
@@ -430,6 +430,7 @@ generateData(const char *dataDir, UBool csource) {
if(csource) {
/* write .c file for hardcoded data */
UTrie trie={ NULL };
+ UTrie2 *trie2;
FILE *f;
utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
@@ -438,7 +439,36 @@ generateData(const char *dataDir, UBool csource) {
stderr,
"genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n",
u_errorName(errorCode));
- return;
+ exit(errorCode);
+ }
+
+ /* use UTrie2 */
+ dataInfo.formatVersion[0]=6;
+ dataInfo.formatVersion[2]=0;
+ dataInfo.formatVersion[3]=0;
+ trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(
+ stderr,
+ "genprops error: utrie2_fromUTrie() failed - %s\n",
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
+ {
+ /* delete lead surrogate code unit values */
+ UChar lead;
+ trie2=utrie2_cloneAsThawed(trie2, &errorCode);
+ for(lead=0xd800; lead<0xdc00; ++lead) {
+ utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
+ }
+ utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(
+ stderr,
+ "genprops error: deleting lead surrogate code unit values failed - %s\n",
+ u_errorName(errorCode));
+ exit(errorCode);
+ }
}
f=usrc_create(dataDir, "uchar_props_data.c");
@@ -451,13 +481,13 @@ generateData(const char *dataDir, UBool csource) {
"static const UVersionInfo dataVersion={",
dataInfo.dataVersion, 8, 4,
"};\n\n");
- usrc_writeUTrieArrays(f,
+ usrc_writeUTrie2Arrays(f,
"static const uint16_t propsTrie_index[%ld]={\n", NULL,
- &trie,
+ trie2,
"\n};\n\n");
- usrc_writeUTrieStruct(f,
- "static const UTrie propsTrie={\n",
- &trie, "propsTrie_index", NULL, NULL,
+ usrc_writeUTrie2Struct(f,
+ "static const UTrie2 propsTrie={\n",
+ trie2, "propsTrie_index", NULL,
"};\n\n");
additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
@@ -469,6 +499,7 @@ generateData(const char *dataDir, UBool csource) {
"};\n\n");
fclose(f);
}
+ utrie2_close(trie2);
} else {
/* write the data */
pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
diff --git a/icu4c/source/tools/toolutil/writesrc.c b/icu4c/source/tools/toolutil/writesrc.c
index 28303c733e..bf103f940d 100644
--- a/icu4c/source/tools/toolutil/writesrc.c
+++ b/icu4c/source/tools/toolutil/writesrc.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2005-2007, International Business Machines
+* Copyright (C) 2005-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -20,7 +20,7 @@
#include
#include "unicode/utypes.h"
#include "unicode/putil.h"
-#include "utrie.h"
+#include "utrie2.h"
#include "cstring.h"
#include "writesrc.h"
@@ -139,51 +139,63 @@ usrc_writeArray(FILE *f,
}
U_CAPI void U_EXPORT2
-usrc_writeUTrieArrays(FILE *f,
- const char *indexPrefix, const char *dataPrefix,
- const UTrie *pTrie,
- const char *postfix) {
+usrc_writeUTrie2Arrays(FILE *f,
+ const char *indexPrefix, const char *data32Prefix,
+ const UTrie2 *pTrie,
+ const char *postfix) {
if(pTrie->data32==NULL) {
/* 16-bit trie */
usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, postfix);
} else {
/* 32-bit trie */
usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix);
- usrc_writeArray(f, dataPrefix, pTrie->data32, 32, pTrie->dataLength, postfix);
+ usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, postfix);
}
}
U_CAPI void U_EXPORT2
-usrc_writeUTrieStruct(FILE *f,
- const char *prefix,
- const UTrie *pTrie,
- const char *indexName, const char *dataName,
- const char *getFoldingOffsetName,
- const char *postfix) {
+usrc_writeUTrie2Struct(FILE *f,
+ const char *prefix,
+ const UTrie2 *pTrie,
+ const char *indexName, const char *data32Name,
+ const char *postfix) {
if(prefix!=NULL) {
fputs(prefix, f);
}
- if(dataName==NULL) {
- dataName="NULL";
- }
- if(getFoldingOffsetName==NULL) {
- getFoldingOffsetName="utrie_defaultGetFoldingOffset";
+ if(pTrie->data32==NULL) {
+ /* 16-bit trie */
+ fprintf(
+ f,
+ " %s,\n" /* index */
+ " %s+%ld,\n" /* data16 */
+ " NULL,\n", /* data32 */
+ indexName,
+ indexName,
+ (long)pTrie->indexLength);
+ } else {
+ /* 32-bit trie */
+ fprintf(
+ f,
+ " %s,\n" /* index */
+ " NULL,\n" /* data16 */
+ " %s,\n", /* data32 */
+ indexName,
+ data32Name);
}
fprintf(
f,
- " %s,\n"
- " %s,\n"
- " %s,\n"
- " %ld,\n"
- " %ld,\n"
- " %lu,\n"
- " %s\n",
- indexName,
- dataName,
- getFoldingOffsetName,
+ " %ld,\n" /* indexLength */
+ " %ld,\n" /* dataLength */
+ " 0x%hx,\n" /* index2NullOffset */
+ " 0x%hx,\n" /* dataNullOffset */
+ " 0x%lx,\n" /* initialValue */
+ " 0x%lx,\n" /* errorValue */
+ " 0x%lx,\n" /* highStart */
+ " 0x%lx,\n", /* highValueIndex */
(long)pTrie->indexLength, (long)pTrie->dataLength,
- (unsigned long)pTrie->initialValue,
- pTrie->isLatin1Linear ? "TRUE" : "FALSE");
+ (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
+ (long)pTrie->initialValue, (long)pTrie->errorValue,
+ (long)pTrie->highStart, (long)pTrie->highValueIndex);
if(postfix!=NULL) {
fputs(postfix, f);
}
diff --git a/icu4c/source/tools/toolutil/writesrc.h b/icu4c/source/tools/toolutil/writesrc.h
index 613ee9d707..3636dcae49 100644
--- a/icu4c/source/tools/toolutil/writesrc.h
+++ b/icu4c/source/tools/toolutil/writesrc.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2005, International Business Machines
+* Copyright (C) 2005-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -21,7 +21,7 @@
#include
#include "unicode/utypes.h"
-#include "utrie.h"
+#include "utrie2.h"
/**
* Create a source text file and write a header comment with the ICU copyright.
@@ -43,28 +43,26 @@ usrc_writeArray(FILE *f,
const char *postfix);
/**
- * Calls usrc_writeArray() for the index and data arrays of a runtime UTrie.
- * Only the index array is written for a 16-bit UTrie. In this case, dataPrefix
+ * Calls usrc_writeArray() for the index and data arrays of a frozen UTrie2.
+ * Only the index array is written for a 16-bit UTrie2. In this case, dataPrefix
* is ignored and can be NULL.
*/
U_CAPI void U_EXPORT2
-usrc_writeUTrieArrays(FILE *f,
- const char *indexPrefix, const char *dataPrefix,
- const UTrie *pTrie,
- const char *postfix);
+usrc_writeUTrie2Arrays(FILE *f,
+ const char *indexPrefix, const char *dataPrefix,
+ const UTrie2 *pTrie,
+ const char *postfix);
/**
- * Writes the UTrie struct values.
+ * Writes the UTrie2 struct values.
* The {} and declaration etc. need to be included in prefix/postfix or
* printed before and after the array contents.
- * If getFoldingOffsetName==NULL then "utrie_defaultGetFoldingOffset" is printed.
*/
U_CAPI void U_EXPORT2
-usrc_writeUTrieStruct(FILE *f,
- const char *prefix,
- const UTrie *pTrie,
- const char *indexName, const char *dataName,
- const char *getFoldingOffsetName,
- const char *postfix);
+usrc_writeUTrie2Struct(FILE *f,
+ const char *prefix,
+ const UTrie2 *pTrie,
+ const char *indexName, const char *dataName,
+ const char *postfix);
#endif