scuffed-code/icu4c/source/test/cintltst/trietest.c
2002-03-28 18:26:25 +00:00

596 lines
17 KiB
C

/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: trietest.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001nov20
* created by: Markus W. Scherer
*/
#include <stdio.h>
#include "unicode/utypes.h"
#include "utrie.h"
#include "cstring.h"
#if 1
#include "cintltst.h"
#else
/* definitions from standalone utrie development */
#define log_err printf
#define log_verbose printf
#undef u_errorName
#define u_errorName(errorCode) "some error code"
#endif
/*
 * Number of elements of a real array object (not of a pointer).
 * The argument is parenthesized before it is indexed so that any
 * array-valued expression can be passed safely.
 */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))
/*
 * One instruction for filling the trie under construction: store `value`
 * for the code points start..limit-1.
 * The instructions of one test may overlap and may be listed out of order.
 */
typedef struct SetRange {
    UChar32 start;      /* first code point of the range */
    UChar32 limit;      /* one past the last code point of the range */
    uint32_t value;     /* value to store for the range */
    UBool overwrite;    /* handed to utrie_setRange32(); presumably "replace values that were already set" */
} SetRange;
/*
 * Values for testing:
 * value is expected for every code point from the previous boundary's
 * limit up to, but not including, this boundary's limit.
 * The first entry of each CheckRange table has limit 0 and therefore
 * describes an empty range; testTrieRanges() also uses that entry's value
 * as the initial value when opening the trie.
 */
typedef struct CheckRange {
UChar32 limit; /* exclusive end of the range */
uint32_t value; /* value expected throughout the range */
} CheckRange;
/*
 * Scratch buffer shared by all test runs: testTrieRanges() serializes the
 * trie it built into this buffer with utrie_serialize() and then reads the
 * trie back from the same bytes with utrie_unserialize().
 * NOTE(review): this is a plain byte array; confirm that utrie_unserialize()
 * does not need stronger alignment than uint8_t guarantees.
 */
static uint8_t storage[100000];
/*
 * Folded-value callback handed to utrie_serialize() when 32-bit data is
 * requested.
 *
 * Looks at the 0x400 code points beginning at start and ORs their values
 * together; whenever utrie_get32() reports that a code point lies in
 * "block zero", the rest of that data block is skipped in one step.
 *
 * Returns 0 if no bit was collected; otherwise the collected bits in the
 * lower half combined with offset in the upper 16 bits, which is where
 * _testFoldingOffset32() looks for it.
 */
static uint32_t U_CALLCONV
_testFoldedValue32(UNewTrie *trie, UChar32 start, int32_t offset) {
    const UChar32 end=start+0x400;
    uint32_t combined=0;
    UChar32 c=start;
    UBool blockIsZero;

    while(c<end) {
        uint32_t v=utrie_get32(trie, c, &blockIsZero);
        if(!blockIsZero) {
            combined|=v;
            ++c;
            continue;
        }
        /* nothing to collect here: jump ahead by a whole data block */
        c+=UTRIE_DATA_BLOCK_LENGTH;
    }

    return combined!=0 ? (((uint32_t)offset<<16)|combined) : 0;
}
/*
 * Folding-offset callback for the 32-bit test tries: extracts the offset
 * that _testFoldedValue32() stored in the upper 16 bits of a lead
 * surrogate's value. Yields 0 when those bits are all clear.
 */
static int32_t U_CALLCONV
_testFoldingOffset32(uint32_t data) {
    const uint32_t upperHalf=data>>16;

    return (int32_t)upperHalf;
}
/*
 * Folded-value callback handed to utrie_serialize() when 16-bit data is
 * requested.
 *
 * Scans the 0x400 code points beginning at start, skipping a whole data
 * block at a time where utrie_get32() reports "block zero", and checks
 * whether any of the remaining values has a bit set.
 *
 * Returns 0 if all values are 0; otherwise offset with bit 15 set, which
 * is the flag that _testFoldingOffset16() tests for. The values themselves
 * are not part of the result.
 */
static uint32_t U_CALLCONV
_testFoldedValue16(UNewTrie *trie, UChar32 start, int32_t offset) {
    UChar32 c, end;
    uint32_t bits;
    UBool blockIsZero;

    bits=0;
    end=start+0x400;
    for(c=start; c<end;) {
        uint32_t v=utrie_get32(trie, c, &blockIsZero);
        if(blockIsZero) {
            c+=UTRIE_DATA_BLOCK_LENGTH;
        } else {
            bits|=v;
            ++c;
        }
    }

    if(bits==0) {
        return 0;
    }
    return (uint32_t)(offset|0x8000);
}
/*
 * Folding-offset callback for the 16-bit test tries: if bit 15 of data is
 * set (the marker written by _testFoldedValue16()), the offset is the
 * lower 15 bits; without the marker the result is 0.
 */
static int32_t U_CALLCONV
_testFoldingOffset16(uint32_t data) {
    return (data&0x8000)!=0 ? (int32_t)(data&0x7fff) : 0;
}
/*
 * Value callback given to utrie_enum(): scrambles each trie value by
 * XORing it with 0x5555. _testEnumRange() applies the same XOR again to
 * get the stored value back. The context pointer is not needed here.
 */
static uint32_t U_CALLCONV
_testEnumValue(const void *context, uint32_t value) {
    const uint32_t mask=0x5555;

    (void)context;
    return value^mask;
}
/*
 * Range callback given to utrie_enum().
 *
 * context points to a "const CheckRange *" cursor owned by the caller.
 * Each call consumes one entry: the cursor is advanced, and the delivered
 * range [start, limit[ with its value is compared with that entry. The
 * expected start is the limit of the entry before it, which is why every
 * CheckRange table begins with a dummy entry.
 * The value is XORed with 0x5555 first to undo _testEnumValue().
 *
 * Always returns TRUE; mismatches are only reported through log_err().
 *
 * NOTE(review): nothing stops the cursor from running past the end of the
 * table if utrie_enum() delivers more ranges than expected.
 */
static UBool U_CALLCONV
_testEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    const CheckRange **pb=(const CheckRange **)context;
    const CheckRange *b=(*pb)++;

    value^=0x5555;
    if(start!=(b-1)->limit || limit!=b->limit || value!=b->value) {
        /* the format uses %lx throughout, so every argument is widened to unsigned long */
        log_err("error: utrie_enum() delivers wrong range [U+%04lx..U+%04lx[.0x%lx instead of [U+%04lx..U+%04lx[.0x%lx\n",
            (unsigned long)start, (unsigned long)limit, (unsigned long)value,
            (unsigned long)(b-1)->limit, (unsigned long)b->limit, (unsigned long)b->value);
    }
    return TRUE;
}
static void
testTrieIteration(const char *testName,
const UTrie *trie,
const CheckRange checkRanges[], int32_t countCheckRanges) {
UChar s[100];
uint32_t values[30];
const UChar *p, *limit;
uint32_t value;
UChar32 c;
int32_t i, length, countValues;
UChar c2;
/* write a string */
length=countValues=0;
for(i=0; i<countCheckRanges; ++i) {
c=checkRanges[i].limit;
if(c!=0) {
--c;
UTF_APPEND_CHAR_UNSAFE(s, length, c);
values[countValues++]=checkRanges[i].value;
}
}
limit=s+length;
/* try forward */
p=s;
i=0;
while(p<limit) {
c=c2=0x33;
if(trie->data32!=NULL) {
UTRIE_NEXT32(trie, p, limit, c, c2, value);
} else {
UTRIE_NEXT16(trie, p, limit, c, c2, value);
}
if(value!=values[i]) {
log_err("error: wrong value from UTRIE_NEXT(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
testName, c, c2, value, values[i]);
}
if(
c2==0 ?
c!=*(p-1) :
!UTF_IS_LEAD(c) || !UTF_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
) {
log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n",
testName, c, c2);
continue;
}
if(c2!=0) {
int32_t offset;
if(trie->data32==NULL) {
value=UTRIE_GET16_FROM_LEAD(trie, c);
offset=trie->getFoldingOffset(value);
if(offset>0) {
value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2);
} else {
value=trie->initialValue;
}
} else {
value=UTRIE_GET32_FROM_LEAD(trie, c);
offset=trie->getFoldingOffset(value);
if(offset>0) {
value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2);
} else {
value=trie->initialValue;
}
}
if(value!=values[i]) {
log_err("error: wrong value from UTRIE_GETXX_FROM_OFFSET_TRAIL(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
testName, c, c2, value, values[i]);
}
}
if(c2!=0) {
value=0x44;
if(trie->data32==NULL) {
UTRIE_GET16_FROM_PAIR(trie, c, c2, value);
} else {
UTRIE_GET32_FROM_PAIR(trie, c, c2, value);
}
if(value!=values[i]) {
log_err("error: wrong value from UTRIE_GETXX_FROM_PAIR(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
testName, c, c2, value, values[i]);
}
}
++i;
}
/* try backward */
p=limit;
i=countValues;
while(s<p) {
--i;
c=c2=0x33;
if(trie->data32!=NULL) {
UTRIE_PREVIOUS32(trie, s, p, c, c2, value);
} else {
UTRIE_PREVIOUS16(trie, s, p, c, c2, value);
}
if(value!=values[i]) {
log_err("error: wrong value from UTRIE_PREVIOUS(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
testName, c, c2, value, values[i]);
}
if(
c2==0 ?
c!=*p:
!UTF_IS_LEAD(c) || !UTF_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
) {
log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n",
testName, c, c2);
}
}
}
/*
 * Build a trie from setRanges[], verify it, serialize it into the shared
 * storage buffer, unserialize it and verify the result again.
 *
 * Steps:
 *  1. open a new trie whose initial value is checkRanges[0].value
 *  2. apply all setRanges[] (utrie_set32() for single code points with
 *     overwrite, utrie_setRange32() otherwise)
 *  3. compare every code point of the new trie with checkRanges[]
 *  4. serialize (16- or 32-bit data depending on dataIs32)
 *  5. if latin1Linear, check the first 0x100 values via utrie_getData()
 *  6. unserialize and compare every code point again through the
 *     UTRIE_GETxx macros, skipping a check range that starts at U+D800
 *  7. enumerate all ranges with utrie_enum()
 *  8. check the linear Latin-1 array, then run testTrieIteration()
 *
 * testName          only used in messages
 * setRanges/count   values to set, see SetRange
 * checkRanges/count expected values, see CheckRange; entry 0 is a dummy
 *                   whose value is the initial value
 * dataIs32          TRUE for 32-bit data, FALSE for 16-bit data
 * latin1Linear      handed to utrie_open(); expected to be preserved by
 *                   serialization
 *
 * All problems are reported through log_err(). The new trie is closed on
 * every path once it has been opened successfully.
 */
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    UTrieGetFoldingOffset *getFoldingOffset;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
    if(newTrie==NULL) {
        log_err("error: utrie_open(%s) failed\n", testName);
        return;
    }

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        utrie_close(newTrie);   /* do not leak the builder on this early exit */
        return;
    }

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)utrie_get32(newTrie, start, NULL),
                        (unsigned long)value);
            }
            ++start;
        }
    }

    /* the folding-offset reader must match the folded-value writer used for serializing */
    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storage, sizeof(storage),
                           dataIs32 ? _testFoldedValue32 : _testFoldedValue16,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)data[UTRIE_DATA_BLOCK_LENGTH+start],
                            (unsigned long)value);
                }
                ++start;
            }
        }
    }

    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    trie.getFoldingOffset=getFoldingOffset;

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)value2, (unsigned long)value);
                }
                if(!UTF_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, (unsigned long)start,
                                (unsigned long)value2, (unsigned long)value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)value2, (unsigned long)value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges; start after the dummy entry, _testEnumRange() looks back by one */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET32_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET16_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}
/*
 * Run testTrieRanges() twice on the same data: once without and once with
 * the latin1Linear option. The second run is reported under testName with
 * "-latin1Linear" appended.
 *
 * The suffixed name is assembled in a 40-byte buffer without a length
 * check; testName must be at most 26 characters long so that name, suffix
 * and terminator fit.
 */
static void
testTrieRanges2(const char *testName,
                const SetRange setRanges[], int32_t countSetRanges,
                const CheckRange checkRanges[], int32_t countCheckRanges,
                UBool dataIs32) {
    char linearName[40];

    /* first run: latin1Linear off, caller's name unchanged */
    testTrieRanges(testName, setRanges, countSetRanges,
                   checkRanges, countCheckRanges, dataIs32, FALSE);

    /* second run: latin1Linear on, name tagged accordingly */
    uprv_strcpy(linearName, testName);
    uprv_strcat(linearName, "-latin1Linear");
    testTrieRanges(linearName, setRanges, countSetRanges,
                   checkRanges, countCheckRanges, dataIs32, TRUE);
}
static void
testTrieRanges4(const char *testName,
const SetRange setRanges[], int32_t countSetRanges,
const CheckRange checkRanges[], int32_t countCheckRanges) {
char name[40];
uprv_strcpy(name, testName);
uprv_strcat(name, ".32");
testTrieRanges2(name,
setRanges, countSetRanges,
checkRanges, countCheckRanges,
TRUE);
uprv_strcpy(name, testName);
uprv_strcat(name, ".16");
testTrieRanges2(name,
setRanges, countSetRanges,
checkRanges, countCheckRanges,
FALSE);
}
/* test data ----------------------------------------------------------------*/
/*
 * Data set 1: set consecutive ranges, even with value 0.
 * The entries are in ascending order and each one starts where the
 * previous one ends, from U+0000 up to 0x110000. None of them asks for
 * overwriting. Each entry therefore corresponds to exactly one entry of
 * checkRanges1[] after the dummy.
 * Columns: start, limit (exclusive), value, overwrite.
 */
static const SetRange
setRanges1[]={
{0, 0x20, 0, FALSE},
{0x20, 0xa7, 0x1234, FALSE},
{0xa7, 0x3400, 0, FALSE},
{0x3400, 0x9fa6, 0x6162, FALSE},
{0x9fa6, 0xdada, 0x3132, FALSE},
{0xdada, 0xeeee, 0x27, FALSE},
{0xeeee, 0x11111, 1, FALSE},
{0x11111, 0x44444, 0x6162, FALSE},
{0x44444, 0xf0003, 0, FALSE},
{0xf0003, 0xf0004, 0xf, FALSE},
{0xf0004, 0xf0006, 0x10, FALSE},
{0xf0006, 0xf0007, 0x11, FALSE},
{0xf0007, 0xf0020, 0x12, FALSE},
{0xf0020, 0x110000, 0, FALSE}
};
/*
 * Expected trie contents after applying setRanges1[].
 * Columns: limit (exclusive), value expected from the previous limit on.
 * The value of the first entry (0) is also the initial value that
 * testTrieRanges() opens the trie with.
 */
static const CheckRange
checkRanges1[]={
{0, 0}, /* dummy start range to make _testEnumRange() simpler */
{0x20, 0},
{0xa7, 0x1234},
{0x3400, 0},
{0x9fa6, 0x6162},
{0xdada, 0x3132},
{0xeeee, 0x27},
{0x11111,1},
{0x44444,0x6162},
{0xf0003,0},
{0xf0004,0xf},
{0xf0006,0x10},
{0xf0007,0x11},
{0xf0020,0x12},
{0x110000, 0}
};
/*
 * Data set 2: set some interesting overlapping ranges.
 * The entries are deliberately out of order and overlap each other, with
 * a mix of overwrite TRUE and FALSE; checkRanges2[] spells out the
 * outcome that is expected. For example, according to checkRanges2[] the
 * non-overwriting entry {0x2f900,0x2ffee, 1} only takes effect from
 * 0x2fedc on, where no earlier entry had stored a value.
 * The last entry is a single code point with overwrite TRUE, which
 * testTrieRanges() sets through utrie_set32() instead of
 * utrie_setRange32().
 * Columns: start, limit (exclusive), value, overwrite.
 */
static const SetRange
setRanges2[]={
{0x21, 0x7f, 0x5555, TRUE},
{0x2f800,0x2fedc, 0x7a, TRUE},
{0x72, 0xdd, 3, TRUE},
{0xdd, 0xde, 4, FALSE},
{0x2f987,0x2fa98, 5, TRUE},
{0x2f777,0x2f833, 0, TRUE},
{0x2f900,0x2ffee, 1, FALSE},
{0x2ffee,0x2ffef, 2, TRUE}
};
/*
 * Expected trie contents after applying setRanges2[] in the listed order.
 * Columns: limit (exclusive), value expected from the previous limit on.
 */
static const CheckRange
checkRanges2[]={
{0, 0}, /* dummy start range to make _testEnumRange() simpler */
{0x21, 0},
{0x72, 0x5555},
{0xdd, 3},
{0xde, 4},
{0x2f833,0},
{0x2f987,0x7a},
{0x2fa98,5},
{0x2fedc,0x7a},
{0x2ffee,1},
{0x2ffef,2},
{0x110000, 0}
};
/*
 * Data set 3: use a non-zero initial value.
 * The initial value is 9, taken from checkRanges3[0].value. One entry
 * stores that same value 9 explicitly for 0x30000..0x34566;
 * checkRanges3[] expects this stretch to merge with its surroundings
 * into a single range with value 9.
 * Columns: start, limit (exclusive), value, overwrite.
 */
static const SetRange
setRanges3[]={
{0x31, 0xa4, 1, FALSE},
{0x3400, 0x6789, 2, FALSE},
{0x30000,0x34567,9, TRUE},
{0x45678,0x56789,3, TRUE}
};
/*
 * Expected trie contents after applying setRanges3[].
 * Columns: limit (exclusive), value expected from the previous limit on.
 */
static const CheckRange
checkRanges3[]={
{0, 9}, /* dummy start range, also carries the initial value */
{0x31, 9},
{0xa4, 1},
{0x3400, 9},
{0x6789, 2},
{0x45678,9},
{0x56789,3},
{0x110000,9}
};
static void
TrieTest(void) {
testTrieRanges4("set1",
setRanges1, ARRAY_LENGTH(setRanges1),
checkRanges1, ARRAY_LENGTH(checkRanges1));
testTrieRanges4("set2-overlap",
setRanges2, ARRAY_LENGTH(setRanges2),
checkRanges2, ARRAY_LENGTH(checkRanges2));
testTrieRanges4("set3-initial-9",
setRanges3, ARRAY_LENGTH(setRanges3),
checkRanges3, ARRAY_LENGTH(checkRanges3));
}
/*
 * Entry point. The "#if 1" branch is the one that is compiled: it hooks
 * the test into the cintltst test tree. The "#else" branch is a
 * standalone main() for utrie development. This switch belongs together
 * with the "#if 1" around the cintltst.h include near the top of the
 * file; both need to be flipped together.
 */
#if 1
/* prototype directly ahead of the definition; presumably because no header declares this function */
void
addTrieTest(TestNode** root);
/* Register TrieTest() with the test framework under the path "tsutil/TrieTest". */
void
addTrieTest(TestNode** root) {
addTest(root, &TrieTest, "tsutil/TrieTest");
}
#else
/* standalone utrie development */
/* Runs the test directly; always returns 0, failures only show up in the printed output. */
int main(int argc, const char *argv[]) {
TrieTest();
return 0;
}
#endif