ICU-880 Added tests for incremental normalization

X-SVN-Rev: 4706
This commit is contained in:
Andy Heninger 2001-05-17 23:09:35 +00:00
parent d34c838277
commit 51854ca791
3 changed files with 199 additions and 127 deletions

View File

@ -691,13 +691,24 @@ void collIterNormalize(collIterate *collationSource)
if (status == U_BUFFER_OVERFLOW_ERROR) {
freeHeapWritableBuffer(collationSource);
collationSource->writableBuffer = (UChar *)uprv_malloc((normLen+1)*sizeof(UChar));
collationSource->flags |= UCOL_ITER_ALLOCATED;
/* to enable null termination */
collationSource->writableBufSize = normLen + 1;
status = U_ZERO_ERROR;
unorm_normalize(srcP, endP-srcP, UNORM_NFD, 0, collationSource->writableBuffer,
collationSource->writableBufSize, &status);
collationSource->writableBufSize, &status);
if (status != U_ZERO_ERROR) {
#ifdef UCOL_DEBUG
fprintf(stderr, "collIterNormalize(), normalize #2 failed, status = %d\n", status);
#endif
return;
}
collationSource->writableBuffer[normLen] = 0;
}
else {
#ifdef UCOL_DEBUG
fprintf(stderr, "collIterNormalize(), normalize #1 failed, status = %d\n", status);
#endif
return;
}
}
@ -2514,6 +2525,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
there's no guarantee of the right character position after
this bail*/
*status = U_BUFFER_OVERFLOW_ERROR;
source->CEpos = source->CEs;
return UCOL_NULLORDER;
}
CE = ucol_IGetNextCE(coll, &temp, status);
@ -2572,7 +2584,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
/* However, it is used only when stack buffers are not sufficiently big, and then we're messed up performance wise */
/* anyway */
uint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *second, uint32_t *secSize, uint32_t newSize, UErrorCode *status) {
#ifdef UCOLL_DEBUG
#ifdef UCOL_DEBUG
fprintf(stderr, ".");
#endif
uint8_t *newStart = NULL;
@ -3621,8 +3633,13 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
if(resultLength == 0 || primaries == NULL) {
return ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
int32_t t = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
if(normSource != normBuffer) {
uprv_free(normSource);
}
return t;
}
uint8_t *primarySafeEnd = primaries + resultLength - 2;
uint32_t minBufferSize = UCOL_MAX_BUFFER;
@ -4437,10 +4454,10 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
}
if (sAlloc) {
delete sBuf;
uprv_free(sBuf);
}
if (tAlloc) {
delete tBuf;
uprv_free(tBuf);
}
return result;
@ -4474,18 +4491,17 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
if (b->buf != b->localArray) {
delete b->buf;
uprv_free(b->buf);
}
b->buf = newBuf;
b->endp = b->buf + newSize;
b->pos = b->buf + oldSize;
}
inline void UCOL_CEBUF_CHECK(ucol_CEBuf *b, collIterate *ci) {
if ((b)->pos == (b)->endp) ucol_CEBuf_Expand(b, ci);
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci) {
if (b->pos == b->endp) {
ucol_CEBuf_Expand(b, ci);
}
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce) {
*(b)->pos++ = ce;
};
@ -4638,20 +4654,11 @@ ucol_strcoll( const UCollator *coll,
uint32_t sOrder=0, tOrder=0;
if(!shifted) {
for(;;) {
// TODO: Verify that at most one CE an be added per buf per time through here.
UCOL_CEBUF_CHECK(&sCEs , &sColl);
UCOL_CEBUF_CHECK(&sCEs , &sColl);
/* Get the next collation element in each of the strings, unless */
/* we've been requested to skip it. */
while(sOrder == 0) {
// UCOL_GETNEXTCE(sOrder, coll, sColl, &status);
sOrder = ucol_IGetNextCE(coll, &sColl, &status);
//if(!isContinuation(sOrder)) {
// sOrder ^= caseSwitch;
//}
// *(sCEs++) = sOrder;
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
sOrder &= 0xFFFF0000;
}
@ -4661,7 +4668,7 @@ ucol_strcoll( const UCollator *coll,
//if(!isContinuation(tOrder)) {
// tOrder ^= caseSwitch;
//}
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
// *(tCEs++) = tOrder;
tOrder &= 0xFFFF0000;
}
@ -4689,7 +4696,7 @@ ucol_strcoll( const UCollator *coll,
// UCOL_GETNEXTCE(sOrder, coll, sColl, &status);
sOrder = ucol_IGetNextCE(coll, &sColl, &status);
if(sOrder == UCOL_NO_MORE_CES) {
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
break;
} else if((sOrder & 0xFFFFFFBF) == 0) {
continue;
@ -4697,13 +4704,13 @@ ucol_strcoll( const UCollator *coll,
if((sOrder & 0xFFFF0000) > 0) { /* There is primary value */
if(sInShifted) {
sOrder &= 0xFFFF0000;
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
// *(sCEs++) = sOrder;
continue;
} else {
//sOrder ^= caseSwitch;
// *(sCEs++) = sOrder;
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
break;
}
} else { /* Just lower level values */
@ -4711,26 +4718,26 @@ ucol_strcoll( const UCollator *coll,
continue;
} else {
//sOrder ^= caseSwitch;
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
// *(sCEs++) = sOrder;
continue;
}
}
} else { /* regular */
if(sOrder > LVT) {
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
// *(sCEs++) = sOrder;
break;
} else {
if((sOrder & 0xFFFF0000) > 0) {
sInShifted = TRUE;
sOrder &= 0xFFFF0000;
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
// *(sCEs++) = sOrder;
continue;
} else {
//sOrder ^= caseSwitch;
UCOL_CEBUF_PUT(&sCEs, sOrder);
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
sInShifted = FALSE;
// *(sCEs++) = sOrder;
continue;
@ -4745,7 +4752,7 @@ ucol_strcoll( const UCollator *coll,
// UCOL_GETNEXTCE(tOrder, coll, tColl, &status);
tOrder = ucol_IGetNextCE(coll, &tColl, &status);
if(tOrder == UCOL_NO_MORE_CES) {
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
// *(tCEs++) = tOrder;
break;
} else if((tOrder & 0xFFFFFFBF) == 0) {
@ -4754,12 +4761,12 @@ ucol_strcoll( const UCollator *coll,
if((tOrder & 0xFFFF0000) > 0) { /* There is primary value */
if(tInShifted) {
tOrder &= 0xFFFF0000;
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
// *(tCEs++) = tOrder;
continue;
} else {
//tOrder ^= caseSwitch;
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
// *(tCEs++) = tOrder;
break;
}
@ -4768,14 +4775,14 @@ ucol_strcoll( const UCollator *coll,
continue;
} else {
//tOrder ^= caseSwitch;
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
// *(tCEs++) = tOrder;
continue;
}
}
} else { /* regular */
if(tOrder > LVT) {
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
// *(tCEs++) = tOrder;
break;
} else {
@ -4783,11 +4790,11 @@ ucol_strcoll( const UCollator *coll,
tInShifted = TRUE;
tOrder &= 0xFFFF0000;
// *(tCEs++) = tOrder;
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
continue;
} else {
//tOrder ^= caseSwitch;
UCOL_CEBUF_PUT(&tCEs, tOrder);
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
tInShifted = FALSE;
// *(tCEs++) = tOrder;
continue;

View File

@ -235,26 +235,42 @@ void doTestVariant(UCollator* myCollation, const UChar source[], const UChar tar
{
int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
int temp=0, gSortklen1=0,gSortklen2=0;
UCollationResult compareResult, keyResult, incResult = result;
uint8_t *sortKey1, *sortKey2;
UCollationResult compareResult, compareResulta, keyResult, incResult = result;
uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
uint32_t sLen = u_strlen(source);
uint32_t tLen = u_strlen(target);
char buffer[256];
uint32_t len;
compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
if (compareResult != compareResulta) {
log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
}
sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);
sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
sortKey1=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);
sortKey2=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);
/* Check that sort key generated with null terminated string is identical */
/* to that generated with a length specified. */
if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
log_err("Sort Keys from null terminated and explicit length strings differ.\n");
}
/*memcmp(sortKey1, sortKey2,sortklenmax);*/
temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
@ -281,6 +297,9 @@ void doTestVariant(UCollator* myCollation, const UChar source[], const UChar tar
reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, incResult, result );
free(sortKey1);
free(sortKey2);
free(sortKey1a);
free(sortKey2a);
}
void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)

View File

@ -1763,6 +1763,51 @@ static void TestJ815() {
}
static void TestIncrementalNormalize() {
UChar baseA =0x41;
UChar baseB = 0x42;
UChar ccMix[] = {0x316, 0x321, 0x300};
// 0x316 is combining grave accent below, cc=220
// 0x321 is combining palatalized hook below, cc=202
// 0x300 is combining grave accent, cc=230
int maxSLen = 2000;
int sLen;
int i;
UChar *strA;
UChar *strB;
UCollator *coll;
UErrorCode status = U_ZERO_ERROR;
strA = uprv_malloc((maxSLen+1) * sizeof(UChar));
strB = uprv_malloc((maxSLen+1) * sizeof(UChar));
coll = ucol_open("en_US", &status);
ucol_setNormalization(coll, UNORM_NFD);
// for (sLen = 4; sLen<maxSLen; sLen++) {
for (sLen = 1000; sLen<1001; sLen++) {
strA[0] = baseA;
strB[0] = baseA;
for (i=1; i<=sLen-1; i++) {
strA[i] = ccMix[i % 3];
strB[sLen-i] = ccMix[i % 3];
}
strA[sLen] = 0;
strB[sLen] = 0;
doTest(coll, strA, strB, UCOL_EQUAL);
}
ucol_close(coll);
uprv_free(strA);
uprv_free(strB);
}
void addMiscCollTest(TestNode** root)
{
addTest(root, &TestCase, "tscoll/cmsccoll/TestCase");
@ -1785,6 +1830,7 @@ void addMiscCollTest(TestNode** root)
/*addTest(root, &TestUnmappedSpaces, "tscoll/cmsccoll/TestUnmappedSpaces");*/
/*addTest(root, &PrintMarkDavis, "tscoll/cmsccoll/PrintMarkDavis");*/
/*addTest(root, &TestVariableTop, "tscoll/cmsccoll/TestVariableTop");*/
addTest(root, &TestIncrementalNormalize, "tscoll/cmsccoll/TestIncrementalNormalize");
addTest(root, &TestComposeDecompose, "tscoll/cmsccoll/TestComposeDecompose");
}