ICU-880 Added tests for incremental normalization
X-SVN-Rev: 4706
This commit is contained in:
parent
d34c838277
commit
51854ca791
@ -691,13 +691,24 @@ void collIterNormalize(collIterate *collationSource)
|
||||
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
freeHeapWritableBuffer(collationSource);
|
||||
collationSource->writableBuffer = (UChar *)uprv_malloc((normLen+1)*sizeof(UChar));
|
||||
collationSource->flags |= UCOL_ITER_ALLOCATED;
|
||||
/* to enable null termination */
|
||||
collationSource->writableBufSize = normLen + 1;
|
||||
status = U_ZERO_ERROR;
|
||||
unorm_normalize(srcP, endP-srcP, UNORM_NFD, 0, collationSource->writableBuffer,
|
||||
collationSource->writableBufSize, &status);
|
||||
collationSource->writableBufSize, &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
#ifdef UCOL_DEBUG
|
||||
fprintf(stderr, "collIterNormalize(), normalize #2 failed, status = %d\n", status);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
collationSource->writableBuffer[normLen] = 0;
|
||||
}
|
||||
else {
|
||||
#ifdef UCOL_DEBUG
|
||||
fprintf(stderr, "collIterNormalize(), normalize #1 failed, status = %d\n", status);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2514,6 +2525,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
there's no guarantee of the right character position after
|
||||
this bail*/
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
source->CEpos = source->CEs;
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
CE = ucol_IGetNextCE(coll, &temp, status);
|
||||
@ -2572,7 +2584,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
/* However, it is used only when stack buffers are not sufficiently big, and then we're messed up performance wise */
|
||||
/* anyway */
|
||||
uint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *second, uint32_t *secSize, uint32_t newSize, UErrorCode *status) {
|
||||
#ifdef UCOLL_DEBUG
|
||||
#ifdef UCOL_DEBUG
|
||||
fprintf(stderr, ".");
|
||||
#endif
|
||||
uint8_t *newStart = NULL;
|
||||
@ -3621,8 +3633,13 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
|
||||
|
||||
if(resultLength == 0 || primaries == NULL) {
|
||||
return ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
|
||||
int32_t t = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
|
||||
if(normSource != normBuffer) {
|
||||
uprv_free(normSource);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
uint8_t *primarySafeEnd = primaries + resultLength - 2;
|
||||
|
||||
uint32_t minBufferSize = UCOL_MAX_BUFFER;
|
||||
@ -4437,10 +4454,10 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
|
||||
}
|
||||
|
||||
if (sAlloc) {
|
||||
delete sBuf;
|
||||
uprv_free(sBuf);
|
||||
}
|
||||
if (tAlloc) {
|
||||
delete tBuf;
|
||||
uprv_free(tBuf);
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -4474,18 +4491,17 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
|
||||
newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
|
||||
uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
|
||||
if (b->buf != b->localArray) {
|
||||
delete b->buf;
|
||||
uprv_free(b->buf);
|
||||
}
|
||||
b->buf = newBuf;
|
||||
b->endp = b->buf + newSize;
|
||||
b->pos = b->buf + oldSize;
|
||||
}
|
||||
|
||||
inline void UCOL_CEBUF_CHECK(ucol_CEBuf *b, collIterate *ci) {
|
||||
if ((b)->pos == (b)->endp) ucol_CEBuf_Expand(b, ci);
|
||||
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci) {
|
||||
if (b->pos == b->endp) {
|
||||
ucol_CEBuf_Expand(b, ci);
|
||||
}
|
||||
|
||||
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce) {
|
||||
*(b)->pos++ = ce;
|
||||
};
|
||||
|
||||
@ -4638,20 +4654,11 @@ ucol_strcoll( const UCollator *coll,
|
||||
uint32_t sOrder=0, tOrder=0;
|
||||
if(!shifted) {
|
||||
for(;;) {
|
||||
// TODO: Verify that at most one CE can be added per buf per time through here.
|
||||
UCOL_CEBUF_CHECK(&sCEs , &sColl);
|
||||
UCOL_CEBUF_CHECK(&sCEs , &sColl);
|
||||
|
||||
/* Get the next collation element in each of the strings, unless */
|
||||
/* we've been requested to skip it. */
|
||||
while(sOrder == 0) {
|
||||
// UCOL_GETNEXTCE(sOrder, coll, sColl, &status);
|
||||
sOrder = ucol_IGetNextCE(coll, &sColl, &status);
|
||||
//if(!isContinuation(sOrder)) {
|
||||
// sOrder ^= caseSwitch;
|
||||
//}
|
||||
// *(sCEs++) = sOrder;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
sOrder &= 0xFFFF0000;
|
||||
}
|
||||
|
||||
@ -4661,7 +4668,7 @@ ucol_strcoll( const UCollator *coll,
|
||||
//if(!isContinuation(tOrder)) {
|
||||
// tOrder ^= caseSwitch;
|
||||
//}
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
// *(tCEs++) = tOrder;
|
||||
tOrder &= 0xFFFF0000;
|
||||
}
|
||||
@ -4689,7 +4696,7 @@ ucol_strcoll( const UCollator *coll,
|
||||
// UCOL_GETNEXTCE(sOrder, coll, sColl, &status);
|
||||
sOrder = ucol_IGetNextCE(coll, &sColl, &status);
|
||||
if(sOrder == UCOL_NO_MORE_CES) {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
break;
|
||||
} else if((sOrder & 0xFFFFFFBF) == 0) {
|
||||
continue;
|
||||
@ -4697,13 +4704,13 @@ ucol_strcoll( const UCollator *coll,
|
||||
if((sOrder & 0xFFFF0000) > 0) { /* There is primary value */
|
||||
if(sInShifted) {
|
||||
sOrder &= 0xFFFF0000;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
// *(sCEs++) = sOrder;
|
||||
continue;
|
||||
} else {
|
||||
//sOrder ^= caseSwitch;
|
||||
// *(sCEs++) = sOrder;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
break;
|
||||
}
|
||||
} else { /* Just lower level values */
|
||||
@ -4711,26 +4718,26 @@ ucol_strcoll( const UCollator *coll,
|
||||
continue;
|
||||
} else {
|
||||
//sOrder ^= caseSwitch;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
// *(sCEs++) = sOrder;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else { /* regular */
|
||||
if(sOrder > LVT) {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
// *(sCEs++) = sOrder;
|
||||
break;
|
||||
} else {
|
||||
if((sOrder & 0xFFFF0000) > 0) {
|
||||
sInShifted = TRUE;
|
||||
sOrder &= 0xFFFF0000;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
// *(sCEs++) = sOrder;
|
||||
continue;
|
||||
} else {
|
||||
//sOrder ^= caseSwitch;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, &sColl);
|
||||
sInShifted = FALSE;
|
||||
// *(sCEs++) = sOrder;
|
||||
continue;
|
||||
@ -4745,7 +4752,7 @@ ucol_strcoll( const UCollator *coll,
|
||||
// UCOL_GETNEXTCE(tOrder, coll, tColl, &status);
|
||||
tOrder = ucol_IGetNextCE(coll, &tColl, &status);
|
||||
if(tOrder == UCOL_NO_MORE_CES) {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
// *(tCEs++) = tOrder;
|
||||
break;
|
||||
} else if((tOrder & 0xFFFFFFBF) == 0) {
|
||||
@ -4754,12 +4761,12 @@ ucol_strcoll( const UCollator *coll,
|
||||
if((tOrder & 0xFFFF0000) > 0) { /* There is primary value */
|
||||
if(tInShifted) {
|
||||
tOrder &= 0xFFFF0000;
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
// *(tCEs++) = tOrder;
|
||||
continue;
|
||||
} else {
|
||||
//tOrder ^= caseSwitch;
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
// *(tCEs++) = tOrder;
|
||||
break;
|
||||
}
|
||||
@ -4768,14 +4775,14 @@ ucol_strcoll( const UCollator *coll,
|
||||
continue;
|
||||
} else {
|
||||
//tOrder ^= caseSwitch;
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
// *(tCEs++) = tOrder;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else { /* regular */
|
||||
if(tOrder > LVT) {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
// *(tCEs++) = tOrder;
|
||||
break;
|
||||
} else {
|
||||
@ -4783,11 +4790,11 @@ ucol_strcoll( const UCollator *coll,
|
||||
tInShifted = TRUE;
|
||||
tOrder &= 0xFFFF0000;
|
||||
// *(tCEs++) = tOrder;
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
continue;
|
||||
} else {
|
||||
//tOrder ^= caseSwitch;
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, &tColl);
|
||||
tInShifted = FALSE;
|
||||
// *(tCEs++) = tOrder;
|
||||
continue;
|
||||
|
@ -235,26 +235,42 @@ void doTestVariant(UCollator* myCollation, const UChar source[], const UChar tar
|
||||
{
|
||||
int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
|
||||
int temp=0, gSortklen1=0,gSortklen2=0;
|
||||
UCollationResult compareResult, keyResult, incResult = result;
|
||||
uint8_t *sortKey1, *sortKey2;
|
||||
UCollationResult compareResult, compareResulta, keyResult, incResult = result;
|
||||
uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
|
||||
uint32_t sLen = u_strlen(source);
|
||||
uint32_t tLen = u_strlen(target);
|
||||
char buffer[256];
|
||||
uint32_t len;
|
||||
|
||||
|
||||
compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
|
||||
compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
|
||||
compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
|
||||
if (compareResult != compareResulta) {
|
||||
log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
|
||||
}
|
||||
|
||||
sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
|
||||
sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);
|
||||
|
||||
sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
|
||||
sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
|
||||
|
||||
sortKey1=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
|
||||
ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
|
||||
sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
|
||||
sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
|
||||
ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
|
||||
ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);
|
||||
|
||||
sortKey2=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
|
||||
ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
|
||||
sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
|
||||
sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
|
||||
ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
|
||||
ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);
|
||||
|
||||
/* Check that sort key generated with null terminated string is identical */
|
||||
/* to that generated with a length specified. */
|
||||
if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
|
||||
uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
|
||||
log_err("Sort Keys from null terminated and explicit length strings differ.\n");
|
||||
}
|
||||
|
||||
/*memcmp(sortKey1, sortKey2,sortklenmax);*/
|
||||
temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
|
||||
@ -281,6 +297,9 @@ void doTestVariant(UCollator* myCollation, const UChar source[], const UChar tar
|
||||
reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, incResult, result );
|
||||
free(sortKey1);
|
||||
free(sortKey2);
|
||||
free(sortKey1a);
|
||||
free(sortKey2a);
|
||||
|
||||
}
|
||||
|
||||
void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
|
||||
|
@ -1763,6 +1763,51 @@ static void TestJ815() {
|
||||
}
|
||||
|
||||
|
||||
/*
 * Collation test for long runs of combining marks with normalization on.
 *
 * Builds two null-terminated strings of the same length: the base letter
 * baseA followed by combining marks taken from ccMix. strA is filled in
 * forward order and strB in reverse order, so both hold the same marks in
 * a different sequence. The pair is handed to doTest() with an expected
 * result of UCOL_EQUAL.
 *
 * Presumably the strings compare equal because canonical reordering under
 * NFD sorts the marks by combining class - confirm against the normalizer.
 *
 * NOTE(review): the results of uprv_malloc() and the status set by
 * ucol_open() are not checked before use.
 */
static void TestIncrementalNormalize() {
|
||||
|
||||
UChar baseA =0x41;
|
||||
/* NOTE(review): baseB is never referenced in this function. */
UChar baseB = 0x42;
|
||||
UChar ccMix[] = {0x316, 0x321, 0x300};
|
||||
// 0x316 is combining grave accent below, cc=220
|
||||
// 0x321 is combining palatalized hook below, cc=202
|
||||
// 0x300 is combining grave accent, cc=230
|
||||
|
||||
/* Upper bound on string length; the buffers below hold maxSLen+1 UChars. */
int maxSLen = 2000;
|
||||
int sLen;
|
||||
int i;
|
||||
|
||||
UChar *strA;
|
||||
UChar *strB;
|
||||
UCollator *coll;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
strA = uprv_malloc((maxSLen+1) * sizeof(UChar));
|
||||
strB = uprv_malloc((maxSLen+1) * sizeof(UChar));
|
||||
|
||||
coll = ucol_open("en_US", &status);
|
||||
ucol_setNormalization(coll, UNORM_NFD);
|
||||
|
||||
/* The full-range loop is disabled; only the single length 1000 is run. */
// for (sLen = 4; sLen<maxSLen; sLen++) {
|
||||
for (sLen = 1000; sLen<1001; sLen++) {
|
||||
/* Both strings start with the same base character. */
strA[0] = baseA;
|
||||
strB[0] = baseA;
|
||||
/* Fill positions 1..sLen-1: strA forward, strB from the end backward. */
for (i=1; i<=sLen-1; i++) {
|
||||
strA[i] = ccMix[i % 3];
|
||||
strB[sLen-i] = ccMix[i % 3];
|
||||
}
|
||||
/* Null-terminate so the strings are also usable without explicit lengths. */
strA[sLen] = 0;
|
||||
strB[sLen] = 0;
|
||||
|
||||
doTest(coll, strA, strB, UCOL_EQUAL);
|
||||
}
|
||||
|
||||
/* Release the collator and both string buffers. */
ucol_close(coll);
|
||||
uprv_free(strA);
|
||||
uprv_free(strB);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void addMiscCollTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &TestCase, "tscoll/cmsccoll/TestCase");
|
||||
@ -1785,6 +1830,7 @@ void addMiscCollTest(TestNode** root)
|
||||
/*addTest(root, &TestUnmappedSpaces, "tscoll/cmsccoll/TestUnmappedSpaces");*/
|
||||
/*addTest(root, &PrintMarkDavis, "tscoll/cmsccoll/PrintMarkDavis");*/
|
||||
/*addTest(root, &TestVariableTop, "tscoll/cmsccoll/TestVariableTop");*/
|
||||
addTest(root, &TestIncrementalNormalize, "tscoll/cmsccoll/TestIncrementalNormalize");
|
||||
addTest(root, &TestComposeDecompose, "tscoll/cmsccoll/TestComposeDecompose");
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user