1382 lines
46 KiB
C
1382 lines
46 KiB
C
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/********************************************************************
|
|
* COPYRIGHT:
|
|
* Copyright (c) 1997-2014, International Business Machines Corporation and
|
|
* others. All Rights Reserved.
|
|
********************************************************************/
|
|
/*******************************************************************************
|
|
*
|
|
* File CALLCOLL.C
|
|
*
|
|
* Modification History:
|
|
* Name Description
|
|
* Madhu Katragadda Ported for C API
|
|
********************************************************************************
|
|
*/
|
|
|
|
/*
|
|
* Important: This file is included into intltest/allcoll.cpp so that the
|
|
* test data is shared. This makes it easier to maintain the test data,
|
|
* especially since the Unicode data must be portable and quoted character
|
|
* literals will not work.
|
|
* If it is included, then there will be a #define INCLUDE_CALLCOLL_C
|
|
* that must prevent the actual code in here from being part of the
|
|
* allcoll.cpp compilation.
|
|
*/
|
|
|
|
/**
|
|
* CollationDummyTest is a third level test class. This tests creation of
|
|
* a customized collator object. For example, number 1 to be sorted
|
|
* equivalent to word 'one'.
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_COLLATION
|
|
|
|
#include "unicode/ucol.h"
|
|
#include "unicode/uloc.h"
|
|
#include "unicode/ures.h"
|
|
#include "unicode/udata.h"
|
|
#include "unicode/ucoleitr.h"
|
|
#include "unicode/ustring.h"
|
|
#include "unicode/uclean.h"
|
|
#include "unicode/putil.h"
|
|
#include "unicode/uenum.h"
|
|
|
|
#include "cintltst.h"
|
|
#include "ccolltst.h"
|
|
#include "callcoll.h"
|
|
#include "calldata.h"
|
|
#include "cstring.h"
|
|
#include "cmemory.h"
|
|
|
|
/* set to 1 to test offsets in backAndForth() */
|
|
#define TEST_OFFSETS 0
|
|
|
|
/*
 * Forward declarations of the test entry points defined in this file.
 */

/* Strength-specific runs over the shared test data tables. */
static void TestPrimary(void);      /* strength PRIMARY */
static void TestSecondary(void);    /* strength SECONDARY */
static void TestTertiary(void);     /* strength TERTIARY */
static void TestIdentical(void);    /* strength IDENTICAL */

/* Extra pairwise ordering tests. */
static void TestExtra(void);

/* Regression tests named after their Jitterbug ticket numbers. */
static void TestJB581(void);
static void TestJB1401(void);

/* [variable top] in the rule syntax. */
static void TestVariableTop(void);

/* Surrogates. */
static void TestSurrogates(void);

static void TestInvalidRules(void);

static void TestJitterbug1098(void);

static void TestFCDCrash(void);

static void TestJ5298(void);

static void TestBadKey(void);
|
|
|
|
const UCollationResult results[] = {
|
|
UCOL_LESS,
|
|
UCOL_LESS, /*UCOL_GREATER,*/
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_GREATER,
|
|
UCOL_GREATER,
|
|
UCOL_LESS, /* 10 */
|
|
UCOL_GREATER,
|
|
UCOL_LESS,
|
|
UCOL_GREATER,
|
|
UCOL_GREATER,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
/* test primary > 17 */
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL, /* 20 */
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_LESS,
|
|
/* test secondary > 26 */
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL, /* 30 */
|
|
UCOL_EQUAL,
|
|
UCOL_LESS,
|
|
UCOL_EQUAL, /* 34 */
|
|
UCOL_EQUAL,
|
|
UCOL_EQUAL,
|
|
UCOL_LESS /* 37 */
|
|
};
|
|
|
|
|
|
/*
 * Appends two characters for val to the NUL-terminated string dst: first the
 * T_CString_itosOffset() character for the high nibble, then the one for the
 * low nibble, followed by a new terminator. The caller must provide room for
 * three more bytes after the current end of dst.
 */
static
void uprv_appendByteToHexString(char *dst, uint8_t val) {
    char *tail = dst + (uint32_t)uprv_strlen(dst);

    tail[0] = T_CString_itosOffset((val >> 4));
    tail[1] = T_CString_itosOffset((val & 0xF));
    tail[2] = 0;
}
|
|
|
|
/* this function makes a string with representation of a sortkey */
|
|
static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
|
|
int32_t strength = UCOL_PRIMARY;
|
|
uint32_t res_size = 0;
|
|
UBool doneCase = FALSE;
|
|
UErrorCode errorCode = U_ZERO_ERROR;
|
|
|
|
char *current = buffer;
|
|
const uint8_t *currentSk = sortkey;
|
|
|
|
uprv_strcpy(current, "[");
|
|
|
|
while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
|
|
if(strength > UCOL_PRIMARY) {
|
|
uprv_strcat(current, " . ");
|
|
}
|
|
while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
|
|
uprv_appendByteToHexString(current, *currentSk++);
|
|
uprv_strcat(current, " ");
|
|
}
|
|
if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
|
|
doneCase = TRUE;
|
|
} else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
|
|
strength ++;
|
|
}
|
|
if (*currentSk) {
|
|
uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
|
|
}
|
|
if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
|
|
uprv_strcat(current, " . ");
|
|
while(*currentSk != 0) {
|
|
uprv_appendByteToHexString(current, *currentSk++);
|
|
uprv_strcat(current, " ");
|
|
}
|
|
|
|
uprv_appendByteToHexString(current, *currentSk++);
|
|
}
|
|
uprv_strcat(current, "]");
|
|
|
|
if(res_size > *len) {
|
|
return NULL;
|
|
}
|
|
|
|
return buffer;
|
|
}
|
|
|
|
/*
 * Registers every test of this file under "tscoll/callcoll/" in the test
 * tree rooted at *root. Tests are added in the order of the table below.
 */
void addAllCollTest(TestNode** root)
{
    static const struct {
        void (*test)(void);
        const char *path;
    } entries[] = {
        { &TestPrimary,       "tscoll/callcoll/TestPrimary" },
        { &TestSecondary,     "tscoll/callcoll/TestSecondary" },
        { &TestTertiary,      "tscoll/callcoll/TestTertiary" },
        { &TestIdentical,     "tscoll/callcoll/TestIdentical" },
        { &TestExtra,         "tscoll/callcoll/TestExtra" },
        { &TestJB581,         "tscoll/callcoll/TestJB581" },
        { &TestVariableTop,   "tscoll/callcoll/TestVariableTop" },
        { &TestSurrogates,    "tscoll/callcoll/TestSurrogates" },
        { &TestInvalidRules,  "tscoll/callcoll/TestInvalidRules" },
        { &TestJB1401,        "tscoll/callcoll/TestJB1401" },
        { &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098" },
        { &TestFCDCrash,      "tscoll/callcoll/TestFCDCrash" },
        { &TestJ5298,         "tscoll/callcoll/TestJ5298" },
        { &TestBadKey,        "tscoll/callcoll/TestBadKey" }
    };
    size_t n;

    for (n = 0; n < sizeof(entries) / sizeof(entries[0]); ++n) {
        addTest(root, entries[n].test, entries[n].path);
    }
}
|
|
|
|
UBool hasCollationElements(const char *locName) {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);;
|
|
|
|
if(U_SUCCESS(status)) {
|
|
status = U_ZERO_ERROR;
|
|
loc = ures_getByKey(loc, "collations", loc, &status);
|
|
ures_close(loc);
|
|
if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/*
 * Compares source and target by generating their sort keys piece by piece
 * with ucol_nextSortKeyPart(), pieceSize bytes at a time, and comparing the
 * pieces with memcmp(). Stops at the first differing piece or as soon as
 * either side returns fewer than pieceSize bytes.
 *
 * pieceSize must not exceed 16384 (the size of the local piece buffers);
 * otherwise *status is set to U_BUFFER_OVERFLOW_ERROR and UCOL_EQUAL is
 * returned. On the normal path *status is reset to U_ZERO_ERROR first and is
 * then only written by ucol_nextSortKeyPart().
 */
static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
    uint8_t sPiece[16384], tPiece[16384];
    uint32_t sState[2], tState[2];
    UCharIterator sIter, tIter;
    int32_t sWritten, tWritten;
    int32_t diff;

    if(pieceSize > 16384) {
        log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
        *status = U_BUFFER_OVERFLOW_ERROR;
        return UCOL_EQUAL;
    }

    *status = U_ZERO_ERROR;
    sState[0] = sState[1] = 0;
    tState[0] = tState[1] = 0;

    /* At least one piece is always produced and compared. */
    do {
        uiter_setString(&sIter, source, sLen);
        uiter_setString(&tIter, target, tLen);
        sWritten = ucol_nextSortKeyPart(coll, &sIter, sState, sPiece, pieceSize, status);
        tWritten = ucol_nextSortKeyPart(coll, &tIter, tState, tPiece, pieceSize, status);

        diff = memcmp(sPiece, tPiece, pieceSize);
    } while(sWritten == pieceSize && tWritten == pieceSize && diff == 0);

    if(diff < 0) {
        return UCOL_LESS;
    }
    return (diff > 0) ? UCOL_GREATER : UCOL_EQUAL;
}
|
|
|
|
/*
 * Compares source with target through every comparison path exercised by
 * this test file and logs an error whenever a path disagrees with the
 * expected result:
 *   - ucol_strcoll() with explicit lengths and with NUL-terminated input,
 *   - ucol_strcollIter() over the UTF-16 strings,
 *   - ucol_strcollUTF8() and ucol_strcollIter() over UTF-8 conversions
 *     (skipped when a string does not fit the 256-byte conversion buffer),
 *     the iterator also with normalization temporarily switched on,
 *   - partial sort keys of several piece sizes (only size 3 in quick mode),
 *   - full sort keys, generated with explicit length and NUL-terminated.
 * The collator's normalization mode is restored after each temporary change.
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax;
    int temp = 0, gSortklen1 = 0, gSortklen2 = 0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    /* Handed to sortKeyToString(); initialized so it never sees garbage. */
    uint32_t len = (uint32_t)sizeof(buffer);
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;

    compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
    if (compareResult != result) {
        log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }
    compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
    if (compareResulta != result) {
        /* Report the value this call actually returned. */
        log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }

    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
    {
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;

        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                {
                    /* ucol_strcollUTF8 */
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with explicit length\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                                compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                                compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }

                {
                    /* char iterator over UTF8 */
                    UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result;

                    uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                    uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                    compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    /* Repeat with normalization on, then restore the mode. */
                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                    sIter.move(&sIter, 0, UITER_START);
                    tIter.move(&tIter, 0, UITER_START);
                    compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                    if(compareResultUTF8Iter != compareResultIter) {
                        log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    if(compareResultUTF8Iter != compareResultUTF8IterNorm) {
                        log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }
            } else {
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    {
        int32_t i = 0;
        int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
        int32_t partialSizesSize = 1;
        if(getTestOption(QUICK_OPTION) <= 0) {
            partialSizesSize = 7;
        }
        for(i = 0; i < partialSizesSize; i++) {
            UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

            partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
            if(partialSKResult != result) {
                log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
                        partialSKResult, result,
                        aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
            }

            if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) {
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                if(partialSKResult != partialNormalizedSKResult) {
                    log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
                            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
                }
            }
        }
    }

    /* Preflight both keys; all four buffers are sized for the longer one. */
    sortklen1 = ucol_getSortKey(myCollation, source, sLen, NULL, 0);
    sortklen2 = ucol_getSortKey(myCollation, target, tLen, NULL, 0);

    sortklenmax = (sortklen1 > sortklen2 ? sortklen1 : sortklen2);

    sortKey1  = (uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a = (uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2  = (uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a = (uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    if (sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        log_err("Out of memory while allocating sort key buffers\n");
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
    ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);

    ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
    ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical */
    /* to that generated with a length specified. */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /* The keys are compared as zero-terminated byte strings. */
    temp = uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = (int)uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = (int)uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2 != gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    if(temp < 0) {
        keyResult = UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult = UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);
}
|
|
|
|
/*
 * Runs doTestVariant() on (source, target) with the expected result and then
 * on (target, source) with the mirrored expectation (LESS <-> GREATER, EQUAL
 * stays EQUAL). Logs a data error and does nothing else when myCollation is
 * NULL.
 */
void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    UCollationResult mirrored;

    if(!myCollation) {
        log_data_err("No collator! Any data around?\n");
        return;
    }

    switch(result) {
    case UCOL_LESS:
        mirrored = UCOL_GREATER;
        break;
    case UCOL_GREATER:
        mirrored = UCOL_LESS;
        break;
    default:
        mirrored = UCOL_EQUAL;
        break;
    }

    doTestVariant(myCollation, source, target, result);
    doTestVariant(myCollation, target, source, mirrored);
}
|
|
|
|
|
|
/**
|
|
* Return an integer array containing all of the collation orders
|
|
* returned by calls to next on the specified iterator
|
|
*/
|
|
OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength)
|
|
{
|
|
UErrorCode status;
|
|
int32_t order;
|
|
int32_t maxSize = 100;
|
|
int32_t size = 0;
|
|
int32_t offset = ucol_getOffset(iter);
|
|
OrderAndOffset *temp;
|
|
OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
|
|
status= U_ZERO_ERROR;
|
|
|
|
|
|
while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
|
|
{
|
|
if (size == maxSize)
|
|
{
|
|
maxSize *= 2;
|
|
temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
|
|
|
|
memcpy(temp, orders, size * sizeof(OrderAndOffset));
|
|
free(orders);
|
|
orders = temp;
|
|
|
|
}
|
|
|
|
orders[size].order = order;
|
|
orders[size].offset = offset;
|
|
|
|
offset = ucol_getOffset(iter);
|
|
size += 1;
|
|
}
|
|
|
|
if (maxSize > size && size > 0)
|
|
{
|
|
temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size);
|
|
|
|
memcpy(temp, orders, size * sizeof(OrderAndOffset));
|
|
free(orders);
|
|
orders = temp;
|
|
|
|
|
|
}
|
|
|
|
*orderLength = size;
|
|
return orders;
|
|
}
|
|
|
|
|
|
void
|
|
backAndForth(UCollationElements *iter)
|
|
{
|
|
/* Run through the iterator forwards and stick it into an array */
|
|
int32_t idx, o;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
int32_t orderLength = 0;
|
|
OrderAndOffset *orders = getOrders(iter, &orderLength);
|
|
|
|
|
|
/* Now go through it backwards and make sure we get the same values */
|
|
idx = orderLength;
|
|
ucol_reset(iter);
|
|
|
|
/* synwee : changed */
|
|
while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
|
|
#if TEST_OFFSETS
|
|
int32_t offset =
|
|
#endif
|
|
ucol_getOffset(iter);
|
|
|
|
idx -= 1;
|
|
if (o != orders[idx].order) {
|
|
if (o == 0)
|
|
idx ++;
|
|
else {
|
|
while (idx > 0 && orders[-- idx].order == 0) {
|
|
/* nothing... */
|
|
}
|
|
|
|
if (o != orders[idx].order) {
|
|
log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx,
|
|
orders[idx].order, o);
|
|
goto bail;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if TEST_OFFSETS
|
|
if (offset != orders[idx].offset) {
|
|
log_err("Mismatched offset at index %d: %d vs. %d\n", idx,
|
|
orders[idx].offset, offset);
|
|
goto bail;
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
while (idx != 0 && orders[idx - 1].order == 0) {
|
|
idx -= 1;
|
|
}
|
|
|
|
if (idx != 0) {
|
|
log_err("Didn't get back to beginning - index is %d\n", idx);
|
|
|
|
ucol_reset(iter);
|
|
log_err("\nnext: ");
|
|
|
|
if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) {
|
|
log_err("Error at %x\n", o);
|
|
}
|
|
|
|
log_err("\nprev: ");
|
|
|
|
if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
|
|
log_err("Error at %x\n", o);
|
|
}
|
|
|
|
log_verbose("\n");
|
|
}
|
|
|
|
bail:
|
|
free(orders);
|
|
}
|
|
|
|
/*
 * For every pair (s[i], s[j]) with i < j, unescapes both strings and checks
 * with doTest() that comparing them under coll gives result; additionally
 * runs the forward/backward collation element iterator check
 * (backAndForth()) on each of the two strings.
 *
 * coll   - collator under test.
 * s      - array of escaped test strings (u_unescape() syntax), each of
 *          which must fit into 2048 UChars after unescaping.
 * size   - number of entries in s; 0 or 1 entries yield no comparisons.
 * result - expected outcome for every (earlier, later) pair.
 */
void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) {
    UChar t1[2048] = {0};
    UChar t2[2048] = {0};
    UCollationElements *iter;
    UErrorCode status = U_ZERO_ERROR;

    uint32_t i = 0, j = 0;
    log_verbose("testing sequence:\n");
    for(i = 0; i < size; i++) {
        log_verbose("%s\n", s[i]);
    }

    iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
    if (U_FAILURE(status)) {
        /* Without an iterator the element checks below cannot run safely. */
        log_err("Creation of iterator failed\n");
        return;
    }
    /* "i + 1 < size" rather than "i < size-1": size is unsigned, so size-1
     * would wrap around for an empty list. */
    for(i = 0; i + 1 < size; i++) {
        /* s[i] is the same for the whole inner loop; unescape it once. */
        u_unescape(s[i], t1, 2048);
        for(j = i+1; j < size; j++) {
            u_unescape(s[j], t2, 2048);
            doTest(coll, t1, t2, result);
            /* synwee : added collation element iterator test */
            ucol_setText(iter, t1, u_strlen(t1), &status);
            backAndForth(iter);
            ucol_setText(iter, t2, u_strlen(t2), &status);
            backAndForth(iter);
        }
    }
    ucol_closeElements(iter);
}
|
|
|
|
/*
 * Convenience form of genericOrderingTestWithResult() that expects every
 * earlier string of s to compare UCOL_LESS than every later one.
 */
void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size)
{
    const UCollationResult expected = UCOL_LESS;

    genericOrderingTestWithResult(coll, s, size, expected);
}
|
|
|
|
void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) {
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UCollator *coll = ucol_open(locale, &status);
|
|
|
|
log_verbose("Locale starter for %s\n", locale);
|
|
|
|
if(U_SUCCESS(status)) {
|
|
genericOrderingTest(coll, s, size);
|
|
} else if(status == U_FILE_ACCESS_ERROR) {
|
|
log_data_err("Is your data around?\n");
|
|
return;
|
|
} else {
|
|
log_err("Unable to open collator for locale %s\n", locale);
|
|
}
|
|
ucol_close(coll);
|
|
}
|
|
|
|
/*
 * Opens the collator for locale and runs genericOrderingTestWithResult() on
 * s with the given expected result. A U_FILE_ACCESS_ERROR from ucol_open()
 * is logged as a data error and the function returns at once; any other
 * failure is logged as a test error.
 */
void genericLocaleStarterWithResult(const char *locale, const char * const s[], uint32_t size, UCollationResult result) {
    UErrorCode openStatus = U_ZERO_ERROR;
    UCollator *collator = ucol_open(locale, &openStatus);

    log_verbose("Locale starter for %s\n", locale);

    if(openStatus == U_FILE_ACCESS_ERROR) {
        log_data_err("Is your data around?\n");
        return;
    }

    if(U_FAILURE(openStatus)) {
        log_err("Unable to open collator for locale %s\n", locale);
    } else {
        genericOrderingTestWithResult(collator, s, size, result);
    }
    ucol_close(collator);
}
|
|
|
|
/* currently not used with options */
|
|
void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UChar rlz[RULE_BUFFER_LEN] = { 0 };
|
|
uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
|
|
uint32_t i;
|
|
|
|
UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
|
|
|
|
log_verbose("Rules starter for %s\n", rules);
|
|
|
|
if(U_SUCCESS(status)) {
|
|
log_verbose("Setting attributes\n");
|
|
for(i = 0; i < attsize; i++) {
|
|
ucol_setAttribute(coll, attrs[i], values[i], &status);
|
|
}
|
|
|
|
genericOrderingTestWithResult(coll, s, size, result);
|
|
} else {
|
|
log_err_status(status, "Unable to open collator with rules %s\n", rules);
|
|
}
|
|
ucol_close(coll);
|
|
}
|
|
|
|
/*
 * Opens the collator for locale, applies the attribute settings
 * attrs[k] = values[k] for k < attsize, and runs
 * genericOrderingTestWithResult() on s with the expected result.
 * A failure to open the collator is logged together with its status.
 */
void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *collator = ucol_open(locale, &status);
    uint32_t k;

    log_verbose("Locale starter for %s\n", locale);

    if(U_FAILURE(status)) {
        log_err_status(status, "Unable to open collator for locale %s\n", locale);
    } else {
        log_verbose("Setting attributes\n");
        for(k = 0; k != attsize; ++k) {
            ucol_setAttribute(collator, attrs[k], values[k], &status);
        }

        genericOrderingTestWithResult(collator, s, size, result);
    }
    ucol_close(collator);
}
|
|
|
|
void genericLocaleStarterWithOptions(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
|
|
genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS);
|
|
}
|
|
|
|
/*
 * Builds a collator from the escaped rule string and runs
 * genericOrderingTestWithResult() on s with the expected result.
 * Logs an error and returns without testing if the unescaped rules do not
 * fit into RULE_BUFFER_LEN UChars. A U_FILE_ACCESS_ERROR from
 * ucol_openRules() is logged as a data error, any other failure as a test
 * error.
 */
void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) {
    UErrorCode status = U_ZERO_ERROR;
    UChar rlz[RULE_BUFFER_LEN] = { 0 };
    int32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
    UCollator *coll = NULL;

    /* u_unescape() reports the full unescaped length even when the output
     * was truncated; using that length would read past the end of rlz. */
    if(rlen > RULE_BUFFER_LEN) {
        log_err("Rules too long for the rule buffer (%d > %d): %s\n", (int)rlen, (int)RULE_BUFFER_LEN, rules);
        return;
    }

    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
    log_verbose("Rules starter for %s\n", rules);

    if(U_SUCCESS(status)) {
        genericOrderingTestWithResult(coll, s, size, result);
        ucol_close(coll);
    } else if(status == U_FILE_ACCESS_ERROR) {
        log_data_err("Is your data around?\n");
    } else {
        log_err("Unable to open collator with rules %s\n", rules);
    }
}
|
|
|
|
void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) {
|
|
genericRulesStarterWithResult(rules, s, size, UCOL_LESS);
|
|
}
|
|
|
|
static void TestTertiary()
|
|
{
|
|
int32_t len,i;
|
|
UCollator *myCollation;
|
|
UErrorCode status=U_ZERO_ERROR;
|
|
static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
|
|
UChar rules[sizeof(str)];
|
|
len = (int32_t)strlen(str);
|
|
u_uastrcpy(rules, str);
|
|
|
|
myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
|
|
if(U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
|
|
return;
|
|
}
|
|
|
|
ucol_setStrength(myCollation, UCOL_TERTIARY);
|
|
for (i = 0; i < 17 ; i++)
|
|
{
|
|
doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
|
|
}
|
|
ucol_close(myCollation);
|
|
myCollation = 0;
|
|
}
|
|
|
|
static void TestPrimary( )
|
|
{
|
|
int32_t len,i;
|
|
UCollator *myCollation;
|
|
UErrorCode status=U_ZERO_ERROR;
|
|
static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
|
|
UChar rules[sizeof(str)];
|
|
len = (int32_t)strlen(str);
|
|
u_uastrcpy(rules, str);
|
|
|
|
myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
|
|
if(U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
|
|
return;
|
|
}
|
|
ucol_setStrength(myCollation, UCOL_PRIMARY);
|
|
|
|
for (i = 17; i < 26 ; i++)
|
|
{
|
|
|
|
doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
|
|
}
|
|
ucol_close(myCollation);
|
|
myCollation = 0;
|
|
}
|
|
|
|
static void TestSecondary()
|
|
{
|
|
int32_t i;
|
|
int32_t len;
|
|
UCollator *myCollation;
|
|
UErrorCode status=U_ZERO_ERROR;
|
|
static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
|
|
UChar rules[sizeof(str)];
|
|
len = (int32_t)strlen(str);
|
|
u_uastrcpy(rules, str);
|
|
|
|
myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
|
|
if(U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
|
|
return;
|
|
}
|
|
ucol_setStrength(myCollation, UCOL_SECONDARY);
|
|
for (i = 26; i < 34 ; i++)
|
|
{
|
|
doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
|
|
}
|
|
ucol_close(myCollation);
|
|
myCollation = 0;
|
|
}
|
|
|
|
static void TestIdentical()
|
|
{
|
|
int32_t i;
|
|
int32_t len;
|
|
UCollator *myCollation;
|
|
UErrorCode status=U_ZERO_ERROR;
|
|
static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
|
|
UChar rules[sizeof(str)];
|
|
len = (int32_t)strlen(str);
|
|
u_uastrcpy(rules, str);
|
|
|
|
myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
|
|
if(U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
|
|
return;
|
|
}
|
|
for(i= 34; i<37; i++)
|
|
{
|
|
doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
|
|
}
|
|
ucol_close(myCollation);
|
|
myCollation = 0;
|
|
}
|
|
|
|
static void TestExtra()
|
|
{
|
|
int32_t i, j;
|
|
int32_t len;
|
|
UCollator *myCollation;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
|
|
UChar rules[sizeof(str)];
|
|
len = (int32_t)strlen(str);
|
|
u_uastrcpy(rules, str);
|
|
|
|
myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
|
|
if(U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
|
|
return;
|
|
}
|
|
ucol_setStrength(myCollation, UCOL_TERTIARY);
|
|
for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
|
|
{
|
|
for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
|
|
{
|
|
|
|
doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
|
|
}
|
|
}
|
|
ucol_close(myCollation);
|
|
myCollation = 0;
|
|
}
|
|
|
|
static void TestJB581(void)
|
|
{
|
|
int32_t bufferLen = 0;
|
|
UChar source [100];
|
|
UChar target [100];
|
|
UCollationResult result = UCOL_EQUAL;
|
|
uint8_t sourceKeyArray [100];
|
|
uint8_t targetKeyArray [100];
|
|
int32_t sourceKeyOut = 0,
|
|
targetKeyOut = 0;
|
|
UCollator *myCollator = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
/*u_uastrcpy(source, "This is a test.");*/
|
|
/*u_uastrcpy(target, "THISISATEST.");*/
|
|
u_uastrcpy(source, "THISISATEST.");
|
|
u_uastrcpy(target, "Thisisatest.");
|
|
|
|
myCollator = ucol_open("en_US", &status);
|
|
if (U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
|
|
return;
|
|
}
|
|
result = ucol_strcoll(myCollator, source, -1, target, -1);
|
|
/* result is 1, secondary differences only for ignorable space characters*/
|
|
if (result != 1)
|
|
{
|
|
log_err("Comparing two strings with only secondary differences in C failed.\n");
|
|
}
|
|
/* To compare them with just primary differences */
|
|
ucol_setStrength(myCollator, UCOL_PRIMARY);
|
|
result = ucol_strcoll(myCollator, source, -1, target, -1);
|
|
/* result is 0 */
|
|
if (result != 0)
|
|
{
|
|
log_err("Comparing two strings with no differences in C failed.\n");
|
|
}
|
|
/* Now, do the same comparison with keys */
|
|
sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
|
|
(void)sourceKeyOut; /* Suppress set but not used warning. */
|
|
targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
|
|
bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
|
|
if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
|
|
{
|
|
log_err("Comparing two strings with sort keys in C failed.\n");
|
|
}
|
|
ucol_close(myCollator);
|
|
}
|
|
|
|
static void TestJB1401(void)
|
|
{
|
|
UCollator *myCollator = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
static UChar NFD_UnsafeStartChars[] = {
|
|
0x0f73, /* Tibetan Vowel Sign II */
|
|
0x0f75, /* Tibetan Vowel Sign UU */
|
|
0x0f81, /* Tibetan Vowel Sign Reversed II */
|
|
0
|
|
};
|
|
int i;
|
|
|
|
|
|
myCollator = ucol_open("en_US", &status);
|
|
if (U_FAILURE(status)){
|
|
log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
|
|
return;
|
|
}
|
|
ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
|
if (U_FAILURE(status)){
|
|
log_err("ERROR: Failed to set normalization mode ON for collator.\n");
|
|
return;
|
|
}
|
|
|
|
for (i=0; ; i++) {
|
|
UChar c;
|
|
UChar X[4];
|
|
UChar Y[20];
|
|
UChar Z[20];
|
|
|
|
/* Get the next funny character to be tested, and set up the
|
|
* three test strings X, Y, Z, consisting of an A-grave + test char,
|
|
* in original form, NFD, and then NFC form.
|
|
*/
|
|
c = NFD_UnsafeStartChars[i];
|
|
if (c==0) {break;}
|
|
|
|
X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/
|
|
|
|
unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
|
|
unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
|
|
if (U_FAILURE(status)){
|
|
log_err("ERROR: Failed to normalize test of character %x\n", c);
|
|
return;
|
|
}
|
|
|
|
/* Collation test. All three strings should be equal.
|
|
* doTest does both strcoll and sort keys, with params in both orders.
|
|
*/
|
|
doTest(myCollator, X, Y, UCOL_EQUAL);
|
|
doTest(myCollator, X, Z, UCOL_EQUAL);
|
|
doTest(myCollator, Y, Z, UCOL_EQUAL);
|
|
|
|
/* Run collation element iterators over the three strings. Results should be same for each.
|
|
*/
|
|
{
|
|
UCollationElements *ceiX, *ceiY, *ceiZ;
|
|
int32_t ceX, ceY, ceZ;
|
|
int j;
|
|
|
|
ceiX = ucol_openElements(myCollator, X, -1, &status);
|
|
ceiY = ucol_openElements(myCollator, Y, -1, &status);
|
|
ceiZ = ucol_openElements(myCollator, Z, -1, &status);
|
|
if (U_FAILURE(status)) {
|
|
log_err("ERROR: uucol_openElements failed.\n");
|
|
return;
|
|
}
|
|
|
|
for (j=0;; j++) {
|
|
ceX = ucol_next(ceiX, &status);
|
|
ceY = ucol_next(ceiY, &status);
|
|
ceZ = ucol_next(ceiZ, &status);
|
|
if (U_FAILURE(status)) {
|
|
log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
|
|
break;
|
|
}
|
|
if (ceX != ceY || ceY != ceZ) {
|
|
log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
|
|
break;
|
|
}
|
|
if (ceX == UCOL_NULLORDER) {
|
|
break;
|
|
}
|
|
}
|
|
ucol_closeElements(ceiX);
|
|
ucol_closeElements(ceiY);
|
|
ucol_closeElements(ceiZ);
|
|
}
|
|
}
|
|
ucol_close(myCollator);
|
|
}
|
|
|
|
|
|
|
|
/**
 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
 * tag has the value shifted, any codepoints before [variable top] should give
 * a primary ce of 0.
 *
 * The whole body is compiled out with #if 0 (see the note inside), so the
 * function currently does nothing when called.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    UChar       rules[sizeof(str)];
    UChar       source[1];
    UChar       letter;
    uint8_t     sortKey[20];
    uint8_t     zeros[20];
    int         keyLen = strlen(str);
    UErrorCode  status = U_ZERO_ERROR;
    UCollator  *enCollation;
    UCollator  *myCollation;

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n",
                       myErrorName(status));
        return;
    }

    myCollation = ucol_openRules(rules, keyLen, UCOL_OFF,
                                 UCOL_PRIMARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n",
                myErrorName(status));
        ucol_close(enCollation);
        return;
    }

    /* Both collators compare at primary strength with shifted alternates. */
    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
            UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    /* Reference key: all zero bytes. */
    uprv_memset(zeros, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    keyLen = ucol_getSortKey(enCollation, source, 1, sortKey,
                             sizeof(sortKey));
    if (uprv_memcmp(zeros, sortKey, keyLen) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* Letters 'a' up to (but not including) 'z' under the tailored rules. */
    for (letter = 'a'; letter < 'z'; letter++) {
        source[0] = letter;
        keyLen = ucol_getSortKey(myCollation, source, 1, sortKey,
                                 sizeof(sortKey));
        if (uprv_memcmp(zeros, sortKey, keyLen) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    letter);
        }
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
|
|
|
|
/**
|
|
* Tests surrogate support.
|
|
* NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
|
|
* Therefore, another (unassigned) code point was used for this test.
|
|
*/
|
|
static void TestSurrogates(void)
|
|
{
|
|
static const char str[] =
|
|
"&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
|
|
int len = (int)strlen(str);
|
|
int rlen = 0;
|
|
UChar rules[sizeof(str)];
|
|
UCollator *myCollation;
|
|
UCollator *enCollation;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UChar source[][4] =
|
|
{{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
|
|
UChar target[][4] =
|
|
{{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
|
|
int count = 0;
|
|
uint8_t enresult[20], myresult[20];
|
|
int enlen, mylen;
|
|
|
|
/* tests for open rules with surrogate rules */
|
|
rlen = u_unescape(str, rules, len);
|
|
|
|
enCollation = ucol_open("en_US", &status);
|
|
if (U_FAILURE(status)) {
|
|
log_err_status(status, "ERROR: in creation of collator :%s\n",
|
|
myErrorName(status));
|
|
return;
|
|
}
|
|
myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
|
|
UCOL_TERTIARY,NULL, &status);
|
|
if (U_FAILURE(status)) {
|
|
ucol_close(enCollation);
|
|
log_err("ERROR: in creation of rule based collator :%s\n",
|
|
myErrorName(status));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
this test is to verify the supplementary sort key order in the english
|
|
collator
|
|
*/
|
|
log_verbose("start of english collation supplementary characters test\n");
|
|
while (count < 2) {
|
|
doTest(enCollation, source[count], target[count], UCOL_LESS);
|
|
count ++;
|
|
}
|
|
doTest(enCollation, source[count], target[count], UCOL_GREATER);
|
|
|
|
log_verbose("start of tailored collation supplementary characters test\n");
|
|
count = 0;
|
|
/* tests getting collation elements for surrogates for tailored rules */
|
|
while (count < 4) {
|
|
doTest(myCollation, source[count], target[count], UCOL_LESS);
|
|
count ++;
|
|
}
|
|
|
|
/* tests that \uD800\uDC02 still has the same value, not changed */
|
|
enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
|
|
mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
|
|
if (enlen != mylen ||
|
|
uprv_memcmp(enresult, myresult, enlen) != 0) {
|
|
log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
|
|
}
|
|
|
|
ucol_close(enCollation);
|
|
ucol_close(myCollation);
|
|
enCollation = NULL;
|
|
myCollation = NULL;
|
|
}
|
|
|
|
/*
|
|
*### TODO: Add more invalid rules to test all different scenarios.
|
|
*
|
|
*/
|
|
static void
|
|
TestInvalidRules(){
|
|
#define MAX_ERROR_STATES 2
|
|
|
|
static const char* rulesArr[MAX_ERROR_STATES] = {
|
|
"& C < ch, cH, Ch[this should fail]<d",
|
|
"& C < ch, cH, & Ch[variable top]"
|
|
};
|
|
static const char* preContextArr[MAX_ERROR_STATES] = {
|
|
" C < ch, cH, Ch",
|
|
"& C < ch, cH",
|
|
|
|
};
|
|
static const char* postContextArr[MAX_ERROR_STATES] = {
|
|
"[this should fa",
|
|
", & Ch[variable"
|
|
};
|
|
int i;
|
|
|
|
for(i = 0;i<MAX_ERROR_STATES;i++){
|
|
UChar rules[1000] = { '\0' };
|
|
UChar preContextExp[1000] = { '\0' };
|
|
UChar postContextExp[1000] = { '\0' };
|
|
UParseError parseError;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UCollator* coll=0;
|
|
u_charsToUChars(rulesArr[i], rules, (int32_t)uprv_strlen(rulesArr[i]) + 1);
|
|
u_charsToUChars(preContextArr[i], preContextExp, (int32_t)uprv_strlen(preContextArr[i]) + 1);
|
|
u_charsToUChars(postContextArr[i], postContextExp, (int32_t)uprv_strlen(postContextArr[i]) + 1);
|
|
/* clean up stuff in parseError */
|
|
u_memset(parseError.preContext, 0x0000, U_PARSE_CONTEXT_LEN);
|
|
u_memset(parseError.postContext, 0x0000, U_PARSE_CONTEXT_LEN);
|
|
/* open the rules and test */
|
|
coll = ucol_openRules(rules, u_strlen(rules), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
|
|
(void)coll; /* Suppress set but not used warning. */
|
|
if(u_strcmp(parseError.preContext,preContextExp)!=0){
|
|
log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
|
|
aescstrdup(parseError.preContext, -1));
|
|
}
|
|
if(u_strcmp(parseError.postContext,postContextExp)!=0){
|
|
log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
|
|
aescstrdup(parseError.postContext, -1));
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
TestJitterbug1098(){
|
|
UChar rule[1000];
|
|
UCollator* c1 = NULL;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UParseError parseError;
|
|
char preContext[200]={0};
|
|
char postContext[200]={0};
|
|
int i=0;
|
|
const char* rules[] = {
|
|
"&''<\\\\",
|
|
"&\\'<\\\\",
|
|
"&\\\"<'\\'",
|
|
"&'\"'<\\'",
|
|
NULL
|
|
|
|
};
|
|
const UCollationResult results1098[] = {
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
UCOL_LESS,
|
|
};
|
|
const UChar input[][2]= {
|
|
{0x0027,0x005c},
|
|
{0x0027,0x005c},
|
|
{0x0022,0x005c},
|
|
{0x0022,0x0027},
|
|
};
|
|
UChar X[2] ={0};
|
|
UChar Y[2] ={0};
|
|
u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
|
|
u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
|
|
for(;rules[i]!=0;i++){
|
|
u_uastrcpy(rule, rules[i]);
|
|
c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
|
|
if(U_FAILURE(status)){
|
|
log_err_status(status, "Could not parse the rules syntax. Error: %s\n", u_errorName(status));
|
|
|
|
if (status == U_PARSE_ERROR) {
|
|
u_UCharsToChars(parseError.preContext,preContext,20);
|
|
u_UCharsToChars(parseError.postContext,postContext,20);
|
|
log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
|
|
}
|
|
|
|
return;
|
|
}
|
|
X[0] = input[i][0];
|
|
Y[0] = input[i][1];
|
|
doTest(c1,X,Y,results1098[i]);
|
|
ucol_close(c1);
|
|
}
|
|
}
|
|
|
|
static void
|
|
TestFCDCrash(void) {
|
|
static const char *test[] = {
|
|
"Gr\\u00F6\\u00DFe",
|
|
"Grossist"
|
|
};
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UCollator *coll = ucol_open("es", &status);
|
|
if(U_FAILURE(status)) {
|
|
log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
|
|
return;
|
|
}
|
|
ucol_close(coll);
|
|
coll = NULL;
|
|
ctest_resetICU();
|
|
coll = ucol_open("de_DE", &status);
|
|
if(U_FAILURE(status)) {
|
|
log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
|
|
return;
|
|
}
|
|
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
|
genericOrderingTest(coll, test, 2);
|
|
ucol_close(coll);
|
|
}
|
|
|
|
/*static UBool
|
|
find(UEnumeration* list, const char* str, UErrorCode* status){
|
|
const char* value = NULL;
|
|
int32_t length=0;
|
|
if(U_FAILURE(*status)){
|
|
return FALSE;
|
|
}
|
|
uenum_reset(list, status);
|
|
while( (value= uenum_next(list, &length, status))!=NULL){
|
|
if(strcmp(value, str)==0){
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}*/
|
|
|
|
static void TestJ5298(void)
|
|
{
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
char input[256], output[256];
|
|
UBool isAvailable;
|
|
int32_t i = 0;
|
|
UEnumeration* values = NULL;
|
|
const char *keywordValue = NULL;
|
|
log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
|
|
values = ucol_getKeywordValues("collation", &status);
|
|
while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
|
|
if (strncmp(keywordValue, "private-", 8) == 0) {
|
|
log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue);
|
|
}
|
|
}
|
|
for (i = 0; i < ucol_countAvailable(); i++) {
|
|
uenum_reset(values, &status);
|
|
while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
|
|
strcpy(input, ucol_getAvailable(i));
|
|
if (strcmp(keywordValue, "standard") != 0) {
|
|
strcat(input, "@collation=");
|
|
strcat(input, keywordValue);
|
|
}
|
|
|
|
ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status);
|
|
if (strcmp(input, output) == 0) { /* Unique locale, print it out */
|
|
log_verbose("%s, \n", output);
|
|
}
|
|
}
|
|
}
|
|
uenum_close(values);
|
|
log_verbose("\n");
|
|
}
|
|
|
|
// NULL-terminated list of locale IDs with unusual or invalid keywords,
// consumed by TestBadKey(). The trailing remarks record what ucol_open()
// was observed to do for each entry.
static const char* badKeyLocales[] = {
    /* ucol_open OK */
    "@calendar=japanese;collation=search",
    "@calendar=japanese",
    "en@calendar=x",
    "ja@calendar=x",
    "en@collation=x",
    "ja@collation=x",
    /* ucol_open fails, verify it does not crash */
    "ja@collation=private-kana",
    /* (x80 undef in ASCII,EBCDIC) ucol_open fails, verify it does not crash */
    "en@collation=\x80",
    /* end marker */
    NULL
};
|
|
|
|
// Mainly this is to check that we don't have a crash, but we check
|
|
// for correct NULL return and FAILURE/SUCCESS status as a bonus.
|
|
static void TestBadKey(void)
|
|
{
|
|
const char* badLoc;
|
|
const char** badLocsPtr = badKeyLocales;
|
|
while ((badLoc = *badLocsPtr++) != NULL) {
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UCollator* uc = ucol_open(badLoc, &status);
|
|
if ( U_SUCCESS(status) ) {
|
|
if (uc == NULL) {
|
|
log_err("ucol_open sets SUCCESS but returns NULL, locale: %s\n", badLoc);
|
|
}
|
|
ucol_close(uc);
|
|
} else if (uc != NULL) {
|
|
log_err("ucol_open sets FAILURE but returns non-NULL, locale: %s\n", badLoc);
|
|
}
|
|
}
|
|
}
|
|
#endif /* #if !UCONFIG_NO_COLLATION */
|