ICU-4355 Allow for collation binary images to work without UCA
X-SVN-Rev: 18149
This commit is contained in:
parent
acdb160b26
commit
4c163f6aa3
@ -68,6 +68,14 @@ static UDataMemory* UCA_DATA_MEM = NULL;
|
||||
// It is cleaned in ucol_cleanup
|
||||
static const uint16_t *fcdTrieIndex=NULL;
|
||||
|
||||
// These are values from UCA required for
|
||||
// implicit generation and suppressing sort key compression
|
||||
// they should regularly be in the UCA, but if one
|
||||
// is running without UCA, it could be a problem
|
||||
static int32_t maxRegularPrimary = 0xA0;
|
||||
static int32_t minImplicitPrimary = 0xE0;
|
||||
static int32_t maxImplicitPrimary = 0xE4;
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV
|
||||
isAcceptableUCA(void * /*context*/,
|
||||
@ -344,16 +352,20 @@ ucol_openBinary(const uint8_t *bin, int32_t length,
|
||||
if(U_FAILURE(*status)){
|
||||
return NULL;
|
||||
}
|
||||
/*
|
||||
if(base == NULL) {
|
||||
// we don't support null base yet
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
*/
|
||||
// We need these and we could be running without UCA
|
||||
uprv_uca_initImplicitConstants(0, 0, status);
|
||||
UCATableHeader *colData = (UCATableHeader *)bin;
|
||||
// do we want version check here? We're trying to figure out whether collators are compatible
|
||||
if(uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
if(base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
colData->version[0] != UCOL_BUILDER_VERSION)
|
||||
colData->version[0] != UCOL_BUILDER_VERSION))
|
||||
{
|
||||
*status = U_COLLATOR_VERSION_MISMATCH;
|
||||
return NULL;
|
||||
@ -1030,7 +1042,8 @@ static void initImplicitConstants(int minPrimary, int maxPrimary,
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_uca_initImplicitConstants(int32_t minPrimary, int32_t maxPrimary, UErrorCode *status) {
|
||||
// 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
|
||||
initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
|
||||
//initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
|
||||
initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
@ -1299,6 +1312,9 @@ inline UBool collIterFCD(collIterate *collationSource) {
|
||||
/* */
|
||||
/****************************************************************************/
|
||||
|
||||
static uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
|
||||
static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
|
||||
|
||||
/* there should be a macro version of this function in the header file */
|
||||
/* This is the first function that tries to fetch a collation element */
|
||||
/* If it's not successful or it encounters a more difficult situation */
|
||||
@ -1460,6 +1476,9 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
|
||||
}
|
||||
}
|
||||
}
|
||||
if(order == UCOL_NOT_FOUND) {
|
||||
order = getImplicit(ch, collationSource);
|
||||
}
|
||||
return order; /* return the CE */
|
||||
}
|
||||
|
||||
@ -1814,23 +1833,27 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
|
||||
if (result > UCOL_NOT_FOUND) {
|
||||
result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
|
||||
}
|
||||
if (result == UCOL_NOT_FOUND) {
|
||||
if (result == UCOL_NOT_FOUND) { // Not found in master list
|
||||
if (!isAtStartPrevIterate(data) &&
|
||||
ucol_contractionEndCP(ch, data->coll)) {
|
||||
result = UCOL_CONTRACTION;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
if(coll->UCA) {
|
||||
result = UTRIE_GET32_FROM_LEAD(coll->UCA->mapping, ch);
|
||||
}
|
||||
}
|
||||
|
||||
if (result > UCOL_NOT_FOUND && coll->UCA) {
|
||||
if (result > UCOL_NOT_FOUND) {
|
||||
if(coll->UCA) {
|
||||
result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if(result == UCOL_NOT_FOUND) {
|
||||
result = getPrevImplicit(ch, data);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -3980,7 +4003,7 @@ ucol_getSortKeyWithAllocation(const UCollator *coll,
|
||||
/* or if we run out of space while making a sortkey and want to return ASAP */
|
||||
int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
|
||||
//const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
|
||||
uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
|
||||
uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
|
||||
uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
|
||||
@ -4081,7 +4104,8 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
|
||||
leadPrimary = 0;
|
||||
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
|
||||
//(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) {
|
||||
(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
|
||||
//(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
|
||||
(primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
|
||||
/* not compressible */
|
||||
leadPrimary = 0;
|
||||
currentSize+=2;
|
||||
@ -4321,7 +4345,7 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
UBool allocateSKBuffer,
|
||||
UErrorCode *status)
|
||||
{
|
||||
const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
|
||||
//const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
|
||||
|
||||
uint32_t i = 0; /* general purpose counter */
|
||||
|
||||
@ -4538,7 +4562,8 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
*primaries++ = primary1;
|
||||
leadPrimary = 0;
|
||||
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
|
||||
(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
|
||||
//(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
|
||||
(primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
|
||||
/* not compressible */
|
||||
leadPrimary = 0;
|
||||
*primaries++ = primary1;
|
||||
@ -4927,7 +4952,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
{
|
||||
U_ALIGN_CODE(16);
|
||||
|
||||
const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
|
||||
//const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
|
||||
uint32_t i = 0; /* general purpose counter */
|
||||
|
||||
/* Stack allocated buffers for buffers we use */
|
||||
@ -5066,7 +5091,8 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
leadPrimary = 0;
|
||||
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
|
||||
//(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24)))
|
||||
(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
|
||||
//(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
|
||||
(primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
|
||||
/* not compressible */
|
||||
leadPrimary = 0;
|
||||
*primaries++ = primary1;
|
||||
@ -8252,10 +8278,16 @@ ucol_cloneBinary(const UCollator *coll,
|
||||
if(U_FAILURE(*status)) {
|
||||
return length;
|
||||
}
|
||||
if(capacity < 0) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return length;
|
||||
}
|
||||
if(coll->hasRealData == TRUE) {
|
||||
length = coll->image->size;
|
||||
if(length <= capacity) {
|
||||
uprv_memcpy(buffer, coll->image, length);
|
||||
} else {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
|
||||
@ -8291,6 +8323,8 @@ ucol_cloneBinary(const UCollator *coll,
|
||||
|
||||
/* copy the collator options */
|
||||
uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
|
||||
} else {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
return length;
|
||||
|
@ -124,6 +124,7 @@ void addCollAPITest(TestNode** root)
|
||||
addTest(root, &TestMergeSortKeys, "tscoll/capitst/TestMergeSortKeys");
|
||||
addTest(root, &TestShortString, "tscoll/capitst/TestShortString");
|
||||
addTest(root, &TestGetContractionsAndUnsafes, "tscoll/capitst/TestGetContractionsAndUnsafes");
|
||||
addTest(root, &TestOpenBinary, "tscoll/capitst/TestOpenBinary");
|
||||
}
|
||||
|
||||
void TestGetSetAttr(void) {
|
||||
@ -770,10 +771,11 @@ void TestCloneBinary(){
|
||||
}
|
||||
|
||||
size = ucol_cloneBinary(col, NULL, 0, &err);
|
||||
if(U_FAILURE(err)) {
|
||||
if(!size) {
|
||||
log_err("ucol_cloneBinary - couldn't check size. Error: %s\n", u_errorName(err));
|
||||
return;
|
||||
}
|
||||
err = U_ZERO_ERROR;
|
||||
|
||||
buffer = (uint8_t *) malloc(size);
|
||||
ucol_cloneBinary(col, buffer, size, &err);
|
||||
@ -798,7 +800,7 @@ void TestCloneBinary(){
|
||||
k2 = (uint8_t *) malloc(sizeof(uint8_t) * l2);
|
||||
ucol_getSortKey(col, t, -1, k1, l1);
|
||||
ucol_getSortKey(col, t, -1, k2, l2);
|
||||
if (strcmp(k1,k2) != 0){
|
||||
if (strcmp((char *)k1,(char *)k2) != 0){
|
||||
log_err("ucol_openBinary - new collator should equal to old one\n");
|
||||
};
|
||||
free(k1);
|
||||
@ -2015,4 +2017,57 @@ TestGetContractionsAndUnsafes(void)
|
||||
uset_close(set);
|
||||
}
|
||||
|
||||
static void
|
||||
TestOpenBinary(void)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
/*
|
||||
char rule[] = "&h < d < c < b";
|
||||
char *wUCA[] = { "a", "h", "d", "c", "b", "i" };
|
||||
char *noUCA[] = {"d", "c", "b", "a", "h", "i" };
|
||||
*/
|
||||
/* we have to use Cyrillic letters because latin-1 always gets copied */
|
||||
char rule[] = "&\\u0452 < \\u0434 < \\u0433 < \\u0432"; /* &dje < d < g < v */
|
||||
char *wUCA[] = { "\\u0430", "\\u0452", "\\u0434", "\\u0433", "\\u0432", "\\u0435" }; /* a, dje, d, g, v, e */
|
||||
char *noUCA[] = {"\\u0434", "\\u0433", "\\u0432", "\\u0430", "\\u0435", "\\u0452" }; /* d, g, v, a, e, dje */
|
||||
|
||||
UChar uRules[256];
|
||||
int32_t uRulesLen = u_unescape(rule, uRules, 256);
|
||||
|
||||
UCollator *coll = ucol_openRules(uRules, uRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
|
||||
UCollator *UCA = ucol_open("root", &status);
|
||||
UCollator *cloneNOUCA = NULL, *cloneWUCA = NULL;
|
||||
|
||||
uint8_t imageBuffer[32768];
|
||||
uint8_t *image = imageBuffer;
|
||||
int32_t imageBufferCapacity = 32768;
|
||||
|
||||
int32_t imageSize = ucol_cloneBinary(coll, image, imageBufferCapacity, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
image = (uint8_t *)malloc(imageSize*sizeof(uint8_t));
|
||||
status = U_ZERO_ERROR;
|
||||
imageSize = ucol_cloneBinary(coll, imageBuffer, imageSize, &status);
|
||||
}
|
||||
|
||||
|
||||
cloneWUCA = ucol_openBinary(image, imageSize, UCA, &status);
|
||||
cloneNOUCA = ucol_openBinary(image, imageSize, NULL, &status);
|
||||
|
||||
genericOrderingTest(coll, wUCA, sizeof(wUCA)/sizeof(wUCA[0]));
|
||||
|
||||
genericOrderingTest(cloneWUCA, wUCA, sizeof(wUCA)/sizeof(wUCA[0]));
|
||||
genericOrderingTest(cloneNOUCA, noUCA, sizeof(noUCA)/sizeof(noUCA[0]));
|
||||
|
||||
|
||||
|
||||
if(image != imageBuffer) {
|
||||
free(image);
|
||||
}
|
||||
ucol_close(coll);
|
||||
ucol_close(cloneNOUCA);
|
||||
ucol_close(cloneWUCA);
|
||||
ucol_close(UCA);
|
||||
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
@ -21,6 +21,7 @@
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
#include "cintltst.h"
|
||||
#include "callcoll.h"
|
||||
#define MAX_TOKEN_LEN 16
|
||||
|
||||
|
||||
@ -120,7 +121,10 @@
|
||||
*/
|
||||
static void TestGetContractionsAndUnsafes(void);
|
||||
|
||||
|
||||
/**
|
||||
* Test funny stuff with open binary
|
||||
*/
|
||||
static void TestOpenBinary(void);
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
||||
#endif
|
||||
|
@ -210,6 +210,7 @@ DataDrivenCollatorTest::processTest(TestData *testData) {
|
||||
if(col != NULL){
|
||||
RuleBasedCollator* rbc = (RuleBasedCollator*)col;
|
||||
cloneSize = rbc->cloneBinary(NULL, 0, intStatus);
|
||||
intStatus = U_ZERO_ERROR;
|
||||
cloneBuf = (uint8_t*) malloc(cloneSize);
|
||||
cloneSize = rbc->cloneBinary(cloneBuf, cloneSize, intStatus);
|
||||
clone = new RuleBasedCollator(cloneBuf, cloneSize, UCA, intStatus);
|
||||
|
Loading…
Reference in New Issue
Block a user