ICU-5190 Decrease size of collator. Performance isn't addressed, but it helps to reduce what to look at for cloning.

X-SVN-Rev: 19625
This commit is contained in:
George Rhoten 2006-05-15 05:08:49 +00:00
parent a91ec74f43
commit 3893bd1ab2
3 changed files with 116 additions and 124 deletions

View File

@ -667,14 +667,14 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
result->hiraganaQisDefault = TRUE;
result->numericCollationisDefault = TRUE;
result->scriptOrder = NULL;
/*result->scriptOrder = NULL;*/
result->rules = NULL;
result->rulesLength = 0;
/* get the version info from UCATableHeader and populate the Collator struct*/
result->dataInfo.dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
result->dataInfo.dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
result->minUnsafeCP = 0;
@ -4396,7 +4396,7 @@ ucol_calcSortKey(const UCollator *coll,
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
//UBool qShifted = shifted && (compareQuad == 0);
UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
const uint8_t *scriptOrder = coll->scriptOrder;
/*const uint8_t *scriptOrder = coll->scriptOrder;*/
uint32_t variableTopValue = coll->variableTopValue;
// TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
@ -4522,11 +4522,9 @@ ucol_calcSortKey(const UCollator *coll,
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary1 = (uint8_t)(order >> 8);
if(notIsContinuation) {
if(scriptOrder != NULL) {
primary1 = scriptOrder[primary1];
}
}
/*if(notIsContinuation && scriptOrder != NULL) {
primary1 = scriptOrder[primary1];
}*/
if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|| (!notIsContinuation && wasShifted))

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-2005, International Business Machines
* Copyright (C) 1998-2006, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -862,16 +862,18 @@ struct UCollator {
uint32_t *latinOneCEs;
char* validLocale;
char* requestedLocale;
const UChar *rules;
const UCollator *UCA;
ResourceCleaner *resCleaner;
UResourceBundle *rb;
UResourceBundle *elements;
const UCATableHeader *image;
/*CompactEIntArray *mapping;*/
UTrie *mapping;
const uint32_t *latinOneMapping;
const uint32_t *expansion;
const UChar *contractionIndex;
const uint32_t *contractionCEs;
const uint8_t *scriptOrder;
/*const uint8_t *scriptOrder;*/
const uint32_t *endExpansionCE; /* array of last ces in an expansion ce.
corresponds to expansionCESize */
@ -885,12 +887,9 @@ struct UCollator {
UChar minUnsafeCP; /* Smallest unsafe Code Point. */
UChar minContrEndCP; /* Smallest code point at end of a contraction */
const UChar *rules;
int32_t rulesLength;
int32_t latinOneTableLen;
/*UErrorCode errorCode;*/ /* internal error code */
uint32_t variableTopValue;
UColAttributeValue frenchCollation;
UColAttributeValue alternateHandling; /* attribute for handling variable elements*/
@ -931,10 +930,7 @@ struct UCollator {
uint8_t tertiaryTopCount;
uint8_t tertiaryBottomCount;
UDataInfo dataInfo; /* Data info of UCA table */
const UCollator *UCA;
ResourceCleaner *resCleaner;
UVersionInfo dataVersion; /* Data info of UCA table */
};
U_CDECL_END

View File

@ -74,124 +74,122 @@ U_CFUNC UCollator*
ucol_open_internal(const char *loc,
UErrorCode *status)
{
const UCollator* UCA = ucol_initUCA(status);
const UCollator* UCA = ucol_initUCA(status);
/* New version */
if(U_FAILURE(*status)) return 0;
/* New version */
if(U_FAILURE(*status)) return 0;
UCollator *result = NULL;
UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
UCollator *result = NULL;
UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
/* we try to find stuff from keyword */
UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
UResourceBundle *collElem = NULL;
char keyBuffer[256];
// if there is a keyword, we pick it up and try to get elements
if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
// no keyword. we try to find the default setting, which will give us the keyword value
UErrorCode intStatus = U_ZERO_ERROR;
// finding default value does not affect collation fallback status
UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
if(U_SUCCESS(intStatus)) {
int32_t defaultKeyLen = 0;
const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
keyBuffer[defaultKeyLen] = 0;
} else {
*status = U_INTERNAL_PROGRAM_ERROR;
return NULL;
}
ures_close(defaultColl);
}
collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
UResourceBundle *binary = NULL;
UErrorCode binaryStatus = U_ZERO_ERROR;
if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
*status = U_USING_DEFAULT_WARNING;
result = ucol_initCollator(UCA->image, result, UCA, status);
// if we use UCA, real locale is root
result->rb = ures_open(U_ICUDATA_COLL, "", status);
result->elements = ures_open(U_ICUDATA_COLL, "", status);
if(U_FAILURE(*status)) {
goto clean;
}
ures_close(b);
result->hasRealData = FALSE;
} else if(U_SUCCESS(*status)) {
binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
binary = NULL;
result = tryOpeningFromRules(collElem, status);
if(U_FAILURE(*status)) {
goto clean;
}
} else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
int32_t len = 0;
const uint8_t *inData = ures_getBinary(binary, &len, status);
UCATableHeader *colData = (UCATableHeader *)inData;
if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
colData->version[0] != UCOL_BUILDER_VERSION) {
*status = U_DIFFERENT_UCA_VERSION;
result = tryOpeningFromRules(collElem, status);
} else {
if(U_FAILURE(*status)){
goto clean;
}
if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
if(U_FAILURE(*status)){
goto clean;
}
result->hasRealData = TRUE;
/* we try to find stuff from keyword */
UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
UResourceBundle *collElem = NULL;
char keyBuffer[256];
// if there is a keyword, we pick it up and try to get elements
if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
// no keyword. we try to find the default setting, which will give us the keyword value
UErrorCode intStatus = U_ZERO_ERROR;
// finding default value does not affect collation fallback status
UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
if(U_SUCCESS(intStatus)) {
int32_t defaultKeyLen = 0;
const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
keyBuffer[defaultKeyLen] = 0;
} else {
result = ucol_initCollator(UCA->image, result, UCA, status);
ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
if(U_FAILURE(*status)){
goto clean;
}
result->hasRealData = FALSE;
*status = U_INTERNAL_PROGRAM_ERROR;
return NULL;
}
result->freeImageOnClose = FALSE;
}
ures_close(defaultColl);
}
result->rb = b;
result->elements = collElem;
} else { /* There is another error, and we're just gonna clean up */
collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
UResourceBundle *binary = NULL;
UErrorCode binaryStatus = U_ZERO_ERROR;
if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
*status = U_USING_DEFAULT_WARNING;
result = ucol_initCollator(UCA->image, result, UCA, status);
// if we use UCA, real locale is root
result->rb = ures_open(U_ICUDATA_COLL, "", status);
result->elements = ures_open(U_ICUDATA_COLL, "", status);
if(U_FAILURE(*status)) {
goto clean;
}
ures_close(b);
result->hasRealData = FALSE;
} else if(U_SUCCESS(*status)) {
binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
binary = NULL;
result = tryOpeningFromRules(collElem, status);
if(U_FAILURE(*status)) {
goto clean;
}
} else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
int32_t len = 0;
const uint8_t *inData = ures_getBinary(binary, &len, status);
UCATableHeader *colData = (UCATableHeader *)inData;
if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
colData->version[0] != UCOL_BUILDER_VERSION) {
*status = U_DIFFERENT_UCA_VERSION;
result = tryOpeningFromRules(collElem, status);
} else {
if(U_FAILURE(*status)){
goto clean;
}
if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
if(U_FAILURE(*status)){
goto clean;
}
result->hasRealData = TRUE;
} else {
result = ucol_initCollator(UCA->image, result, UCA, status);
ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
if(U_FAILURE(*status)){
goto clean;
}
result->hasRealData = FALSE;
}
result->freeImageOnClose = FALSE;
}
}
result->rb = b;
result->elements = collElem;
} else { /* There is another error, and we're just gonna clean up */
goto clean;
}
result->validLocale = NULL; // default is to use rb info
if(loc == NULL) {
loc = ures_getLocale(result->rb, status);
}
result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
/* test for NULL */
if (result->requestedLocale == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
goto clean;
}
uprv_strcpy(result->requestedLocale, loc);
ures_close(binary);
ures_close(collations); //??? we have to decide on that. Probably affects something :)
result->resCleaner = ucol_prv_closeResources;
return result;
clean:
ures_close(b);
ures_close(collElem);
ures_close(collations);
ures_close(binary);
return NULL;
}
result->validLocale = NULL; // default is to use rb info
if(loc == NULL) {
loc = ures_getLocale(result->rb, status);
}
result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
/* test for NULL */
if (result->requestedLocale == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
ures_close(b); // ??? appears needed
ures_close(collElem);
ures_close(collations);
ures_close(binary); // ??? appears needed
return NULL;
}
uprv_strcpy(result->requestedLocale, loc);
ures_close(binary);
ures_close(collations); //??? we have to decide on that. Probably affects something :)
result->resCleaner = ucol_prv_closeResources;
return result;
}
U_CAPI UCollator*
@ -322,7 +320,7 @@ ucol_openRules( const UChar *rules,
if(U_SUCCESS(*status)) {
UChar *newRules;
result->dataInfo.dataVersion[0] = UCOL_BUILDER_VERSION;
result->dataVersion[0] = UCOL_BUILDER_VERSION;
if(rulesLength > 0) {
newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
/* test for NULL */