ICU-5190 Decrease the size of the collator (UCollator struct). Performance isn't addressed, but this reduces the amount of state that has to be looked at when cloning.
X-SVN-Rev: 19625
This commit is contained in:
parent
a91ec74f43
commit
3893bd1ab2
@ -667,14 +667,14 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
|
||||
result->hiraganaQisDefault = TRUE;
|
||||
result->numericCollationisDefault = TRUE;
|
||||
|
||||
result->scriptOrder = NULL;
|
||||
/*result->scriptOrder = NULL;*/
|
||||
|
||||
result->rules = NULL;
|
||||
result->rulesLength = 0;
|
||||
|
||||
/* get the version info from UCATableHeader and populate the Collator struct*/
|
||||
result->dataInfo.dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
|
||||
result->dataInfo.dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
|
||||
result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
|
||||
result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
|
||||
|
||||
result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
|
||||
result->minUnsafeCP = 0;
|
||||
@ -4396,7 +4396,7 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
|
||||
//UBool qShifted = shifted && (compareQuad == 0);
|
||||
UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
|
||||
const uint8_t *scriptOrder = coll->scriptOrder;
|
||||
/*const uint8_t *scriptOrder = coll->scriptOrder;*/
|
||||
|
||||
uint32_t variableTopValue = coll->variableTopValue;
|
||||
// TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
|
||||
@ -4522,11 +4522,9 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
if(notIsContinuation) {
|
||||
if(scriptOrder != NULL) {
|
||||
primary1 = scriptOrder[primary1];
|
||||
}
|
||||
}
|
||||
/*if(notIsContinuation && scriptOrder != NULL) {
|
||||
primary1 = scriptOrder[primary1];
|
||||
}*/
|
||||
|
||||
if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|
||||
|| (!notIsContinuation && wasShifted))
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-2005, International Business Machines
|
||||
* Copyright (C) 1998-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -862,16 +862,18 @@ struct UCollator {
|
||||
uint32_t *latinOneCEs;
|
||||
char* validLocale;
|
||||
char* requestedLocale;
|
||||
const UChar *rules;
|
||||
const UCollator *UCA;
|
||||
ResourceCleaner *resCleaner;
|
||||
UResourceBundle *rb;
|
||||
UResourceBundle *elements;
|
||||
const UCATableHeader *image;
|
||||
/*CompactEIntArray *mapping;*/
|
||||
UTrie *mapping;
|
||||
const uint32_t *latinOneMapping;
|
||||
const uint32_t *expansion;
|
||||
const UChar *contractionIndex;
|
||||
const uint32_t *contractionCEs;
|
||||
const uint8_t *scriptOrder;
|
||||
/*const uint8_t *scriptOrder;*/
|
||||
|
||||
const uint32_t *endExpansionCE; /* array of last ces in an expansion ce.
|
||||
corresponds to expansionCESize */
|
||||
@ -885,12 +887,9 @@ struct UCollator {
|
||||
UChar minUnsafeCP; /* Smallest unsafe Code Point. */
|
||||
UChar minContrEndCP; /* Smallest code point at end of a contraction */
|
||||
|
||||
const UChar *rules;
|
||||
int32_t rulesLength;
|
||||
int32_t latinOneTableLen;
|
||||
|
||||
/*UErrorCode errorCode;*/ /* internal error code */
|
||||
|
||||
uint32_t variableTopValue;
|
||||
UColAttributeValue frenchCollation;
|
||||
UColAttributeValue alternateHandling; /* attribute for handling variable elements*/
|
||||
@ -931,10 +930,7 @@ struct UCollator {
|
||||
uint8_t tertiaryTopCount;
|
||||
uint8_t tertiaryBottomCount;
|
||||
|
||||
UDataInfo dataInfo; /* Data info of UCA table */
|
||||
const UCollator *UCA;
|
||||
ResourceCleaner *resCleaner;
|
||||
|
||||
UVersionInfo dataVersion; /* Data info of UCA table */
|
||||
};
|
||||
|
||||
U_CDECL_END
|
||||
|
@ -74,124 +74,122 @@ U_CFUNC UCollator*
|
||||
ucol_open_internal(const char *loc,
|
||||
UErrorCode *status)
|
||||
{
|
||||
const UCollator* UCA = ucol_initUCA(status);
|
||||
const UCollator* UCA = ucol_initUCA(status);
|
||||
|
||||
/* New version */
|
||||
if(U_FAILURE(*status)) return 0;
|
||||
/* New version */
|
||||
if(U_FAILURE(*status)) return 0;
|
||||
|
||||
|
||||
|
||||
UCollator *result = NULL;
|
||||
UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
|
||||
UCollator *result = NULL;
|
||||
UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
|
||||
|
||||
/* we try to find stuff from keyword */
|
||||
UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
|
||||
UResourceBundle *collElem = NULL;
|
||||
char keyBuffer[256];
|
||||
// if there is a keyword, we pick it up and try to get elements
|
||||
if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
|
||||
// no keyword. we try to find the default setting, which will give us the keyword value
|
||||
UErrorCode intStatus = U_ZERO_ERROR;
|
||||
// finding default value does not affect collation fallback status
|
||||
UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
|
||||
if(U_SUCCESS(intStatus)) {
|
||||
int32_t defaultKeyLen = 0;
|
||||
const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
|
||||
u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
|
||||
keyBuffer[defaultKeyLen] = 0;
|
||||
} else {
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
ures_close(defaultColl);
|
||||
}
|
||||
collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
|
||||
|
||||
UResourceBundle *binary = NULL;
|
||||
UErrorCode binaryStatus = U_ZERO_ERROR;
|
||||
|
||||
if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
|
||||
*status = U_USING_DEFAULT_WARNING;
|
||||
result = ucol_initCollator(UCA->image, result, UCA, status);
|
||||
// if we use UCA, real locale is root
|
||||
result->rb = ures_open(U_ICUDATA_COLL, "", status);
|
||||
result->elements = ures_open(U_ICUDATA_COLL, "", status);
|
||||
if(U_FAILURE(*status)) {
|
||||
goto clean;
|
||||
}
|
||||
ures_close(b);
|
||||
result->hasRealData = FALSE;
|
||||
} else if(U_SUCCESS(*status)) {
|
||||
binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
|
||||
|
||||
if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
|
||||
binary = NULL;
|
||||
result = tryOpeningFromRules(collElem, status);
|
||||
if(U_FAILURE(*status)) {
|
||||
goto clean;
|
||||
}
|
||||
} else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
|
||||
int32_t len = 0;
|
||||
const uint8_t *inData = ures_getBinary(binary, &len, status);
|
||||
UCATableHeader *colData = (UCATableHeader *)inData;
|
||||
if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
colData->version[0] != UCOL_BUILDER_VERSION) {
|
||||
*status = U_DIFFERENT_UCA_VERSION;
|
||||
result = tryOpeningFromRules(collElem, status);
|
||||
} else {
|
||||
if(U_FAILURE(*status)){
|
||||
goto clean;
|
||||
}
|
||||
if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
|
||||
result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
|
||||
if(U_FAILURE(*status)){
|
||||
goto clean;
|
||||
}
|
||||
result->hasRealData = TRUE;
|
||||
/* we try to find stuff from keyword */
|
||||
UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
|
||||
UResourceBundle *collElem = NULL;
|
||||
char keyBuffer[256];
|
||||
// if there is a keyword, we pick it up and try to get elements
|
||||
if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
|
||||
// no keyword. we try to find the default setting, which will give us the keyword value
|
||||
UErrorCode intStatus = U_ZERO_ERROR;
|
||||
// finding default value does not affect collation fallback status
|
||||
UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
|
||||
if(U_SUCCESS(intStatus)) {
|
||||
int32_t defaultKeyLen = 0;
|
||||
const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
|
||||
u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
|
||||
keyBuffer[defaultKeyLen] = 0;
|
||||
} else {
|
||||
result = ucol_initCollator(UCA->image, result, UCA, status);
|
||||
ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
|
||||
if(U_FAILURE(*status)){
|
||||
goto clean;
|
||||
}
|
||||
result->hasRealData = FALSE;
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
result->freeImageOnClose = FALSE;
|
||||
}
|
||||
ures_close(defaultColl);
|
||||
}
|
||||
result->rb = b;
|
||||
result->elements = collElem;
|
||||
} else { /* There is another error, and we're just gonna clean up */
|
||||
collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
|
||||
|
||||
UResourceBundle *binary = NULL;
|
||||
UErrorCode binaryStatus = U_ZERO_ERROR;
|
||||
|
||||
if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
|
||||
*status = U_USING_DEFAULT_WARNING;
|
||||
result = ucol_initCollator(UCA->image, result, UCA, status);
|
||||
// if we use UCA, real locale is root
|
||||
result->rb = ures_open(U_ICUDATA_COLL, "", status);
|
||||
result->elements = ures_open(U_ICUDATA_COLL, "", status);
|
||||
if(U_FAILURE(*status)) {
|
||||
goto clean;
|
||||
}
|
||||
ures_close(b);
|
||||
result->hasRealData = FALSE;
|
||||
} else if(U_SUCCESS(*status)) {
|
||||
binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
|
||||
|
||||
if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
|
||||
binary = NULL;
|
||||
result = tryOpeningFromRules(collElem, status);
|
||||
if(U_FAILURE(*status)) {
|
||||
goto clean;
|
||||
}
|
||||
} else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
|
||||
int32_t len = 0;
|
||||
const uint8_t *inData = ures_getBinary(binary, &len, status);
|
||||
UCATableHeader *colData = (UCATableHeader *)inData;
|
||||
if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
|
||||
colData->version[0] != UCOL_BUILDER_VERSION) {
|
||||
*status = U_DIFFERENT_UCA_VERSION;
|
||||
result = tryOpeningFromRules(collElem, status);
|
||||
} else {
|
||||
if(U_FAILURE(*status)){
|
||||
goto clean;
|
||||
}
|
||||
if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
|
||||
result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
|
||||
if(U_FAILURE(*status)){
|
||||
goto clean;
|
||||
}
|
||||
result->hasRealData = TRUE;
|
||||
} else {
|
||||
result = ucol_initCollator(UCA->image, result, UCA, status);
|
||||
ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
|
||||
if(U_FAILURE(*status)){
|
||||
goto clean;
|
||||
}
|
||||
result->hasRealData = FALSE;
|
||||
}
|
||||
result->freeImageOnClose = FALSE;
|
||||
}
|
||||
}
|
||||
result->rb = b;
|
||||
result->elements = collElem;
|
||||
} else { /* There is another error, and we're just gonna clean up */
|
||||
goto clean;
|
||||
}
|
||||
|
||||
result->validLocale = NULL; // default is to use rb info
|
||||
|
||||
if(loc == NULL) {
|
||||
loc = ures_getLocale(result->rb, status);
|
||||
}
|
||||
result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
|
||||
/* test for NULL */
|
||||
if (result->requestedLocale == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto clean;
|
||||
}
|
||||
uprv_strcpy(result->requestedLocale, loc);
|
||||
|
||||
ures_close(binary);
|
||||
ures_close(collations); //??? we have to decide on that. Probably affects something :)
|
||||
result->resCleaner = ucol_prv_closeResources;
|
||||
return result;
|
||||
|
||||
clean:
|
||||
ures_close(b);
|
||||
ures_close(collElem);
|
||||
ures_close(collations);
|
||||
ures_close(binary);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result->validLocale = NULL; // default is to use rb info
|
||||
|
||||
if(loc == NULL) {
|
||||
loc = ures_getLocale(result->rb, status);
|
||||
}
|
||||
result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
|
||||
/* test for NULL */
|
||||
if (result->requestedLocale == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
ures_close(b); // ??? appears needed
|
||||
ures_close(collElem);
|
||||
ures_close(collations);
|
||||
ures_close(binary); // ??? appears needed
|
||||
return NULL;
|
||||
}
|
||||
uprv_strcpy(result->requestedLocale, loc);
|
||||
|
||||
ures_close(binary);
|
||||
ures_close(collations); //??? we have to decide on that. Probably affects something :)
|
||||
result->resCleaner = ucol_prv_closeResources;
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI UCollator*
|
||||
@ -322,7 +320,7 @@ ucol_openRules( const UChar *rules,
|
||||
|
||||
if(U_SUCCESS(*status)) {
|
||||
UChar *newRules;
|
||||
result->dataInfo.dataVersion[0] = UCOL_BUILDER_VERSION;
|
||||
result->dataVersion[0] = UCOL_BUILDER_VERSION;
|
||||
if(rulesLength > 0) {
|
||||
newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
|
||||
/* test for NULL */
|
||||
|
Loading…
Reference in New Issue
Block a user