ICU-3984 changes to the collation reordering - API works same as rules and enhanced testing
X-SVN-Rev: 28960
This commit is contained in:
parent
e2b5a4245f
commit
0c21fdf068
@ -219,8 +219,6 @@ ucol_swapBinary(const UDataSwapper *ds,
|
||||
|
||||
/* swap the necessary pieces in the order of their occurrence in the data */
|
||||
|
||||
udata_printError(ds, "@@@@@ Here inside the collator data swapper\n");
|
||||
|
||||
/* read more of the UCATableHeader (the size field was read above) */
|
||||
header.options= ds->readUInt32(inHeader->options);
|
||||
header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
|
||||
|
Binary file not shown.
@ -833,16 +833,16 @@ Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
|
||||
return Locale::createFromName(loc);
|
||||
}
|
||||
|
||||
int32_t Collator::getScriptOrder(int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
uint32_t Collator::getScriptOrder(int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Collator::setScriptOrder(const int32_t *scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
const uint32_t scriptOrderLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
|
@ -587,15 +587,15 @@ void RuleBasedCollator::setStrength(ECollationStrength newStrength)
|
||||
ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
|
||||
}
|
||||
|
||||
int32_t RuleBasedCollator::getScriptOrder(int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
uint32_t RuleBasedCollator::getScriptOrder(int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
return ucol_getScriptOrder(ucollator, dest, destCapacity, &status);
|
||||
}
|
||||
|
||||
void RuleBasedCollator::setScriptOrder(const int32_t *scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
const uint32_t scriptOrderLength,
|
||||
UErrorCode& status)
|
||||
{
|
||||
ucol_setScriptOrder(ucollator, scriptOrder, scriptOrderLength);
|
||||
|
@ -869,6 +869,7 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
|
||||
result->rules = NULL;
|
||||
result->rulesLength = 0;
|
||||
result->freeRulesOnClose = FALSE;
|
||||
result->scriptReorderTable = NULL;
|
||||
|
||||
/* get the version info from UCATableHeader and populate the Collator struct*/
|
||||
result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
|
||||
@ -907,13 +908,6 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
|
||||
result->latinOneFailed = FALSE;
|
||||
result->UCA = UCA;
|
||||
|
||||
/* set attributes */
|
||||
ucol_setOptionsFromHeader(
|
||||
result,
|
||||
(UColOptionSet*)((uint8_t*)result->image+result->image->options),
|
||||
status);
|
||||
result->freeOptionsOnClose = FALSE;
|
||||
|
||||
/* Normally these will be set correctly later. This is the default if you use UCA or the default. */
|
||||
result->ucaRules = NULL;
|
||||
result->actualLocale = NULL;
|
||||
@ -921,7 +915,13 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
|
||||
result->requestedLocale = NULL;
|
||||
result->hasRealData = FALSE; // real data lives in .dat file...
|
||||
result->freeImageOnClose = FALSE;
|
||||
result->scriptReorderTable = NULL;
|
||||
|
||||
/* set attributes */
|
||||
ucol_setOptionsFromHeader(
|
||||
result,
|
||||
(UColOptionSet*)((uint8_t*)result->image+result->image->options),
|
||||
status);
|
||||
result->freeOptionsOnClose = FALSE;
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -1134,6 +1134,7 @@ uprv_uca_getImplicitFromRaw(UChar32 cp) {
|
||||
|
||||
static uint32_t U_EXPORT2
|
||||
uprv_uca_getImplicitPrimary(UChar32 cp) {
|
||||
//fprintf(stdout, "Incoming: %04x\n", cp);
|
||||
//if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
|
||||
|
||||
cp = swapCJK(cp);
|
||||
@ -1141,6 +1142,7 @@ uprv_uca_getImplicitPrimary(UChar32 cp) {
|
||||
// we now have a range of numbers from 0 to 21FFFF.
|
||||
|
||||
//if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
|
||||
//fprintf(stdout, "CJK swapped: %04x\n", cp);
|
||||
|
||||
return uprv_uca_getImplicitFromRaw(cp);
|
||||
}
|
||||
@ -2935,17 +2937,17 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
||||
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
|
||||
size = getExpansionCount(CE);
|
||||
CE = *CEOffset++;
|
||||
//source->offsetRepeatCount = -1;
|
||||
//source->offsetRepeatCount = -1;
|
||||
|
||||
if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
|
||||
for(i = 1; i<size; i++) {
|
||||
*(source->CEpos++) = *CEOffset++;
|
||||
source->offsetRepeatCount += 1;
|
||||
source->offsetRepeatCount += 1;
|
||||
}
|
||||
} else { /* else, we do */
|
||||
while(*CEOffset != 0) {
|
||||
*(source->CEpos++) = *CEOffset++;
|
||||
source->offsetRepeatCount += 1;
|
||||
source->offsetRepeatCount += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3565,14 +3567,14 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
|
||||
return (uint32_t)UCOL_NULLORDER;
|
||||
}
|
||||
|
||||
if (source->offsetRepeatValue != 0) {
|
||||
if (source->offsetRepeatValue != 0) {
|
||||
if (CECount > noChars) {
|
||||
source->offsetRepeatCount += temp.offsetRepeatCount;
|
||||
source->offsetRepeatCount += temp.offsetRepeatCount;
|
||||
} else {
|
||||
// **** does this really skip the right offsets? ****
|
||||
source->offsetReturn -= (noChars - CECount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (offsetBias >= 0) {
|
||||
source->offsetReturn = source->offsetStore - 1;
|
||||
@ -5381,7 +5383,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
|
||||
primary1 = (uint8_t)(order >> 8);
|
||||
|
||||
if(coll->scriptReorderTable != NULL && notIsContinuation){
|
||||
if (coll->scriptReorderTable != NULL && notIsContinuation) {
|
||||
primary1 = coll->scriptReorderTable[primary1];
|
||||
}
|
||||
|
||||
@ -6584,7 +6586,8 @@ ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
|
||||
{
|
||||
uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
|
||||
UBool reverseSecondary = FALSE;
|
||||
if(!isContinuation(CE)) {
|
||||
UBool continuation = isContinuation(CE);
|
||||
if(!continuation) {
|
||||
tertiary = (uint8_t)((CE & coll->tertiaryMask));
|
||||
tertiary ^= coll->caseSwitch;
|
||||
reverseSecondary = TRUE;
|
||||
@ -6599,6 +6602,10 @@ ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
|
||||
primary1 = (uint8_t)(CE >> 8);
|
||||
|
||||
if(primary1 != 0) {
|
||||
if (coll->scriptReorderTable != NULL && !continuation) {
|
||||
primary1 = coll->scriptReorderTable[primary1];
|
||||
}
|
||||
|
||||
coll->latinOneCEs[ch] |= (primary1 << *primShift);
|
||||
*primShift -= 8;
|
||||
}
|
||||
@ -7111,22 +7118,21 @@ ucol_getStrength(const UCollator *coll)
|
||||
return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
|
||||
}
|
||||
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
U_INTERNAL uint32_t U_EXPORT2
|
||||
ucol_getScriptOrder(const UCollator *coll,
|
||||
int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode *pErrorCode){
|
||||
int i;
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
|
||||
if (pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
if(coll->scriptOrder == NULL){
|
||||
if (coll->scriptOrder == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if(coll->scriptOrderLength > destCapacity){
|
||||
if (coll->scriptOrderLength > destCapacity) {
|
||||
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
for(i = 0; (i < coll->scriptOrderLength) && (i < destCapacity); i++){
|
||||
for (uint32_t i = 0; (i < coll->scriptOrderLength) && (i < destCapacity); i++) {
|
||||
dest[i] = coll->scriptOrder[i];
|
||||
}
|
||||
return coll->scriptOrderLength;
|
||||
@ -7135,17 +7141,18 @@ ucol_getScriptOrder(const UCollator *coll,
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setScriptOrder(UCollator *coll,
|
||||
const int32_t *scriptOrder,
|
||||
const int32_t scriptOrderLength){
|
||||
int i;
|
||||
const uint32_t scriptOrderLength) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if (coll->scriptOrder != NULL) {
|
||||
uprv_free(coll->scriptOrder);
|
||||
}
|
||||
coll->scriptOrder = (int32_t*) uprv_malloc(scriptOrderLength*sizeof(int32_t));
|
||||
for (i = 0; i < scriptOrderLength; i++) {
|
||||
for (uint32_t i = 0; i < scriptOrderLength; i++) {
|
||||
coll->scriptOrder[i] = scriptOrder[i];
|
||||
}
|
||||
coll->scriptOrderLength = scriptOrderLength;
|
||||
ucol_buildScriptReorderTable(coll);
|
||||
ucol_buildScriptReorderTable(coll, &status);
|
||||
// TODO: something with the status if error condition
|
||||
}
|
||||
|
||||
|
||||
@ -7483,11 +7490,6 @@ ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
|
||||
tOrder &= UCOL_PRIMARYMASK;
|
||||
} while(tOrder == 0);
|
||||
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
sOrder = (coll->scriptReorderTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
|
||||
tOrder = (coll->scriptReorderTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
|
||||
// if both primaries are the same
|
||||
if(sOrder == tOrder) {
|
||||
// and there are no more CEs, we advance to the next level
|
||||
@ -7501,6 +7503,12 @@ ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// only need to check one for continuation
|
||||
// if one is, then the other must be too, or the preceding CE would be a prefix of the other
|
||||
if (coll->scriptReorderTable != NULL && !isContinuation(sOrder)) {
|
||||
sOrder = (coll->scriptReorderTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
|
||||
tOrder = (coll->scriptReorderTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
// if two primaries are different, we are done
|
||||
result = (sOrder < tOrder) ? UCOL_LESS: UCOL_GREATER;
|
||||
goto commonReturn;
|
||||
@ -8083,10 +8091,6 @@ ucol_strcollUseLatin1( const UCollator *coll,
|
||||
}
|
||||
}
|
||||
}
|
||||
if(coll->scriptReorderTable != NULL){
|
||||
sOrder = (coll->scriptReorderTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
|
||||
tOrder = (coll->scriptReorderTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
if(endOfSource) { // source is finished, but target is not, say the result.
|
||||
return UCOL_LESS;
|
||||
}
|
||||
|
@ -1376,15 +1376,16 @@ ucol_initInverseUCA(UErrorCode *status)
|
||||
return _staticInvUCA;
|
||||
}
|
||||
|
||||
/* This is the data that is used for non-script reordering codes.
|
||||
/* This is the data that is used for non-script reordering codes. These _must_ be kept
|
||||
* in the order in which they are to be applied as defaults, and in sync with the UColReorderCode enum.
|
||||
*/
|
||||
const char* ReorderingTokenNames[] = {
|
||||
"SPACE",
|
||||
"PUNCT",
|
||||
"SYMBOL",
|
||||
"CURRENCY",
|
||||
"DIGIT",
|
||||
NULL
|
||||
"SPACE",
|
||||
"PUNCT",
|
||||
"SYMBOL",
|
||||
"CURRENCY",
|
||||
"DIGIT",
|
||||
NULL
|
||||
};
|
||||
|
||||
void toUpper(const char* src, char* dst, uint32_t length) {
|
||||
@ -1396,14 +1397,14 @@ void toUpper(const char* src, char* dst, uint32_t length) {
|
||||
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucol_findReorderingEntry(const char* name) {
|
||||
char buffer[32];
|
||||
toUpper(name, buffer, 32);
|
||||
for (uint32_t entry = 0; ReorderingTokenNames[entry] != NULL; entry++) {
|
||||
if (strcmp(buffer, ReorderingTokenNames[entry]) == 0) {
|
||||
return entry + UCOL_REORDERCODE_FIRST;
|
||||
}
|
||||
}
|
||||
return USCRIPT_INVALID_CODE;
|
||||
char buffer[32];
|
||||
toUpper(name, buffer, 32);
|
||||
for (uint32_t entry = 0; ReorderingTokenNames[entry] != NULL; entry++) {
|
||||
if (strcmp(buffer, ReorderingTokenNames[entry]) == 0) {
|
||||
return entry + UCOL_REORDERCODE_FIRST;
|
||||
}
|
||||
}
|
||||
return USCRIPT_INVALID_CODE;
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
@ -770,7 +770,7 @@ typedef struct {
|
||||
/*UColAttributeValue*/ int32_t hiraganaQ; /* attribute for special Hiragana */
|
||||
/*UColAttributeValue*/ int32_t numericCollation; /* attribute for numeric collation */
|
||||
/* reorder code */ int32_t* scriptOrder;
|
||||
int32_t scriptOrderLength;
|
||||
uint32_t scriptOrderLength;
|
||||
uint32_t reserved[15]; /* for future use */
|
||||
} UColOptionSet;
|
||||
|
||||
@ -1019,7 +1019,7 @@ struct UCollator {
|
||||
|
||||
UVersionInfo dataVersion; /* Data info of UCA table */
|
||||
int32_t* scriptOrder;
|
||||
int32_t scriptOrderLength;
|
||||
uint32_t scriptOrderLength;
|
||||
uint8_t* scriptReorderTable;
|
||||
};
|
||||
|
||||
@ -1073,7 +1073,7 @@ uprv_uca_getCodePointFromRaw(UChar32 i);
|
||||
|
||||
|
||||
|
||||
U_CAPI void ucol_buildScriptReorderTable(UCollator *coll);
|
||||
U_CAPI void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status);
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
/*
|
||||
|
@ -211,12 +211,11 @@ int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* retu
|
||||
return reorderCodeCount;
|
||||
}
|
||||
|
||||
void ucol_buildScriptReorderTable(UCollator *coll) {
|
||||
int32_t *next;
|
||||
void ucol_buildScriptReorderTable(UCollator *coll, UErrorCode *status) {
|
||||
uint16_t leadBytesSize = 256;
|
||||
uint16_t leadBytes[256];
|
||||
uint16_t reorderCodesSize = 256;
|
||||
int16_t reorderCodes[256];
|
||||
uint32_t internalScriptOrderLength = coll->scriptOrderLength + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST);
|
||||
int32_t* internalScriptOrder;
|
||||
|
||||
// The lowest byte that hasn't been assigned a mapping
|
||||
int toBottom = 0x03;
|
||||
@ -227,16 +226,17 @@ void ucol_buildScriptReorderTable(UCollator *coll) {
|
||||
bool fromTheBottom = true;
|
||||
|
||||
// lead bytes that have already been assigned to the permutation table
|
||||
bool leadByteUsed[256];
|
||||
bool newLeadByteUsed[256];
|
||||
// permutation table slots that have already been filled
|
||||
bool permutationSlotFilled[256];
|
||||
|
||||
// nothing to do
|
||||
if (coll->scriptOrderLength == 0) {
|
||||
if(U_FAILURE(*status) || coll == NULL || coll->scriptOrderLength == 0) {
|
||||
if (coll->scriptReorderTable != NULL) {
|
||||
uprv_free(coll->scriptReorderTable);
|
||||
coll->scriptReorderTable = NULL;
|
||||
}
|
||||
coll->scriptOrderLength = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -244,14 +244,27 @@ void ucol_buildScriptReorderTable(UCollator *coll) {
|
||||
coll->scriptReorderTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
|
||||
}
|
||||
|
||||
// prefill the reordering codes with the leading entries
|
||||
internalScriptOrder = (int32_t*)uprv_malloc(internalScriptOrderLength * sizeof(int32_t));
|
||||
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST); codeIndex++) {
|
||||
internalScriptOrder[codeIndex] = UCOL_REORDERCODE_FIRST + codeIndex;
|
||||
}
|
||||
for (uint32_t codeIndex = 0; codeIndex < coll->scriptOrderLength; codeIndex++) {
|
||||
uint32_t scriptOrderCode = coll->scriptOrder[codeIndex];
|
||||
internalScriptOrder[codeIndex + (UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST)] = scriptOrderCode;
|
||||
if (scriptOrderCode >= UCOL_REORDERCODE_FIRST && scriptOrderCode < UCOL_REORDERCODE_LIMIT) {
|
||||
internalScriptOrder[scriptOrderCode - UCOL_REORDERCODE_FIRST] = UCOL_REORDERCODE_IGNORE;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (i < toBottom || i > toTop) {
|
||||
permutationSlotFilled[i] = true;
|
||||
leadByteUsed[i] = true;
|
||||
newLeadByteUsed[i] = true;
|
||||
coll->scriptReorderTable[i] = i;
|
||||
} else {
|
||||
permutationSlotFilled[i] = false;
|
||||
leadByteUsed[i] = false;
|
||||
newLeadByteUsed[i] = false;
|
||||
coll->scriptReorderTable[i] = 0;
|
||||
}
|
||||
}
|
||||
@ -262,62 +275,122 @@ void ucol_buildScriptReorderTable(UCollator *coll) {
|
||||
* possible location. At each step, we also need to make sure that any scripts
|
||||
* that need to not be moved are copied to their same location in the final table.
|
||||
*/
|
||||
next = coll->scriptOrder;
|
||||
while (next < coll->scriptOrder + coll->scriptOrderLength) {
|
||||
if (*next == UCOL_REORDERCODE_IGNORE) {
|
||||
next++;
|
||||
for (int scriptOrderIndex = 0; scriptOrderIndex < internalScriptOrderLength; scriptOrderIndex++) {
|
||||
int32_t next = internalScriptOrder[scriptOrderIndex];
|
||||
if (next == UCOL_REORDERCODE_IGNORE) {
|
||||
continue;
|
||||
}
|
||||
if (*next == USCRIPT_UNKNOWN) {
|
||||
if (next == USCRIPT_UNKNOWN) {
|
||||
if (fromTheBottom == false) {
|
||||
//TODO - error condition - bad script order
|
||||
// double turnaround
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->scriptReorderTable != NULL) {
|
||||
uprv_free(coll->scriptReorderTable);
|
||||
coll->scriptReorderTable = NULL;
|
||||
}
|
||||
coll->scriptOrderLength = 0;
|
||||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
fprintf(stdout, "\treturn - next == USCRIPT_UNKNOWN\n");
|
||||
return;
|
||||
}
|
||||
fromTheBottom = false;
|
||||
next++;
|
||||
fromTheBottom = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, *next, leadBytes, leadBytesSize);
|
||||
uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
|
||||
if (fromTheBottom) {
|
||||
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
|
||||
// don't place a lead byte twice in the permutation table
|
||||
if (leadByteUsed[leadBytes[leadByteIndex]]) {
|
||||
// TODO - or should this be an error condition?
|
||||
continue;
|
||||
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
||||
// lead byte already used
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->scriptReorderTable != NULL) {
|
||||
uprv_free(coll->scriptReorderTable);
|
||||
coll->scriptReorderTable = NULL;
|
||||
}
|
||||
coll->scriptOrderLength = 0;
|
||||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
fprintf(stdout, "\treturn - fromTheBottom reuse lead byte\n");
|
||||
return;
|
||||
}
|
||||
|
||||
coll->scriptReorderTable[leadBytes[leadByteIndex]] = toBottom;
|
||||
leadByteUsed[toBottom] = true;
|
||||
newLeadByteUsed[toBottom] = true;
|
||||
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
||||
toBottom++;
|
||||
}
|
||||
} else {
|
||||
for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) {
|
||||
// don't place a lead byte twice in the permutation table
|
||||
if (leadByteUsed[leadBytes[leadByteIndex]]) {
|
||||
// TODO - or should this be an error condition?
|
||||
continue;
|
||||
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
|
||||
// lead byte already used
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
if (coll->scriptReorderTable != NULL) {
|
||||
uprv_free(coll->scriptReorderTable);
|
||||
coll->scriptReorderTable = NULL;
|
||||
}
|
||||
coll->scriptOrderLength = 0;
|
||||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
fprintf(stdout, "\treturn - fromTheTop reuse lead byte\n");
|
||||
return;
|
||||
}
|
||||
|
||||
coll->scriptReorderTable[leadBytes[leadByteIndex]] = toTop;
|
||||
leadByteUsed[toTop] = true;
|
||||
newLeadByteUsed[toTop] = true;
|
||||
permutationSlotFilled[leadBytes[leadByteIndex]] = true;
|
||||
toTop--;
|
||||
}
|
||||
}
|
||||
next++;
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, coll->scriptReorderTable[i]);
|
||||
}
|
||||
fprintf(stdout, "\n@@@@ Lead Byte Used Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]);
|
||||
}
|
||||
fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Copy everything that's left over */
|
||||
int reorderCode = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (!permutationSlotFilled[i]) {
|
||||
while (reorderCode < 256 && leadByteUsed[reorderCode++]) {
|
||||
;
|
||||
while (reorderCode < 256 && newLeadByteUsed[reorderCode]) {
|
||||
reorderCode++;
|
||||
}
|
||||
coll->scriptReorderTable[i] = reorderCode;
|
||||
permutationSlotFilled[i] = true;
|
||||
newLeadByteUsed[reorderCode] = true;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REORDER_DEBUG
|
||||
fprintf(stdout, "\n@@@@ Script Reordering Table\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
fprintf(stdout, "\t%02x = %02x\n", i, coll->scriptReorderTable[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (internalScriptOrder != NULL) {
|
||||
uprv_free(internalScriptOrder);
|
||||
}
|
||||
|
||||
// force a regen of the latin one table since it is affected by the script reordering
|
||||
coll->latinOneRegenTable = TRUE;
|
||||
ucol_updateInternalState(coll, status);
|
||||
}
|
||||
|
||||
// API in ucol_imp.h
|
||||
@ -623,9 +696,9 @@ ucol_openRules( const UChar *rules,
|
||||
result->actualLocale = NULL;
|
||||
result->validLocale = NULL;
|
||||
result->requestedLocale = NULL;
|
||||
ucol_buildScriptReorderTable(result, status);
|
||||
ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
|
||||
ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
|
||||
ucol_buildScriptReorderTable(result);
|
||||
} else {
|
||||
cleanup:
|
||||
if(result != NULL) {
|
||||
|
@ -615,22 +615,22 @@ void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status){
|
||||
int32_t codeCount = 0;
|
||||
int32_t codeIndex = 0;
|
||||
char conversion[64];
|
||||
int32_t tokenLength = 0;
|
||||
const UChar* space;
|
||||
int32_t tokenLength = 0;
|
||||
const UChar* space;
|
||||
|
||||
const UChar* current = src->current;
|
||||
const UChar* end = u_memchr(src->current, 0x005d, src->end - src->current);
|
||||
|
||||
// eat leading whitespace
|
||||
while(current < end && u_isWhitespace(*current)) {
|
||||
current++;
|
||||
}
|
||||
while(current < end && u_isWhitespace(*current)) {
|
||||
current++;
|
||||
}
|
||||
|
||||
while(current < end) {
|
||||
space = u_memchr(current, 0x0020, end - current);
|
||||
space = space == 0 ? end : space;
|
||||
tokenLength = space - current;
|
||||
if (tokenLength < 4) {
|
||||
space = u_memchr(current, 0x0020, end - current);
|
||||
space = space == 0 ? end : space;
|
||||
tokenLength = space - current;
|
||||
if (tokenLength < 4) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
@ -642,44 +642,35 @@ void ucol_tok_parseScriptReorder(UColTokenParser *src, UErrorCode *status){
|
||||
}
|
||||
|
||||
if (codeCount == 0) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
||||
int32_t nonScriptReorderCodes = UCOL_REORDERCODE_LIMIT - UCOL_REORDERCODE_FIRST;
|
||||
codeCount += nonScriptReorderCodes; // to account for the non-script codes
|
||||
src->opts->scriptOrderLength = codeCount;
|
||||
src->opts->scriptOrder = (int32_t*)uprv_malloc(codeCount * sizeof(int32_t));
|
||||
current = src->current;
|
||||
current = src->current;
|
||||
|
||||
for (codeIndex = 0; codeIndex < nonScriptReorderCodes; codeIndex++) {
|
||||
src->opts->scriptOrder[codeIndex] = UCOL_REORDERCODE_FIRST + codeIndex;
|
||||
}
|
||||
|
||||
// eat leading whitespace
|
||||
while(current < end && u_isWhitespace(*current)) {
|
||||
current++;
|
||||
}
|
||||
// eat leading whitespace
|
||||
while(current < end && u_isWhitespace(*current)) {
|
||||
current++;
|
||||
}
|
||||
|
||||
while(current < end) {
|
||||
space = u_memchr(current, 0x0020, end - current);
|
||||
space = space == 0 ? end : space;
|
||||
tokenLength = space - current;
|
||||
if (tokenLength < 4) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
space = u_memchr(current, 0x0020, end - current);
|
||||
space = space == 0 ? end : space;
|
||||
tokenLength = space - current;
|
||||
if (tokenLength < 4) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
} else {
|
||||
u_UCharsToChars(current, conversion, tokenLength);
|
||||
conversion[tokenLength] = '\0';
|
||||
src->opts->scriptOrder[codeIndex] = ucol_findReorderingEntry(conversion);
|
||||
if (src->opts->scriptOrder[codeIndex] != USCRIPT_INVALID_CODE) {
|
||||
// non-script reorder code used in rule so remove it from the leading slot
|
||||
src->opts->scriptOrder[src->opts->scriptOrder[codeIndex] - UCOL_REORDERCODE_FIRST] = UCOL_REORDERCODE_IGNORE;
|
||||
} else {
|
||||
src->opts->scriptOrder[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
|
||||
}
|
||||
if (src->opts->scriptOrder[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
conversion[tokenLength] = '\0';
|
||||
src->opts->scriptOrder[codeIndex] = ucol_findReorderingEntry(conversion);
|
||||
if (src->opts->scriptOrder[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
src->opts->scriptOrder[codeIndex] = u_getPropertyValueEnum(UCHAR_SCRIPT, conversion);
|
||||
}
|
||||
if (src->opts->scriptOrder[codeIndex] == USCRIPT_INVALID_CODE) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
}
|
||||
codeIndex++;
|
||||
current += tokenLength;
|
||||
|
@ -606,9 +606,9 @@ public:
|
||||
* @see ucol_setScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
virtual int32_t getScriptOrder(int32_t *dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
virtual uint32_t getScriptOrder(int32_t *dest,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Set the ordering of scripts for this collator.
|
||||
@ -618,7 +618,7 @@ public:
|
||||
* @internal
|
||||
*/
|
||||
virtual void setScriptOrder(const int32_t* scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
const uint32_t scriptOrderLength,
|
||||
UErrorCode& status) ;
|
||||
|
||||
/**
|
||||
|
@ -675,9 +675,9 @@ public:
|
||||
* @see ucol_setScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
virtual int32_t getScriptOrder(int32_t* dest,
|
||||
const int32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
virtual uint32_t getScriptOrder(int32_t* dest,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Set the ordering of scripts for this collator.
|
||||
@ -687,7 +687,7 @@ public:
|
||||
* @internal
|
||||
*/
|
||||
virtual void setScriptOrder(const int32_t* scriptOrder,
|
||||
const int32_t scriptOrderLength,
|
||||
const uint32_t scriptOrderLength,
|
||||
UErrorCode& status);
|
||||
|
||||
|
||||
|
@ -138,14 +138,14 @@ typedef enum {
|
||||
* @internal
|
||||
*/
|
||||
typedef enum {
|
||||
UCOL_REORDERCODE_FIRST = 0x1000,
|
||||
UCOL_REORDERCODE_SPACE = 0x1000,
|
||||
UCOL_REORDERCODE_PUNCTUATION = 0x1001,
|
||||
UCOL_REORDERCODE_SYMBOL = 0x1002,
|
||||
UCOL_REORDERCODE_CURRENCY = 0x1003,
|
||||
UCOL_REORDERCODE_DIGIT = 0x1004,
|
||||
UCOL_REORDERCODE_LIMIT = 0x1005,
|
||||
UCOL_REORDERCODE_IGNORE = 0x7FFF
|
||||
UCOL_REORDERCODE_FIRST = 0x1000,
|
||||
UCOL_REORDERCODE_SPACE = 0x1000,
|
||||
UCOL_REORDERCODE_PUNCTUATION = 0x1001,
|
||||
UCOL_REORDERCODE_SYMBOL = 0x1002,
|
||||
UCOL_REORDERCODE_CURRENCY = 0x1003,
|
||||
UCOL_REORDERCODE_DIGIT = 0x1004,
|
||||
UCOL_REORDERCODE_LIMIT = 0x1005,
|
||||
UCOL_REORDERCODE_IGNORE = 0x7FFF
|
||||
} UColReorderCode;
|
||||
|
||||
/**
|
||||
@ -547,10 +547,10 @@ ucol_setStrength(UCollator *coll,
|
||||
* @see ucol_setScriptOrder
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
U_INTERNAL uint32_t U_EXPORT2
|
||||
ucol_getScriptOrder(const UCollator* coll,
|
||||
int32_t* dest,
|
||||
const int32_t destCapacity,
|
||||
const uint32_t destCapacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
@ -564,7 +564,7 @@ ucol_getScriptOrder(const UCollator* coll,
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucol_setScriptOrder(UCollator* coll,
|
||||
const int32_t* scriptOrder,
|
||||
const int32_t scriptOrderLength);
|
||||
const uint32_t scriptOrderLength);
|
||||
|
||||
/**
|
||||
* Get the display name for a UCollator.
|
||||
|
@ -29,6 +29,7 @@
|
||||
* equivalent to the word 'one'.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
@ -950,8 +950,8 @@ static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName,
|
||||
src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
||||
*first = *second = 0;
|
||||
|
||||
/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
||||
the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
||||
/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
||||
the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
||||
while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
|
||||
strength = src.parsedToken.strength;
|
||||
chOffset = src.parsedToken.charsOffset;
|
||||
@ -5829,6 +5829,12 @@ static void TestBeforeRuleWithScriptReordering(void)
|
||||
UChar rules[500];
|
||||
uint32_t rulesLength = 0;
|
||||
UScriptCode scriptOrder[1] = {USCRIPT_GREEK};
|
||||
UCollationResult collResult;
|
||||
|
||||
uint8_t baseKey[256];
|
||||
uint32_t baseKeyLength;
|
||||
uint8_t beforeKey[256];
|
||||
uint32_t beforeKeyLength;
|
||||
|
||||
UChar base[] = { 0x03b1 }; /* base */
|
||||
int32_t baseLen = sizeof(base)/sizeof(*base);
|
||||
@ -5836,15 +5842,13 @@ static void TestBeforeRuleWithScriptReordering(void)
|
||||
UChar before[] = { 0x0e01 }; /* ko kai */
|
||||
int32_t beforeLen = sizeof(before)/sizeof(*before);
|
||||
|
||||
UCollationResult collResult;
|
||||
uint8_t baseKey[256];
|
||||
uint32_t baseKeyLength;
|
||||
uint8_t beforeKey[256];
|
||||
uint32_t beforeKeyLength;
|
||||
/*UChar *data[] = { before, base };
|
||||
genericRulesStarter(srules, data, 2);*/
|
||||
|
||||
log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
|
||||
log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
|
||||
|
||||
/* build collator */
|
||||
|
||||
/* build collator */
|
||||
rulesLength = u_unescape(srules, rules, LEN(rules));
|
||||
myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
@ -5852,85 +5856,174 @@ static void TestBeforeRuleWithScriptReordering(void)
|
||||
return;
|
||||
}
|
||||
|
||||
/* check collation results - before rule applied but not script reordering */
|
||||
/* check collation results - before rule applied but not script reordering */
|
||||
collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
|
||||
if (collResult != UCOL_GREATER) {
|
||||
log_err("Collation result not correct before script reordering = %d\n", collResult);
|
||||
}
|
||||
if (collResult != UCOL_GREATER) {
|
||||
log_err("Collation result not correct before script reordering = %d\n", collResult);
|
||||
}
|
||||
|
||||
/* check the lead byte of the collation keys before script reordering */
|
||||
/* check the lead byte of the collation keys before script reordering */
|
||||
baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
|
||||
beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
|
||||
if (baseKey[0] != beforeKey[0]) {
|
||||
log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
|
||||
}
|
||||
|
||||
/* reirder the scripts */
|
||||
/* reorder the scripts */
|
||||
ucol_setScriptOrder(myCollation, scriptOrder, 1);
|
||||
|
||||
/* check collation results - before rule applied and after script reordering */
|
||||
/* check collation results - before rule applied and after script reordering */
|
||||
collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
|
||||
if (collResult != UCOL_GREATER) {
|
||||
log_err("Collation result not correct after script reordering = %d\n", collResult);
|
||||
}
|
||||
if (collResult != UCOL_GREATER) {
|
||||
log_err("Collation result not correct after script reordering = %d\n", collResult);
|
||||
}
|
||||
|
||||
/* check the lead byte of the collation keys after script reordering */
|
||||
/* check the lead byte of the collation keys after script reordering */
|
||||
ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
|
||||
ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
|
||||
if (baseKey[0] != beforeKey[0]) {
|
||||
log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
|
||||
log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
|
||||
}
|
||||
|
||||
ucol_close(myCollation);
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility function to test one collation reordering test case.
|
||||
* @param testcases Array of test cases.
|
||||
* @param n_testcases Size of the array testcases.
|
||||
* @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
|
||||
* @param n_rules Size of the array str_rules.
|
||||
*/
|
||||
static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], uint32_t reorderTokensLen)
|
||||
{
|
||||
int testCaseNum;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *myCollation;
|
||||
|
||||
for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
|
||||
myCollation = ucol_open("", &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
/*ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
||||
ucol_setStrength(myCollation, UCOL_TERTIARY);*/
|
||||
ucol_setScriptOrder(myCollation, reorderTokens, reorderTokensLen);
|
||||
for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
|
||||
doTest(myCollation,
|
||||
testCases[testCaseNum].source,
|
||||
testCases[testCaseNum].target,
|
||||
testCases[testCaseNum].result
|
||||
);
|
||||
}
|
||||
ucol_close(myCollation);
|
||||
}
|
||||
}
|
||||
|
||||
static void TestGreekFirstReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek]"
|
||||
};
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek]"
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0391}, UCOL_EQUAL },
|
||||
{ {0x0041}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
|
||||
{ {0x0060}, {0x0391}, UCOL_LESS },
|
||||
{ {0x0391}, {0xe2dc}, UCOL_LESS },
|
||||
{ {0x0391}, {0x0060}, UCOL_GREATER },
|
||||
};
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
const int32_t apiRules[] = {
|
||||
USCRIPT_GREEK
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0391}, UCOL_EQUAL },
|
||||
{ {0x0041}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
|
||||
{ {0x0060}, {0x0391}, UCOL_LESS },
|
||||
{ {0x0391}, {0xe2dc}, UCOL_LESS },
|
||||
{ {0x0391}, {0x0060}, UCOL_GREATER },
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
|
||||
}
|
||||
|
||||
static void TestGreekLastReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Zzzz Grek]"
|
||||
};
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Zzzz Grek]"
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0391}, UCOL_EQUAL },
|
||||
{ {0x0041}, {0x0391}, UCOL_LESS },
|
||||
{ {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
|
||||
{ {0x0060}, {0x0391}, UCOL_LESS },
|
||||
{ {0x0391}, {0xe2dc}, UCOL_GREATER },
|
||||
};
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
const int32_t apiRules[] = {
|
||||
USCRIPT_UNKNOWN, USCRIPT_GREEK
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0391}, UCOL_EQUAL },
|
||||
{ {0x0041}, {0x0391}, UCOL_LESS },
|
||||
{ {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
|
||||
{ {0x0060}, {0x0391}, UCOL_LESS },
|
||||
{ {0x0391}, {0xe2dc}, UCOL_GREATER },
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
|
||||
}
|
||||
|
||||
static void TestNonScriptReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
|
||||
};
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0041}, UCOL_LESS },
|
||||
{ {0x0041}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x0060}, {0x0041}, UCOL_LESS },
|
||||
{ {0x0060}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x0024}, {0x0041}, UCOL_GREATER },
|
||||
};
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
const int32_t apiRules[] = {
|
||||
USCRIPT_GREEK, UCOL_REORDERCODE_SYMBOL, UCOL_REORDERCODE_DIGIT, USCRIPT_LATIN,
|
||||
UCOL_REORDERCODE_PUNCTUATION, UCOL_REORDERCODE_SPACE, USCRIPT_UNKNOWN,
|
||||
UCOL_REORDERCODE_CURRENCY
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x0391}, {0x0041}, UCOL_LESS },
|
||||
{ {0x0041}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x0060}, {0x0041}, UCOL_LESS },
|
||||
{ {0x0060}, {0x0391}, UCOL_GREATER },
|
||||
{ {0x0024}, {0x0041}, UCOL_GREATER },
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
|
||||
}
|
||||
|
||||
static void TestHaniReorder(void)
|
||||
{
|
||||
const char* strRules[] = {
|
||||
"[scriptReorder Hani]"
|
||||
};
|
||||
|
||||
const int32_t apiRules[] = {
|
||||
USCRIPT_HAN
|
||||
};
|
||||
|
||||
const static OneTestCase privateUseCharacterStrings[] = {
|
||||
{ {0x4e00}, {0x0041}, UCOL_LESS },
|
||||
{ {0x4e00}, {0x0060}, UCOL_GREATER },
|
||||
{ {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
|
||||
{ {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
|
||||
{ {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
|
||||
{ {0xfa27}, {0x0041}, UCOL_LESS },
|
||||
{ {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
|
||||
}
|
||||
|
||||
|
||||
@ -6011,11 +6104,6 @@ void addMiscCollTest(TestNode** root)
|
||||
TEST(TestOutOfBuffer5468);
|
||||
TEST(TestSameStrengthList);
|
||||
|
||||
TEST(TestGreekFirstReorder);
|
||||
TEST(TestGreekLastReorder);
|
||||
TEST(TestBeforeRuleWithScriptReordering);
|
||||
TEST(TestNonScriptReorder);
|
||||
|
||||
TEST(TestSameStrengthListQuoted);
|
||||
TEST(TestSameStrengthListSupplemental);
|
||||
TEST(TestSameStrengthListQwerty);
|
||||
@ -6027,6 +6115,12 @@ void addMiscCollTest(TestNode** root)
|
||||
TEST(TestPrivateUseCharactersInList);
|
||||
TEST(TestPrivateUseCharactersInRange);
|
||||
TEST(TestInvalidListsAndRanges);
|
||||
|
||||
TEST(TestGreekFirstReorder);
|
||||
TEST(TestGreekLastReorder);
|
||||
TEST(TestBeforeRuleWithScriptReordering);
|
||||
TEST(TestNonScriptReorder);
|
||||
TEST(TestHaniReorder);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
@ -348,7 +348,6 @@ fprintf(stderr, "isPackage = %x\n", isPackage);
|
||||
return U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
if(isModified) {
|
||||
fprintf(stderr, "@@@@ Calling Package::extractItem\n");
|
||||
pkg->extractItem(destPath, outFilename, 0, outType);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user