ICU-96 primary compression in, needs to be turned on with UCOL_PRIM_COMPRESSION

X-SVN-Rev: 4555
This commit is contained in:
Vladimir Weinstein 2001-04-25 18:00:41 +00:00
parent 331dcd229f
commit 6ca6bab953
2 changed files with 65 additions and 44 deletions

View File

@ -2408,23 +2408,28 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */ /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
/* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */ /* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */
#ifdef UCOL_PRIM_COMPRESSION #ifdef UCOL_PRIM_COMPRESSION
/* calculate sortkey size */
if(primary1 != UCOL_IGNORABLE) { if(primary1 != UCOL_IGNORABLE) {
if(notIsContinuation) { if(notIsContinuation) {
if(primary2 != UCOL_IGNORABLE) { /* This is a two byter, should be compressed */ if(leadPrimary == primary1) {
if(primary1 != leadPrimary) {
if(leadPrimary != 0) {
currentSize++;
}
currentSize++;
leadPrimary = primary1;
}
currentSize++; currentSize++;
} else { /* This is a one byter, no compression */ } else {
if(leadPrimary != 0) { /* But if there was some, finish the sequence */ if(leadPrimary != 0) {
currentSize++; currentSize++;
leadPrimary = 0;
} }
currentSize++; if(primary2 == UCOL_IGNORABLE) {
/* one byter, not compressed */
currentSize++;
leadPrimary = 0;
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) {
/* not compressible */
leadPrimary = 0;
currentSize+=2;
} else { /* compress */
leadPrimary = primary1;
currentSize+=2;
}
} }
} else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
currentSize++; currentSize++;
@ -2432,7 +2437,7 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
currentSize++; currentSize++;
} }
} }
} }
#else #else
if(primary1 != UCOL_IGNORABLE) { if(primary1 != UCOL_IGNORABLE) {
currentSize++; currentSize++;
@ -2812,30 +2817,37 @@ ucol_calcSortKey(const UCollator *coll,
/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */ /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
/* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */ /* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */
#ifdef UCOL_PRIM_COMPRESSION #ifdef UCOL_PRIM_COMPRESSION
if(primary1 != UCOL_IGNORABLE) { /* regular and simple sortkey calc */
if(notIsContinuation) { if(primary1 != UCOL_IGNORABLE) {
if(primary2 != UCOL_IGNORABLE) { /* This is a two byter, should be compressed */ if(notIsContinuation) {
if(primary1 != leadPrimary) { if(leadPrimary == primary1) {
*primaries++ = primary2;
} else {
if(leadPrimary != 0) { if(leadPrimary != 0) {
*primaries++ = (primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN; *primaries++ = (primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN;
} }
*primaries++ = leadPrimary = primary1; if(primary2 == UCOL_IGNORABLE) {
/* one byter, not compressed */
*primaries++ = primary1;
leadPrimary = 0;
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) {
/* not compressible */
leadPrimary = 0;
*primaries++ = primary1;
*primaries++ = primary2;
} else { /* compress */
*primaries++ = leadPrimary = primary1;
*primaries++ = primary2;
}
} }
*primaries++ = primary2; /* second part */ } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
} else { /* This is a one byter, no compression */ *primaries++ = primary1;
if(leadPrimary != 0) { /* But if there was some, finish the sequence */ if(primary2 != UCOL_IGNORABLE) {
*primaries++ = (primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN; *primaries++ = primary2; /* second part */
leadPrimary = 0; /* and reset it */
} }
*primaries++ = primary1; /* add the primary */
} }
} else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ }
*primaries++ = primary1;
if(primary2 != UCOL_IGNORABLE) {
*primaries++ = primary2; /* second part */
}
}
}
#else #else
if(primary1 != UCOL_IGNORABLE) { if(primary1 != UCOL_IGNORABLE) {
*primaries++ = primary1; /* scriptOrder[primary1]; */ /* This is the script ordering thingie */ *primaries++ = primary1; /* scriptOrder[primary1]; */ /* This is the script ordering thingie */
@ -3270,22 +3282,29 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
/* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */ /* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */
/* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */ /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */
#ifdef UCOL_PRIM_COMPRESSION #ifdef UCOL_PRIM_COMPRESSION
/* regular and simple sortkey calc */
if(primary1 != UCOL_IGNORABLE) { if(primary1 != UCOL_IGNORABLE) {
if(notIsContinuation) { if(notIsContinuation) {
if(primary2 != UCOL_IGNORABLE) { /* This is a two byter, should be compressed */ if(leadPrimary == primary1) {
if(primary1 != leadPrimary) { *primaries++ = primary2;
if(leadPrimary != 0) { } else {
*primaries++ = (primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN; if(leadPrimary != 0) {
}
*primaries++ = leadPrimary = primary1;
}
*primaries++ = primary2; /* second part */
} else { /* This is a one byter, no compression */
if(leadPrimary != 0) { /* But if there was some, finish the sequence */
*primaries++ = (primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN; *primaries++ = (primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN;
leadPrimary = 0;
} }
*primaries++ = primary1; /* add the primary */ if(primary2 == UCOL_IGNORABLE) {
/* one byter, not compressed */
*primaries++ = primary1;
leadPrimary = 0;
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) {
/* not compressible */
leadPrimary = 0;
*primaries++ = primary1;
*primaries++ = primary2;
} else { /* compress */
*primaries++ = leadPrimary = primary1;
*primaries++ = primary2;
}
} }
} else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */ } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
*primaries++ = primary1; *primaries++ = primary1;
@ -3293,7 +3312,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
*primaries++ = primary2; /* second part */ *primaries++ = primary2; /* second part */
} }
} }
} }
#else #else
if(primary1 != UCOL_IGNORABLE) { if(primary1 != UCOL_IGNORABLE) {
*primaries++ = primary1; /* scriptOrder[primary1]; */ /* This is the script ordering thingie */ *primaries++ = primary1; /* scriptOrder[primary1]; */ /* This is the script ordering thingie */

View File

@ -409,6 +409,8 @@ enum {
UCOL_BYTE_FIRST_TAILORED = 0x04, UCOL_BYTE_FIRST_TAILORED = 0x04,
UCOL_BYTE_COMMON = 0x05, UCOL_BYTE_COMMON = 0x05,
UCOL_BYTE_FIRST_UCA = UCOL_BYTE_COMMON, UCOL_BYTE_FIRST_UCA = UCOL_BYTE_COMMON,
UCOL_BYTE_LAST_LATIN_PRIMARY = 0x4C,
UCOL_BYTE_FIRST_NON_LATIN_PRIMARY = 0x4D,
UCOL_BYTE_UNSHIFTED_MAX = 0xFF UCOL_BYTE_UNSHIFTED_MAX = 0xFF
}; };