Cleaning up the collation element iterator code.

X-SVN-Rev: 3946
This commit is contained in:
Syn Wee Quek 2001-03-07 21:01:53 +00:00
parent 0e1ce27f8f
commit 4196c80eee
2 changed files with 80 additions and 49 deletions

View File

@ -1800,13 +1800,16 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
uint32_t length, UErrorCode *status)
{
uint32_t order;
if (ch < 0xFF)
if (ch < 0xFF) {
order = UCA->latinOneMapping[ch];
else
}
else {
order = ucmp32_get(UCA->mapping, ch);
}
if (order >= UCOL_NOT_FOUND)
if (order >= UCOL_NOT_FOUND) {
order = getSpecialPrevCE(UCA, order, collationSource, length, status);
}
if (order == UCOL_NOT_FOUND)
{
@ -1819,12 +1822,14 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
uint32_t
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
LCount = 19, VCount = 21, TCount = 28,
NCount = VCount * TCount, // 588
SCount = LCount * NCount; // 11172
//LLimit = LBase + LCount, // 1113
//VLimit = VBase + VCount, // 1176
//TLimit = TBase + TCount, // 11C3
//SLimit = SBase + SCount; // D7A4
NCount = VCount * TCount, /* 588 */
SCount = LCount * NCount; /* 11172 */
/*
LLimit = LBase + LCount, // 1113
VLimit = VBase + VCount, // 1176
TLimit = TBase + TCount, // 11C3
SLimit = SBase + SCount; // D7A4
*/
/*
once we have failed to find a match for codepoint cp, and are in the
@ -1864,6 +1869,8 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
return *(collationSource->toReturn);
} else {
/*
synwee :TODO
Heh heh.... waiting for vladimir's code, me cut and paste
Jamo is Special
do recursive processing of L, V, and T with fetchCE (but T only if not
equal to TBase!!)
@ -1893,8 +1900,9 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
{
uint32_t cp = ((prevChar << 10UL) + ch - ((0xd800 << 10UL) + 0xdc00));
collationSource->pos --;
if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00))
if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00)) {
return 0; /* illegal code value, use completely ignorable! */
}
/*
This is a code point minus 0x10000, that's what algorithm requires
@ -1904,14 +1912,16 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
collationSource->toReturn = collationSource->CEpos;
*(collationSource->CEpos ++) = order;
}
else
else {
return 0; /* completely ignorable */
}
}
else
{
/* otherwise */
if (UTF_IS_FIRST_SURROGATE(ch) || (ch & 0xFFFE) == 0xFFFE)
if (UTF_IS_FIRST_SURROGATE(ch) || (ch & 0xFFFE) == 0xFFFE) {
return 0; /* completely ignorable */
}
/* Make up an artificial CE from code point as per UCA */
*(collationSource->CEpos ++) = 0xD0800303 | (ch & 0xF000) << 12 |
@ -2041,7 +2051,6 @@ uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, U
* This function handles the special CEs like contractions, expansions,
* surrogates, Thai.
* It is called by both getPrevCE and getPrevUCA
* synwee
*/
uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
collIterate *source, uint32_t length,
@ -2075,27 +2084,27 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
/*
someone else has already allocated something
*/
if (source->writableBuffer != source->stackWritableBuffer)
if (source->writableBuffer != source->stackWritableBuffer) {
uprv_free(source->writableBuffer);
}
source->writableBuffer =
(UChar *)uprv_malloc(size * sizeof(UChar));
source->isThai = FALSE;
}
UChar *sourceCopy = source->string;
UChar *targetCopy = source->writableBuffer;
while (sourceCopy <= strend)
{
while (sourceCopy <= strend) {
if (UCOL_ISTHAIPREVOWEL(*sourceCopy) &&
/* This is the combination that needs to be swapped */
UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1)))
{
UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1))) {
*(targetCopy) = *(sourceCopy + 1);
*(targetCopy + 1) = *(sourceCopy);
targetCopy += 2;
sourceCopy += 2;
}
else
else {
*(targetCopy ++) = *(sourceCopy ++);
}
}
source->pos = targetCopy;
source->len = targetCopy;
@ -2123,8 +2132,8 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
constart = UCharOffset = (UChar *)coll->image + getContractOffset(CE);
strend = source->len;
if ((uint32_t)(strend - source->pos) == length)
{ /* this is the start of string */
if ((uint32_t)(strend - source->pos) == length) {
/* this is the start of string */
CE = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
break;
@ -2136,11 +2145,11 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
UCharOffset += *UCharOffset;
schar = *(source->pos - 1);
while (schar > (tchar = *UCharOffset))
while (schar > (tchar = *UCharOffset)) {
UCharOffset ++;
}
if (schar != tchar)
{
if (schar != tchar) {
/*
we didn't find the correct codepoint. We can use either the first or
the last CE
@ -2148,12 +2157,14 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
/* testing if (tchar != 0xFFFF) */
UCharOffset = constart;
}
else
else {
/* Move up one character */
source->pos --;
}
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
if (!isContraction(CE))
if (!isContraction(CE)) {
break;
}
}
break;
case EXPANSION_TAG:
@ -2165,16 +2176,20 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
/* find the offset to expansion table */
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
size = getExpansionCount(CE);
if (size != 0)
if (size != 0) {
/*
if there are fewer than 16 elements in the expansion, the list is not terminated
*/
for (count = 0; count < size; count++)
for (count = 0; count < size; count++) {
*(source->CEpos ++) = *CEOffset++;
else
}
}
else {
/* else, we do */
while (*CEOffset != 0)
while (*CEOffset != 0) {
*(source->CEpos ++) = *CEOffset ++;
}
}
source->toReturn = source->CEpos - 1;
return *(source->toReturn);
case CHARSET_TAG:
@ -2185,7 +2200,9 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
CE=0;
break;
}
if (CE <= UCOL_NOT_FOUND) break;
if (CE <= UCOL_NOT_FOUND) {
break;
}
}
return CE;
}

View File

@ -45,16 +45,18 @@ ucol_openElements(const UCollator *coll,
{
UCollationElements *result;
if (U_FAILURE(*status))
if (U_FAILURE(*status)) {
return NULL;
}
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
result->collator_ = coll;
/* gets the correct length of the null-terminated string */
if (textLength == -1)
if (textLength == -1) {
textLength = u_strlen(text);
}
result->length_ = textLength;
result->reset_ = TRUE;
@ -67,10 +69,13 @@ U_CAPI void
ucol_closeElements(UCollationElements *elems)
{
collIterate *ci = &elems->iteratordata_;
if (ci->writableBuffer != ci->stackWritableBuffer)
if (ci->writableBuffer != ci->stackWritableBuffer) {
uprv_free(ci->writableBuffer);
}
if (elems->iteratordata_.isWritable && elems->iteratordata_.string != NULL)
{
uprv_free(elems->iteratordata_.string);
}
uprv_free(elems);
}
@ -84,8 +89,7 @@ ucol_reset(UCollationElements *elems)
ci->CEpos = ci->toReturn = ci->CEs;
ci->isThai = TRUE;
if (ci->stackWritableBuffer != ci->writableBuffer)
{
if (ci->stackWritableBuffer != ci->writableBuffer) {
uprv_free(ci->writableBuffer);
ci->writableBuffer = ci->stackWritableBuffer;
}
@ -96,14 +100,15 @@ ucol_next(UCollationElements *elems,
UErrorCode *status)
{
int32_t result;
if (U_FAILURE(*status))
if (U_FAILURE(*status)) {
return UCOL_NULLORDER;
}
elems->reset_ = FALSE;
UCOL_GETNEXTCE(result, elems->collator_, elems->iteratordata_, status);
/* testing
#ifdef UCOL_DEBUG
if ((elems->iteratordata_).CEpos > (elems->iteratordata_).toReturn)
{
result = *((elems->iteratordata_).toReturn++);
@ -130,10 +135,11 @@ ucol_next(UCollationElements *elems,
}
else
(result) = UCOL_NO_MORE_CES;
*/
#endif
if (result == UCOL_NO_MORE_CES)
if (result == UCOL_NO_MORE_CES) {
result = UCOL_NULLORDER;
}
return result;
}
@ -141,8 +147,9 @@ U_CAPI int32_t
ucol_previous(UCollationElements *elems,
UErrorCode *status)
{
if(U_FAILURE(*status))
if(U_FAILURE(*status)) {
return UCOL_NULLORDER;
}
else
{
int32_t result;
@ -156,8 +163,7 @@ ucol_previous(UCollationElements *elems,
UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_,
elems->length_, status);
/* synwee : to be removed, only for testing
#ifdef UCOL_DEBUG
const UCollator *coll = elems->collator_;
collIterate *data = &(elems->iteratordata_);
int32_t length = elems->length_;
@ -191,10 +197,11 @@ ucol_previous(UCollationElements *elems,
}
}
}
*/
#endif
if (result == UCOL_NO_MORE_CES)
if (result == UCOL_NO_MORE_CES) {
result = UCOL_NULLORDER;
}
return result;
}
@ -215,17 +222,22 @@ ucol_setText( UCollationElements *elems,
int32_t textLength,
UErrorCode *status)
{
if (U_FAILURE(*status))
if (U_FAILURE(*status)) {
return;
}
/* gets the correct length of the null-terminated string */
if (textLength == -1)
if (textLength == -1) {
textLength = u_strlen(text);
}
elems->length_ = textLength;
if (elems->iteratordata_.isWritable && elems->iteratordata_.string != NULL)
{
uprv_free(elems->iteratordata_.string);
}
init_collIterate(text, textLength, &elems->iteratordata_, FALSE);
elems->reset_ = TRUE;
@ -235,8 +247,9 @@ U_CAPI UTextOffset
ucol_getOffset(const UCollationElements *elems)
{
const collIterate *ci = &(elems->iteratordata_);
if (ci->isThai == TRUE)
if (ci->isThai == TRUE) {
return ci->pos - ci->string;
}
return ci->pos - ci->writableBuffer;
}
@ -246,8 +259,9 @@ ucol_setOffset(UCollationElements *elems,
UTextOffset offset,
UErrorCode *status)
{
if (U_FAILURE(*status))
if (U_FAILURE(*status)) {
return;
}
collIterate *ci = &(elems->iteratordata_);
ci->pos = ci->string + offset;