ICU-861
Updated backwards collation element iterator codes. X-SVN-Rev: 3760
This commit is contained in:
parent
6d5b35e584
commit
ec4c07eeb0
@ -7,7 +7,6 @@
|
||||
* Date Name Comments
|
||||
* 02/16/2001 synwee Added internal method getPrevSpecialCE
|
||||
*/
|
||||
|
||||
#include "ucolimp.h"
|
||||
#include "ucoltok.h"
|
||||
|
||||
@ -1068,12 +1067,13 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
|
||||
T += TBase;
|
||||
|
||||
// return the first CE, but first put the rest into the expansion buffer
|
||||
|
||||
if (!collationSource->JamoSpecial) { // FAST PATH
|
||||
|
||||
*(collationSource->CEpos++) = ucmp32_get(UCA->mapping, V);
|
||||
if (T != TBase) {
|
||||
*(collationSource->CEpos++) = ucmp32_get(UCA->mapping, T);
|
||||
}
|
||||
|
||||
return ucmp32_get(UCA->mapping, L); // return first one
|
||||
|
||||
} else { // Jamo is Special
|
||||
@ -1103,6 +1103,7 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
|
||||
}
|
||||
/* This is a code point minus 0x10000, that's what algorithm requires */
|
||||
order = 0xE0010303 | (cp & 0xFFE00) << 8;
|
||||
|
||||
*(collationSource->CEpos++) = 0x80200080 | (cp & 0x001FF) << 22;
|
||||
} else {
|
||||
return 0; /* completely ignorable */
|
||||
@ -1144,7 +1145,7 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
We have to check if ch is possibly a first surrogate - then we need to
|
||||
take the next code unit and make a bigger CE
|
||||
*/
|
||||
UChar nextChar;
|
||||
UChar prevChar;
|
||||
const int
|
||||
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
|
||||
LCount = 19, VCount = 21, TCount = 28,
|
||||
@ -1184,11 +1185,13 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
*/
|
||||
if (!collationSource->JamoSpecial)
|
||||
{
|
||||
*(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, L);
|
||||
*(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, V);
|
||||
if (T != TBase)
|
||||
*(collationSource->CEpos++) = ucmp32_get(UCA->mapping, T);
|
||||
/* return first one */
|
||||
return ucmp32_get(UCA->mapping, L);
|
||||
*(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, T);
|
||||
|
||||
collationSource->toReturn = collationSource->CEpos - 1;
|
||||
return *(collationSource->toReturn);
|
||||
} else {
|
||||
/*
|
||||
Jamo is Special
|
||||
@ -1213,28 +1216,23 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
|
||||
if (UTF_IS_SECOND_SURROGATE(ch))
|
||||
{
|
||||
if ((collationSource->len - collationSource->pos != length) &&
|
||||
(UTF_IS_FIRST_SURROGATE(nextChar = *collationSource->pos)))
|
||||
UChar *temp = collationSource->pos;
|
||||
if (((collationSource->string < temp) ||
|
||||
(collationSource->writableBuffer < temp)) &&
|
||||
(UTF_IS_FIRST_SURROGATE(prevChar = *(collationSource->pos - 1))))
|
||||
{
|
||||
uint32_t cp = ((ch << 10UL) + nextChar - ((0xd800 << 10UL) + 0xdc00));
|
||||
if (collationSource->pos != collationSource->writableBuffer)
|
||||
collationSource->pos --;
|
||||
else
|
||||
{
|
||||
collationSource->pos = collationSource->string +
|
||||
(length - (collationSource->len - collationSource->writableBuffer));
|
||||
collationSource->len = collationSource->string + length;
|
||||
collationSource->isThai = TRUE;
|
||||
}
|
||||
uint32_t cp = ((prevChar << 10UL) + ch - ((0xd800 << 10UL) + 0xdc00));
|
||||
collationSource->pos --;
|
||||
if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00))
|
||||
return 0; /* illegal code value, use completely ignorable! */
|
||||
|
||||
/*
|
||||
This is a code point minus 0x10000, that's what algorithm requires
|
||||
*/
|
||||
order = 0xE0010303 | (cp & 0xFFE00) << 8;
|
||||
*(collationSource->CEpos ++) = 0x80200080 | (cp & 0x001FF) << 22;
|
||||
collationSource->toReturn ++;
|
||||
*(collationSource->CEpos ++) = 0xE0010303 | (cp & 0xFFE00) << 8;
|
||||
order = 0x80200080 | (cp & 0x001FF) << 22;
|
||||
collationSource->toReturn = collationSource->CEpos;
|
||||
*(collationSource->CEpos ++) = order;
|
||||
}
|
||||
else
|
||||
return 0; /* completely ignorable */
|
||||
@ -1246,9 +1244,11 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
return 0; /* completely ignorable */
|
||||
|
||||
/* Make up an artificial CE from code point as per UCA */
|
||||
order = 0xD08003C3 | (ch & 0xF000) << 12 | (ch & 0x0FE0) << 11;
|
||||
*(collationSource->CEpos ++) = 0x04000080 | (ch & 0x001F) << 27;
|
||||
collationSource->toReturn ++;
|
||||
*(collationSource->CEpos ++) = 0xD08003C3 | (ch & 0xF000) << 12 |
|
||||
(ch & 0x0FE0) << 11;
|
||||
collationSource->toReturn = collationSource->CEpos;
|
||||
order = 0x04000080 | (ch & 0x001F) << 27;
|
||||
*(collationSource->CEpos ++) = order;
|
||||
}
|
||||
}
|
||||
return order; /* return the CE */
|
||||
@ -1397,12 +1397,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
if (source->isThai == TRUE)
|
||||
{ /* if we encountered Thai prevowel & the string is not yet touched */
|
||||
source->isThai = FALSE;
|
||||
/*
|
||||
sigh... to cater for getNextCE, we'll have to modify and store the
|
||||
whole string instead of a substring as in getSpecialCE
|
||||
*/
|
||||
UCharOffset = source->pos;
|
||||
strend = source->len;
|
||||
strend = source->pos;
|
||||
size = strend - source->string;
|
||||
if (size > UCOL_WRITABLE_BUFFER_SIZE)
|
||||
{
|
||||
@ -1417,22 +1412,21 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
}
|
||||
UChar *sourceCopy = source->string;
|
||||
UChar *targetCopy = source->writableBuffer;
|
||||
while (sourceCopy < strend)
|
||||
while (sourceCopy <= strend)
|
||||
{
|
||||
if (UCOL_ISTHAIPREVOWEL(*sourceCopy) &&
|
||||
/* This is the combination that needs to be swapped */
|
||||
UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1)))
|
||||
{
|
||||
*(targetCopy) = *(sourceCopy + count + 1);
|
||||
*(targetCopy+1) = *(sourceCopy + count);
|
||||
targetCopy+=2;
|
||||
sourceCopy+=2;
|
||||
*(targetCopy) = *(sourceCopy + 1);
|
||||
*(targetCopy + 1) = *(sourceCopy);
|
||||
targetCopy += 2;
|
||||
sourceCopy += 2;
|
||||
}
|
||||
else
|
||||
*(targetCopy++) = *(sourceCopy++);
|
||||
*(targetCopy ++) = *(sourceCopy ++);
|
||||
}
|
||||
source->pos = source->writableBuffer +
|
||||
(UCharOffset - source->string);
|
||||
source->pos = targetCopy;
|
||||
source->len = targetCopy;
|
||||
source->CEpos = source->toReturn = source->CEs;
|
||||
CE = UCOL_IGNORABLE;
|
||||
@ -1470,32 +1464,22 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
*/
|
||||
UCharOffset += *UCharOffset;
|
||||
|
||||
schar = *source->pos;
|
||||
schar = *(source->pos - 1);
|
||||
while (schar > (tchar = *UCharOffset))
|
||||
UCharOffset ++;
|
||||
|
||||
if (schar != tchar)
|
||||
{
|
||||
{
|
||||
/*
|
||||
we didn't find the correct codepoint. We can use either the first or
|
||||
the last CE
|
||||
*/
|
||||
if (tchar != 0xFFFF)
|
||||
UCharOffset = constart;
|
||||
/* testing if (tchar != 0xFFFF) */
|
||||
UCharOffset = constart;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Move up one character */
|
||||
if (source->pos != source->writableBuffer)
|
||||
source->pos --;
|
||||
else
|
||||
{
|
||||
source->pos = source->string +
|
||||
(length - (source->len - source->writableBuffer));
|
||||
source->len = source->string + length;
|
||||
source->isThai = TRUE;
|
||||
}
|
||||
}
|
||||
source->pos --;
|
||||
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
||||
if (!isContraction(CE))
|
||||
break;
|
||||
@ -1521,7 +1505,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
while (*CEOffset != 0)
|
||||
*(source->CEpos ++) = *CEOffset ++;
|
||||
source->toReturn = source->CEpos - 1;
|
||||
return *(source->toReturn --);
|
||||
return *(source->toReturn);
|
||||
case CHARSET_TAG:
|
||||
/* probably after 1.8 */
|
||||
return UCOL_NOT_FOUND;
|
||||
|
@ -57,6 +57,7 @@ ucol_openElements(const UCollator *coll,
|
||||
textLength = u_strlen(text);
|
||||
|
||||
result->length_ = textLength;
|
||||
result->reset_ = TRUE;
|
||||
init_collIterate(text, textLength, &result->iteratordata_, FALSE);
|
||||
|
||||
return result;
|
||||
@ -77,6 +78,7 @@ U_CAPI void
|
||||
ucol_reset(UCollationElements *elems)
|
||||
{
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
elems->reset_ = TRUE;
|
||||
ci->pos = ci->string;
|
||||
ci->len = ci->string + elems->length_;
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
@ -97,40 +99,42 @@ U_CAPI int32_t
|
||||
ucol_next(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
int32_t result;
|
||||
if (U_FAILURE(*status))
|
||||
return UCOL_NULLORDER;
|
||||
|
||||
int32_t result;
|
||||
elems->reset_ = FALSE;
|
||||
|
||||
UCOL_GETNEXTCE(result, elems->collator_, elems->iteratordata_, status);
|
||||
/*
|
||||
if ((elems->iteratordata_).CEpos > (elems->iteratordata_).toReturn)
|
||||
{
|
||||
result = *((elems->iteratordata_).toReturn++);
|
||||
if ((elems->iteratordata_).CEpos == (elems->iteratordata_).toReturn)
|
||||
(elems->iteratordata_).CEpos = (elems->iteratordata_).toReturn =
|
||||
(elems->iteratordata_).CEs;
|
||||
}
|
||||
else
|
||||
if ((elems->iteratordata_).pos < (elems->iteratordata_).len)
|
||||
{
|
||||
UChar ch = *(elems->iteratordata_).pos++;
|
||||
if (ch <= 0xFF)
|
||||
(result) = (elems->collator_)->latinOneMapping[ch];
|
||||
else
|
||||
(result) = ucmp32_get((elems->collator_)->mapping, ch);
|
||||
|
||||
if((result) >= UCOL_NOT_FOUND)
|
||||
{
|
||||
(result) = getSpecialCE((elems->collator_), (result),
|
||||
&(elems->iteratordata_), (status));
|
||||
if ((result) == UCOL_NOT_FOUND)
|
||||
(result) = ucol_getNextUCA(ch, &(elems->iteratordata_), (status));
|
||||
}
|
||||
{
|
||||
result = *((elems->iteratordata_).toReturn++);
|
||||
if ((elems->iteratordata_).CEpos == (elems->iteratordata_).toReturn)
|
||||
(elems->iteratordata_).CEpos = (elems->iteratordata_).toReturn =
|
||||
(elems->iteratordata_).CEs;
|
||||
}
|
||||
else
|
||||
(result) = UCOL_NO_MORE_CES;
|
||||
else
|
||||
if ((elems->iteratordata_).pos < (elems->iteratordata_).len)
|
||||
{
|
||||
UChar ch = *(elems->iteratordata_).pos++;
|
||||
if (ch <= 0xFF)
|
||||
(result) = (elems->collator_)->latinOneMapping[ch];
|
||||
else
|
||||
(result) = ucmp32_get((elems->collator_)->mapping, ch);
|
||||
|
||||
if((result) >= UCOL_NOT_FOUND)
|
||||
{
|
||||
(result) = getSpecialCE((elems->collator_), (result),
|
||||
&(elems->iteratordata_), (status));
|
||||
if ((result) == UCOL_NOT_FOUND)
|
||||
(result) = ucol_getNextUCA(ch, &(elems->iteratordata_), (status));
|
||||
}
|
||||
}
|
||||
else
|
||||
(result) = UCOL_NO_MORE_CES;
|
||||
*/
|
||||
|
||||
|
||||
if (result == UCOL_NO_MORE_CES)
|
||||
result = UCOL_NULLORDER;
|
||||
return result;
|
||||
@ -142,62 +146,61 @@ ucol_previous(UCollationElements *elems,
|
||||
{
|
||||
if(U_FAILURE(*status))
|
||||
return UCOL_NULLORDER;
|
||||
else
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
int32_t result;
|
||||
UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_,
|
||||
elems->length_, status);
|
||||
if (elems->reset_ &&
|
||||
(elems->iteratordata_.pos == elems->iteratordata_.string))
|
||||
elems->iteratordata_.pos = elems->iteratordata_.len;
|
||||
|
||||
/* synwee : to be removed, only for testing
|
||||
const UCollator *coll = elems->collator_;
|
||||
collIterate *data = &(elems->iteratordata_);
|
||||
int32_t length = elems->length_;
|
||||
elems->reset_ = FALSE;
|
||||
|
||||
if (data->CEpos > data->CEs)
|
||||
{
|
||||
data->toReturn --;
|
||||
(result) = *(data->toReturn);
|
||||
if (data->CEs == data->toReturn)
|
||||
data->CEpos = data->toReturn = data->CEs;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* pointers are always at the next position to be retrieved for getnextce
|
||||
for every first previous step after a next, value returned will be the same
|
||||
as the last next value
|
||||
*/
|
||||
/*if (data->len - data->pos == length)
|
||||
(result) = UCOL_NO_MORE_CES;
|
||||
UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_,
|
||||
elems->length_, status);
|
||||
|
||||
/* synwee : to be removed, only for testing */
|
||||
/*
|
||||
const UCollator *coll = elems->collator_;
|
||||
collIterate *data = &(elems->iteratordata_);
|
||||
int32_t length = elems->length_;
|
||||
|
||||
if (data->CEpos > data->CEs)
|
||||
{
|
||||
data->toReturn --;
|
||||
(result) = *(data->toReturn);
|
||||
if (data->CEs == data->toReturn)
|
||||
data->CEpos = data->toReturn = data->CEs;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (data->pos != data->writableBuffer)
|
||||
data->pos --;
|
||||
{
|
||||
if (data->pos == data->string || data->pos == data->writableBuffer)
|
||||
(result) = UCOL_NO_MORE_CES;
|
||||
else
|
||||
{
|
||||
data->pos = data->string +
|
||||
(length - (data->len - data->writableBuffer));
|
||||
data->len = data->string + length;
|
||||
data->isThai = TRUE;
|
||||
}
|
||||
|
||||
UChar ch = *(data->pos);
|
||||
if (ch <= 0xFF)
|
||||
(result) = (coll)->latinOneMapping[ch];
|
||||
else
|
||||
(result) = ucmp32_get((coll)->mapping, ch);
|
||||
{
|
||||
data->pos --;
|
||||
|
||||
UChar ch = *(data->pos);
|
||||
if (ch <= 0xFF)
|
||||
(result) = (coll)->latinOneMapping[ch];
|
||||
else
|
||||
(result) = ucmp32_get((coll)->mapping, ch);
|
||||
|
||||
if ((result) >= UCOL_NOT_FOUND)
|
||||
{
|
||||
(result) = getSpecialPrevCE(coll, result, data, length, status);
|
||||
if ((result) == UCOL_NOT_FOUND)
|
||||
(result) = ucol_getPrevUCA(ch, data, length, status);
|
||||
}
|
||||
}
|
||||
} */
|
||||
if ((result) >= UCOL_NOT_FOUND)
|
||||
{
|
||||
(result) = getSpecialPrevCE(coll, result, data, length, status);
|
||||
if ((result) == UCOL_NOT_FOUND)
|
||||
(result) = ucol_getPrevUCA(ch, data, length, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
if (result == UCOL_NO_MORE_CES)
|
||||
result = UCOL_NULLORDER;
|
||||
|
||||
if (result == UCOL_NO_MORE_CES)
|
||||
result = UCOL_NULLORDER;
|
||||
|
||||
return result;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t
|
||||
@ -240,14 +243,7 @@ ucol_getOffset(const UCollationElements *elems)
|
||||
if (ci->isThai == TRUE)
|
||||
return ci->pos - ci->string;
|
||||
|
||||
/*
|
||||
if it is a thai string with reversed elements, since getNextCE does not
|
||||
store only a substring in writeablebuffer, we'll have to do some calculation
|
||||
to get the offset out.
|
||||
need discussion to see if it is a better idea to store the whole string
|
||||
instead.
|
||||
*/
|
||||
return elems->length_ - (ci->len - ci->pos);
|
||||
return ci->pos - ci->writableBuffer;
|
||||
}
|
||||
|
||||
U_CAPI void
|
||||
|
@ -87,6 +87,10 @@ struct UCollationElements
|
||||
* Source text length
|
||||
*/
|
||||
int32_t length_;
|
||||
/**
|
||||
* Indicates if this data has been reset.
|
||||
*/
|
||||
UBool reset_;
|
||||
};
|
||||
|
||||
struct incrementalContext {
|
||||
@ -240,20 +244,12 @@ struct incrementalContext {
|
||||
} \
|
||||
} \
|
||||
else { \
|
||||
if ((data).len - (data).pos == length) { \
|
||||
if ((data).pos == (data).string || (data).pos == (data).writableBuffer) {\
|
||||
(order) = UCOL_NO_MORE_CES; \
|
||||
} \
|
||||
else { \
|
||||
UChar ch; \
|
||||
if ((data).pos != (data).writableBuffer) { \
|
||||
(data).pos --; \
|
||||
} \
|
||||
else { \
|
||||
(data).pos = (data).string + \
|
||||
(length - ((data).len - (data).writableBuffer)); \
|
||||
(data).len = (data).string + length; \
|
||||
(data).isThai = TRUE; \
|
||||
} \
|
||||
(data).pos --; \
|
||||
ch = *((data).pos); \
|
||||
if (ch <= 0xFF) { \
|
||||
(order) = (coll)->latinOneMapping[ch]; \
|
||||
|
Loading…
Reference in New Issue
Block a user