ICU-861
Cleaning up collation element iterator codes. X-SVN-Rev: 3946
This commit is contained in:
parent
0e1ce27f8f
commit
4196c80eee
@ -1800,13 +1800,16 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
uint32_t length, UErrorCode *status)
|
||||
{
|
||||
uint32_t order;
|
||||
if (ch < 0xFF)
|
||||
if (ch < 0xFF) {
|
||||
order = UCA->latinOneMapping[ch];
|
||||
else
|
||||
}
|
||||
else {
|
||||
order = ucmp32_get(UCA->mapping, ch);
|
||||
}
|
||||
|
||||
if (order >= UCOL_NOT_FOUND)
|
||||
if (order >= UCOL_NOT_FOUND) {
|
||||
order = getSpecialPrevCE(UCA, order, collationSource, length, status);
|
||||
}
|
||||
|
||||
if (order == UCOL_NOT_FOUND)
|
||||
{
|
||||
@ -1819,12 +1822,14 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
uint32_t
|
||||
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
|
||||
LCount = 19, VCount = 21, TCount = 28,
|
||||
NCount = VCount * TCount, // 588
|
||||
SCount = LCount * NCount; // 11172
|
||||
//LLimit = LBase + LCount, // 1113
|
||||
//VLimit = VBase + VCount, // 1176
|
||||
//TLimit = TBase + TCount, // 11C3
|
||||
//SLimit = SBase + SCount; // D7A4
|
||||
NCount = VCount * TCount, /* 588 */
|
||||
SCount = LCount * NCount; /* 11172 */
|
||||
/*
|
||||
LLimit = LBase + LCount, // 1113
|
||||
VLimit = VBase + VCount, // 1176
|
||||
TLimit = TBase + TCount, // 11C3
|
||||
SLimit = SBase + SCount; // D7A4
|
||||
*/
|
||||
|
||||
/*
|
||||
once we have failed to find a match for codepoint cp, and are in the
|
||||
@ -1864,6 +1869,8 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
return *(collationSource->toReturn);
|
||||
} else {
|
||||
/*
|
||||
synwee :TODO
|
||||
Heh heh.... waiting for vladimir's code, me cut and paste
|
||||
Jamo is Special
|
||||
do recursive processing of L, V, and T with fetchCE (but T only if not
|
||||
equal to TBase!!)
|
||||
@ -1893,8 +1900,9 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
{
|
||||
uint32_t cp = ((prevChar << 10UL) + ch - ((0xd800 << 10UL) + 0xdc00));
|
||||
collationSource->pos --;
|
||||
if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00))
|
||||
if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00)) {
|
||||
return 0; /* illegal code value, use completely ignoreable! */
|
||||
}
|
||||
|
||||
/*
|
||||
This is a code point minus 0x10000, that's what algorithm requires
|
||||
@ -1904,14 +1912,16 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
collationSource->toReturn = collationSource->CEpos;
|
||||
*(collationSource->CEpos ++) = order;
|
||||
}
|
||||
else
|
||||
else {
|
||||
return 0; /* completely ignorable */
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* otherwise */
|
||||
if (UTF_IS_FIRST_SURROGATE(ch) || (ch & 0xFFFE) == 0xFFFE)
|
||||
if (UTF_IS_FIRST_SURROGATE(ch) || (ch & 0xFFFE) == 0xFFFE) {
|
||||
return 0; /* completely ignorable */
|
||||
}
|
||||
|
||||
/* Make up an artificial CE from code point as per UCA */
|
||||
*(collationSource->CEpos ++) = 0xD0800303 | (ch & 0xF000) << 12 |
|
||||
@ -2041,7 +2051,6 @@ uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, U
|
||||
* This function handles the special CEs like contractions, expansions,
|
||||
* surrogates, Thai.
|
||||
* It is called by both getPrevCE and getPrevUCA
|
||||
* synwee
|
||||
*/
|
||||
uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
collIterate *source, uint32_t length,
|
||||
@ -2075,27 +2084,27 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
/*
|
||||
someone else has already allocated something
|
||||
*/
|
||||
if (source->writableBuffer != source->stackWritableBuffer)
|
||||
if (source->writableBuffer != source->stackWritableBuffer) {
|
||||
uprv_free(source->writableBuffer);
|
||||
}
|
||||
source->writableBuffer =
|
||||
(UChar *)uprv_malloc(size * sizeof(UChar));
|
||||
source->isThai = FALSE;
|
||||
}
|
||||
UChar *sourceCopy = source->string;
|
||||
UChar *targetCopy = source->writableBuffer;
|
||||
while (sourceCopy <= strend)
|
||||
{
|
||||
while (sourceCopy <= strend) {
|
||||
if (UCOL_ISTHAIPREVOWEL(*sourceCopy) &&
|
||||
/* This is the combination that needs to be swapped */
|
||||
UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1)))
|
||||
{
|
||||
UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1))) {
|
||||
*(targetCopy) = *(sourceCopy + 1);
|
||||
*(targetCopy + 1) = *(sourceCopy);
|
||||
targetCopy += 2;
|
||||
sourceCopy += 2;
|
||||
}
|
||||
else
|
||||
else {
|
||||
*(targetCopy ++) = *(sourceCopy ++);
|
||||
}
|
||||
}
|
||||
source->pos = targetCopy;
|
||||
source->len = targetCopy;
|
||||
@ -2123,8 +2132,8 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
constart = UCharOffset = (UChar *)coll->image + getContractOffset(CE);
|
||||
strend = source->len;
|
||||
|
||||
if ((uint32_t)(strend - source->pos) == length)
|
||||
{ /* this is the start of string */
|
||||
if ((uint32_t)(strend - source->pos) == length) {
|
||||
/* this is the start of string */
|
||||
CE = *(coll->contractionCEs +
|
||||
(UCharOffset - coll->contractionIndex));
|
||||
break;
|
||||
@ -2136,11 +2145,11 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
UCharOffset += *UCharOffset;
|
||||
|
||||
schar = *(source->pos - 1);
|
||||
while (schar > (tchar = *UCharOffset))
|
||||
while (schar > (tchar = *UCharOffset)) {
|
||||
UCharOffset ++;
|
||||
}
|
||||
|
||||
if (schar != tchar)
|
||||
{
|
||||
if (schar != tchar) {
|
||||
/*
|
||||
we didn't find the correct codepoint. We can use either the first or
|
||||
the last CE
|
||||
@ -2148,12 +2157,14 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
/* testing if (tchar != 0xFFFF) */
|
||||
UCharOffset = constart;
|
||||
}
|
||||
else
|
||||
else {
|
||||
/* Move up one character */
|
||||
source->pos --;
|
||||
}
|
||||
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
||||
if (!isContraction(CE))
|
||||
if (!isContraction(CE)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case EXPANSION_TAG:
|
||||
@ -2165,16 +2176,20 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
/* find the offset to expansion table */
|
||||
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
|
||||
size = getExpansionCount(CE);
|
||||
if (size != 0)
|
||||
if (size != 0) {
|
||||
/*
|
||||
if there are fewer than 16 elements in expansion, we don't terminate
|
||||
*/
|
||||
for (count = 0; count < size; count++)
|
||||
for (count = 0; count < size; count++) {
|
||||
*(source->CEpos ++) = *CEOffset++;
|
||||
else
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* else, we do */
|
||||
while (*CEOffset != 0)
|
||||
while (*CEOffset != 0) {
|
||||
*(source->CEpos ++) = *CEOffset ++;
|
||||
}
|
||||
}
|
||||
source->toReturn = source->CEpos - 1;
|
||||
return *(source->toReturn);
|
||||
case CHARSET_TAG:
|
||||
@ -2185,7 +2200,9 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
CE=0;
|
||||
break;
|
||||
}
|
||||
if (CE <= UCOL_NOT_FOUND) break;
|
||||
if (CE <= UCOL_NOT_FOUND) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return CE;
|
||||
}
|
||||
|
@ -45,16 +45,18 @@ ucol_openElements(const UCollator *coll,
|
||||
{
|
||||
UCollationElements *result;
|
||||
|
||||
if (U_FAILURE(*status))
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
|
||||
|
||||
result->collator_ = coll;
|
||||
|
||||
/* gets the correct length of the null-terminated string */
|
||||
if (textLength == -1)
|
||||
if (textLength == -1) {
|
||||
textLength = u_strlen(text);
|
||||
}
|
||||
|
||||
result->length_ = textLength;
|
||||
result->reset_ = TRUE;
|
||||
@ -67,10 +69,13 @@ U_CAPI void
|
||||
ucol_closeElements(UCollationElements *elems)
|
||||
{
|
||||
collIterate *ci = &elems->iteratordata_;
|
||||
if (ci->writableBuffer != ci->stackWritableBuffer)
|
||||
if (ci->writableBuffer != ci->stackWritableBuffer) {
|
||||
uprv_free(ci->writableBuffer);
|
||||
}
|
||||
if (elems->iteratordata_.isWritable && elems->iteratordata_.string != NULL)
|
||||
{
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
}
|
||||
uprv_free(elems);
|
||||
}
|
||||
|
||||
@ -84,8 +89,7 @@ ucol_reset(UCollationElements *elems)
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
|
||||
ci->isThai = TRUE;
|
||||
if (ci->stackWritableBuffer != ci->writableBuffer)
|
||||
{
|
||||
if (ci->stackWritableBuffer != ci->writableBuffer) {
|
||||
uprv_free(ci->writableBuffer);
|
||||
ci->writableBuffer = ci->stackWritableBuffer;
|
||||
}
|
||||
@ -96,14 +100,15 @@ ucol_next(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
int32_t result;
|
||||
if (U_FAILURE(*status))
|
||||
if (U_FAILURE(*status)) {
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
|
||||
elems->reset_ = FALSE;
|
||||
|
||||
UCOL_GETNEXTCE(result, elems->collator_, elems->iteratordata_, status);
|
||||
|
||||
/* testing
|
||||
#ifdef UCOL_DEBUG
|
||||
if ((elems->iteratordata_).CEpos > (elems->iteratordata_).toReturn)
|
||||
{
|
||||
result = *((elems->iteratordata_).toReturn++);
|
||||
@ -130,10 +135,11 @@ ucol_next(UCollationElements *elems,
|
||||
}
|
||||
else
|
||||
(result) = UCOL_NO_MORE_CES;
|
||||
*/
|
||||
#endif
|
||||
|
||||
if (result == UCOL_NO_MORE_CES)
|
||||
if (result == UCOL_NO_MORE_CES) {
|
||||
result = UCOL_NULLORDER;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -141,8 +147,9 @@ U_CAPI int32_t
|
||||
ucol_previous(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if(U_FAILURE(*status))
|
||||
if(U_FAILURE(*status)) {
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t result;
|
||||
@ -156,8 +163,7 @@ ucol_previous(UCollationElements *elems,
|
||||
UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_,
|
||||
elems->length_, status);
|
||||
|
||||
/* synwee : to be removed, only for testing
|
||||
|
||||
#ifdef UCOL_DEBUG
|
||||
const UCollator *coll = elems->collator_;
|
||||
collIterate *data = &(elems->iteratordata_);
|
||||
int32_t length = elems->length_;
|
||||
@ -191,10 +197,11 @@ ucol_previous(UCollationElements *elems,
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
if (result == UCOL_NO_MORE_CES)
|
||||
if (result == UCOL_NO_MORE_CES) {
|
||||
result = UCOL_NULLORDER;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -215,17 +222,22 @@ ucol_setText( UCollationElements *elems,
|
||||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if (U_FAILURE(*status))
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* gets the correct length of the null-terminated string */
|
||||
if (textLength == -1)
|
||||
if (textLength == -1) {
|
||||
textLength = u_strlen(text);
|
||||
}
|
||||
|
||||
elems->length_ = textLength;
|
||||
|
||||
if (elems->iteratordata_.isWritable && elems->iteratordata_.string != NULL)
|
||||
{
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
}
|
||||
|
||||
init_collIterate(text, textLength, &elems->iteratordata_, FALSE);
|
||||
|
||||
elems->reset_ = TRUE;
|
||||
@ -235,8 +247,9 @@ U_CAPI UTextOffset
|
||||
ucol_getOffset(const UCollationElements *elems)
|
||||
{
|
||||
const collIterate *ci = &(elems->iteratordata_);
|
||||
if (ci->isThai == TRUE)
|
||||
if (ci->isThai == TRUE) {
|
||||
return ci->pos - ci->string;
|
||||
}
|
||||
|
||||
return ci->pos - ci->writableBuffer;
|
||||
}
|
||||
@ -246,8 +259,9 @@ ucol_setOffset(UCollationElements *elems,
|
||||
UTextOffset offset,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if (U_FAILURE(*status))
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
ci->pos = ci->string + offset;
|
||||
|
Loading…
Reference in New Issue
Block a user