ICU-861 Optimised the backwards iterator and added extra boundary checks.

X-SVN-Rev: 5026
This commit is contained in:
Syn Wee Quek 2001-06-20 18:14:51 +00:00
parent f8136daf50
commit 8b2f397e72
3 changed files with 74 additions and 43 deletions

View File

@ -139,15 +139,21 @@ void CollationElementIterator::setText(const UnicodeString& source,
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return; return;
} }
int32_t length = source.length(); int32_t length = source.length();
UChar *string = (UChar *)uprv_malloc(sizeof(UChar) * length); UChar *string = NULL;
source.extract(0, length, string); if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
if (m_data_->isWritable &&
m_data_->iteratordata_.string != NULL) {
uprv_free(m_data_->iteratordata_.string); uprv_free(m_data_->iteratordata_.string);
} }
m_data_->isWritable = TRUE; m_data_->isWritable = TRUE;
if (length > 0) {
string = (UChar *)uprv_malloc(sizeof(UChar) * length);
source.extract(0, length, string);
}
else {
string = (UChar *)uprv_malloc(sizeof(UChar));
*string = 0;
}
init_collIterate(m_data_->iteratordata_.coll, string, length, init_collIterate(m_data_->iteratordata_.coll, string, length,
&m_data_->iteratordata_); &m_data_->iteratordata_);
} }
@ -160,14 +166,22 @@ void CollationElementIterator::setText(CharacterIterator& source,
return; return;
int32_t length = source.getLength(); int32_t length = source.getLength();
UChar *buffer = (UChar *)uprv_malloc(sizeof(UChar) * length); UChar *buffer = NULL;
/*
Using this constructor will prevent buffer from being removed when if (length == 0) {
string gets removed buffer = (UChar *)uprv_malloc(sizeof(UChar));
*/ *buffer = 0;
UnicodeString string; }
source.getText(string); else {
string.extract(0, length, buffer); buffer = (UChar *)uprv_malloc(sizeof(UChar) * length);
/*
Using this constructor will prevent buffer from being removed when
string gets removed
*/
UnicodeString string;
source.getText(string);
string.extract(0, length, buffer);
}
if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) if (m_data_->isWritable && m_data_->iteratordata_.string != NULL)
uprv_free(m_data_->iteratordata_.string); uprv_free(m_data_->iteratordata_.string);
@ -232,10 +246,8 @@ CollationElementIterator::CollationElementIterator(
} }
else { else {
string = (UChar *)uprv_malloc(sizeof(UChar)); string = (UChar *)uprv_malloc(sizeof(UChar));
// null terminate it
*string = 0; *string = 0;
} }
m_data_ = ucol_openElements(order->ucollator, string, length, &status); m_data_ = ucol_openElements(order->ucollator, string, length, &status);
m_data_->isWritable = TRUE; m_data_->isWritable = TRUE;
} }
@ -273,15 +285,21 @@ CollationElementIterator::CollationElementIterator(
} }
*/ */
int32_t length = sourceText.getLength(); int32_t length = sourceText.getLength();
UChar *buffer = (UChar *)uprv_malloc(sizeof(UChar) * length); UChar *buffer;
/* if (length > 0) {
Using this constructor will prevent buffer from being removed when buffer = (UChar *)uprv_malloc(sizeof(UChar) * length);
string gets removed /*
*/ Using this constructor will prevent buffer from being removed when
UnicodeString string(buffer, length, length); string gets removed
((CharacterIterator &)sourceText).getText(string); */
string.extract(0, length, buffer); UnicodeString string(buffer, length, length);
((CharacterIterator &)sourceText).getText(string);
string.extract(0, length, buffer);
}
else {
buffer = (UChar *)uprv_malloc(sizeof(UChar));
*buffer = 0;
}
m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
m_data_->isWritable = TRUE; m_data_->isWritable = TRUE;
} }

View File

@ -897,7 +897,8 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
if (ch == 0) { if (ch == 0) {
// Ran off end of buffer. // Ran off end of buffer.
if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
// Ran off end of main string. // Ran off end of main string. backing up one character.
collationSource->pos--;
return UCOL_NO_MORE_CES; return UCOL_NO_MORE_CES;
} }
else else
@ -1164,9 +1165,8 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
side buffer / original string, and we need to start again to get the side buffer / original string, and we need to start again to get the
next character. next character.
*/ */
for (;;) { for (;;) {
if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { if (data->flags & UCOL_ITER_HASLEN) {
/* /*
Normal path for strings when length is specified. Normal path for strings when length is specified.
Not in side buffer because it is always null terminated. Not in side buffer because it is always null terminated.
@ -1175,10 +1175,14 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
/* End of the main source string */ /* End of the main source string */
return UCOL_NO_MORE_CES; return UCOL_NO_MORE_CES;
} }
data->pos --;
ch = *data->pos;
} }
else { else {
data->pos --;
ch = *data->pos;
/* we are in the side buffer. */ /* we are in the side buffer. */
if (*(data->pos - 1) == 0) { if (ch == 0) {
/* /*
At the start of the normalize side buffer. At the start of the normalize side buffer.
Go back to string. Go back to string.
@ -1196,20 +1200,19 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
continue; continue;
} }
} }
data->pos --;
ch = *(data->pos);
/* /*
* if there's no fcd and/or normalization stuff to do. * got a character to determine if there's fcd and/or normalization
* stuff to do.
* if the current character is not fcd. * if the current character is not fcd.
* if current character is at the start of the string * if current character is at the start of the string
* Trailing combining class == 0. * Trailing combining class == 0.
* Note if pos is in the writablebuffer, norm is always 0 * Note if pos is in the writablebuffer, norm is always 0
*/ */
if ((data->flags & UCOL_ITER_NORM) == 0 || if ((data->flags & UCOL_ITER_NORM) == 0 ||
data->fcdPosition <= data->pos || ch < ZERO_CC_LIMIT_ ||
data->string == data->pos || (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
ch < ZERO_CC_LIMIT_) { || data->string == data->pos) {
break; break;
} }
@ -1246,11 +1249,14 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
contraction contraction
*/ */
if (!isAtStartPrevIterate(data) && ucol_contractionEndCP(ch, coll)) { if (!isAtStartPrevIterate(data) && ucol_contractionEndCP(ch, coll)) {
result = UCOL_CONTRACTION; result = getSpecialPrevCE(coll, UCOL_CONTRACTION, data, status);
} }
else { else {
if (ch <= 0xFF) { if (ch <= 0xFF) {
result = coll->latinOneMapping[ch]; result = coll->latinOneMapping[ch];
if (result > UCOL_NOT_FOUND) {
result = getSpecialPrevCE(coll, result, data, status);
}
} }
else { else {
if ((data->flags & UCOL_ITER_INNORMBUF) == 0 && if ((data->flags & UCOL_ITER_INNORMBUF) == 0 &&
@ -1262,13 +1268,12 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
else { else {
result = ucmp32_get(coll->mapping, ch); result = ucmp32_get(coll->mapping, ch);
} }
} if (result > UCOL_NOT_FOUND) {
} result = getSpecialPrevCE(coll, result, data, status);
}
if (result >= UCOL_NOT_FOUND) { if (result == UCOL_NOT_FOUND) {
result = getSpecialPrevCE(coll, result, data, status); result = ucol_getPrevUCA(ch, data, status);
if (result == UCOL_NOT_FOUND) { }
result = ucol_getPrevUCA(ch, data, status);
} }
} }
} }

View File

@ -56,6 +56,10 @@ ucol_openElements(const UCollator *coll,
result->normalization_ = UNORM_DEFAULT; result->normalization_ = UNORM_DEFAULT;
result->isWritable = FALSE; result->isWritable = FALSE;
if (text == NULL) {
textLength = 0;
}
init_collIterate(coll, text, textLength, &result->iteratordata_); init_collIterate(coll, text, textLength, &result->iteratordata_);
return result; return result;
@ -180,6 +184,10 @@ ucol_setText( UCollationElements *elems,
uprv_free(elems->iteratordata_.string); uprv_free(elems->iteratordata_.string);
} }
if (text == NULL) {
textLength = 0;
}
elems->isWritable = FALSE; elems->isWritable = FALSE;
init_collIterate(elems->iteratordata_.coll, text, textLength, &elems->iteratordata_); init_collIterate(elems->iteratordata_.coll, text, textLength, &elems->iteratordata_);