From 8b2f397e7231d5229563c019aa56d18e1708271a Mon Sep 17 00:00:00 2001 From: Syn Wee Quek Date: Wed, 20 Jun 2001 18:14:51 +0000 Subject: [PATCH] ICU-861 Optimised the backwards iterator and extra boundary checks. X-SVN-Rev: 5026 --- icu4c/source/i18n/coleitr.cpp | 66 +++++++++++++++++++++------------- icu4c/source/i18n/ucol.cpp | 43 ++++++++++++---------- icu4c/source/i18n/ucoleitr.cpp | 8 +++++ 3 files changed, 74 insertions(+), 43 deletions(-) diff --git a/icu4c/source/i18n/coleitr.cpp b/icu4c/source/i18n/coleitr.cpp index eef528979a..dc6ee74077 100644 --- a/icu4c/source/i18n/coleitr.cpp +++ b/icu4c/source/i18n/coleitr.cpp @@ -139,15 +139,21 @@ void CollationElementIterator::setText(const UnicodeString& source, if (U_FAILURE(status)) { return; } + int32_t length = source.length(); - UChar *string = (UChar *)uprv_malloc(sizeof(UChar) * length); - source.extract(0, length, string); - - if (m_data_->isWritable && - m_data_->iteratordata_.string != NULL) { + UChar *string = NULL; + if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { uprv_free(m_data_->iteratordata_.string); } m_data_->isWritable = TRUE; + if (length > 0) { + string = (UChar *)uprv_malloc(sizeof(UChar) * length); + source.extract(0, length, string); + } + else { + string = (UChar *)uprv_malloc(sizeof(UChar)); + *string = 0; + } init_collIterate(m_data_->iteratordata_.coll, string, length, &m_data_->iteratordata_); } @@ -160,14 +166,22 @@ void CollationElementIterator::setText(CharacterIterator& source, return; int32_t length = source.getLength(); - UChar *buffer = (UChar *)uprv_malloc(sizeof(UChar) * length); - /* - Using this constructor will prevent buffer from being removed when - string gets removed - */ - UnicodeString string; - source.getText(string); - string.extract(0, length, buffer); + UChar *buffer = NULL; + + if (length == 0) { + buffer = (UChar *)uprv_malloc(sizeof(UChar)); + *buffer = 0; + } + else { + buffer = (UChar *)uprv_malloc(sizeof(UChar) * length); + /* + Using 
this constructor will prevent buffer from being removed when + string gets removed + */ + UnicodeString string; + source.getText(string); + string.extract(0, length, buffer); + } if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) uprv_free(m_data_->iteratordata_.string); @@ -232,10 +246,8 @@ CollationElementIterator::CollationElementIterator( } else { string = (UChar *)uprv_malloc(sizeof(UChar)); - // null terminate it *string = 0; } - m_data_ = ucol_openElements(order->ucollator, string, length, &status); m_data_->isWritable = TRUE; } @@ -273,15 +285,21 @@ CollationElementIterator::CollationElementIterator( } */ int32_t length = sourceText.getLength(); - UChar *buffer = (UChar *)uprv_malloc(sizeof(UChar) * length); - /* - Using this constructor will prevent buffer from being removed when - string gets removed - */ - UnicodeString string(buffer, length, length); - ((CharacterIterator &)sourceText).getText(string); - string.extract(0, length, buffer); - + UChar *buffer; + if (length > 0) { + buffer = (UChar *)uprv_malloc(sizeof(UChar) * length); + /* + Using this constructor will prevent buffer from being removed when + string gets removed + */ + UnicodeString string(buffer, length, length); + ((CharacterIterator &)sourceText).getText(string); + string.extract(0, length, buffer); + } + else { + buffer = (UChar *)uprv_malloc(sizeof(UChar)); + *buffer = 0; + } m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); m_data_->isWritable = TRUE; } diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 4754b09f27..51b316a680 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -897,7 +897,8 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou if (ch == 0) { // Ran off end of buffer. if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { - // Ran off end of main string. + // Ran off end of main string; back up one character.
+ collationSource->pos--; return UCOL_NO_MORE_CES; } else @@ -1164,9 +1165,8 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data, side buffer / original string, and we need to start again to get the next character. */ - for (;;) { - if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { + if (data->flags & UCOL_ITER_HASLEN) { /* Normal path for strings when length is specified. Not in side buffer because it is always null terminated. @@ -1175,10 +1175,14 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data, /* End of the main source string */ return UCOL_NO_MORE_CES; } + data->pos --; + ch = *data->pos; } else { + data->pos --; + ch = *data->pos; /* we are in the side buffer. */ - if (*(data->pos - 1) == 0) { + if (ch == 0) { /* At the start of the normalize side buffer. Go back to string. @@ -1196,20 +1200,19 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data, continue; } } - data->pos --; - ch = *(data->pos); - + /* - * if there's no fcd and/or normalization stuff to do. + * got a character; determine whether there is any fcd and/or normalization + * stuff to do. * if the current character is not fcd. * if current character is at the start of the string * Trailing combining class == 0.
* Note if pos is in the writablebuffer, norm is always 0 */ if ((data->flags & UCOL_ITER_NORM) == 0 || - data->fcdPosition <= data->pos || - data->string == data->pos || - ch < ZERO_CC_LIMIT_) { + ch < ZERO_CC_LIMIT_ || + (data->fcdPosition != NULL && data->fcdPosition <= data->pos) + || data->string == data->pos) { break; } @@ -1246,11 +1249,14 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data, contraction */ if (!isAtStartPrevIterate(data) && ucol_contractionEndCP(ch, coll)) { - result = UCOL_CONTRACTION; + result = getSpecialPrevCE(coll, UCOL_CONTRACTION, data, status); } else { if (ch <= 0xFF) { result = coll->latinOneMapping[ch]; + if (result > UCOL_NOT_FOUND) { + result = getSpecialPrevCE(coll, result, data, status); + } } else { if ((data->flags & UCOL_ITER_INNORMBUF) == 0 && @@ -1262,13 +1268,12 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data, else { result = ucmp32_get(coll->mapping, ch); } - } - } - - if (result >= UCOL_NOT_FOUND) { - result = getSpecialPrevCE(coll, result, data, status); - if (result == UCOL_NOT_FOUND) { - result = ucol_getPrevUCA(ch, data, status); + if (result > UCOL_NOT_FOUND) { + result = getSpecialPrevCE(coll, result, data, status); + } + if (result == UCOL_NOT_FOUND) { + result = ucol_getPrevUCA(ch, data, status); + } } } } diff --git a/icu4c/source/i18n/ucoleitr.cpp b/icu4c/source/i18n/ucoleitr.cpp index 794ba7c4c9..5a7acb9435 100644 --- a/icu4c/source/i18n/ucoleitr.cpp +++ b/icu4c/source/i18n/ucoleitr.cpp @@ -56,6 +56,10 @@ ucol_openElements(const UCollator *coll, result->normalization_ = UNORM_DEFAULT; result->isWritable = FALSE; + + if (text == NULL) { + textLength = 0; + } init_collIterate(coll, text, textLength, &result->iteratordata_); return result; @@ -180,6 +184,10 @@ ucol_setText( UCollationElements *elems, uprv_free(elems->iteratordata_.string); } + if (text == NULL) { + textLength = 0; + } + elems->isWritable = FALSE; init_collIterate(elems->iteratordata_.coll, 
text, textLength, &elems->iteratordata_);