diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp index 805435a727..deca143e82 100644 --- a/icu4c/source/common/normlzr.cpp +++ b/icu4c/source/common/normlzr.cpp @@ -12,8 +12,61 @@ #include "unicode/schriter.h" #include "unicode/uchriter.h" #include "unicode/normlzr.h" +#include "cmemory.h" #include "unormimp.h" +U_CDECL_BEGIN + +/* + * This is wrapper code around a C++ CharacterIterator to + * look like a C UCharIterator for the internal API + * for incremental normalization. + * + * The UCharIterator.context field holds a pointer to the CharacterIterator. + */ + +static int32_t U_CALLCONV +characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { + return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin); +} + +static UBool U_CALLCONV +characterIteratorHasNext(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->hasNext(); +} + +static UBool U_CALLCONV +characterIteratorHasPrevious(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->hasPrevious(); +} + +static UChar U_CALLCONV +characterIteratorCurrent(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->current(); +} + +static UChar U_CALLCONV +characterIteratorNext(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->nextPostInc(); +} + +static UChar U_CALLCONV +characterIteratorPrevious(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->previous(); +} + +static const UCharIterator characterIteratorWrapper={ + 0, 0, 0, + characterIteratorMove, + characterIteratorHasNext, + characterIteratorHasPrevious, + characterIteratorCurrent, + characterIteratorNext, + characterIteratorPrevious +}; + +U_CDECL_END + U_NAMESPACE_BEGIN //------------------------------------------------------------------------- @@ -22,29 +75,26 @@ U_NAMESPACE_BEGIN Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : fUMode(mode), 
fOptions(0), - text(new StringCharacterIterator(str)), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(new StringCharacterIterator(str)); } Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : fUMode(mode), fOptions(0), - text(new UCharCharacterIterator(str, length)), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(new UCharCharacterIterator(str, length)); } Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : fUMode(mode), fOptions(0), - text(iter.clone()), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(iter.clone()); } // deprecated constructors @@ -52,71 +102,71 @@ Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : Normalizer::Normalizer(const UnicodeString& str, EMode mode) : fUMode(getUMode(mode)), fOptions(0), - text(new StringCharacterIterator(str)), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(new StringCharacterIterator(str)); } Normalizer::Normalizer(const UnicodeString& str, EMode mode, int32_t options) : fUMode(getUMode(mode)), fOptions(options), - text(new StringCharacterIterator(str)), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(new StringCharacterIterator(str)); } Normalizer::Normalizer(const UChar *str, int32_t length, EMode mode) : fUMode(getUMode(mode)), fOptions(0), - text(new UCharCharacterIterator(str, length)), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(new UCharCharacterIterator(str, length)); } Normalizer::Normalizer(const CharacterIterator& iter, EMode mode) : fUMode(getUMode(mode)), fOptions(0), - text(iter.clone()), currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(iter.clone()); } Normalizer::Normalizer(const CharacterIterator& iter, EMode mode, int32_t options) : fUMode(getUMode(mode)), fOptions(options), - text(iter.clone()), 
currentIndex(0), nextIndex(0), buffer(), bufferPos(0) { - checkData(); + init(iter.clone()); } Normalizer::Normalizer(const Normalizer &copy) : fUMode(copy.fUMode), fOptions(copy.fOptions), - text(copy.text->clone()), currentIndex(copy.nextIndex), nextIndex(copy.nextIndex), buffer(copy.buffer), bufferPos(copy.bufferPos) { - checkData(); + init(((CharacterIterator *)(copy.text->context))->clone()); } static const UChar _NUL=0; void -Normalizer::checkData() { +Normalizer::init(CharacterIterator *iter) { UErrorCode errorCode=U_ZERO_ERROR; - if(!unorm_haveData(&errorCode)) { - delete text; - text=new UCharCharacterIterator(&_NUL, 0); + + text=new UCharIterator; + uprv_memcpy(text, &characterIteratorWrapper, sizeof(UCharIterator)); + + if(unorm_haveData(&errorCode)) { + text->context=iter; + } else { + delete iter; + text->context=new UCharCharacterIterator(&_NUL, 0); } } @@ -140,7 +190,7 @@ Normalizer::clone() const */ int32_t Normalizer::hashCode() const { - return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; + return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; } UBool Normalizer::operator==(const Normalizer& that) const @@ -149,7 +199,7 @@ UBool Normalizer::operator==(const Normalizer& that) const this==&that || fUMode==that.fUMode && fOptions==that.fOptions && - *text==*(that.text) && + *((CharacterIterator *)(text->context))==*((CharacterIterator *)(that.text->context)) && buffer==that.buffer && bufferPos==that.bufferPos && nextIndex==that.nextIndex; @@ -167,13 +217,22 @@ Normalizer::normalize(const UnicodeString& source, if(source.isBogus() || U_FAILURE(status)) { result.setToBogus(); } else { - /* make sure that we do not operate on the same buffer in source and result */ - result.cloneArrayIfNeeded(-1, source.length()+20, FALSE); - result.fLength=unorm_internalNormalize(&result.fArray, &result.fCapacity, - source.fArray, 
source.fLength, + UChar *buffer=result.getBuffer(source.length()); + int32_t length=unorm_internalNormalize(buffer, result.getCapacity(), + source.getBuffer(), source.length(), mode, (options&IGNORE_HANGUL)!=0, - UnicodeString::growBuffer, &result, &status); + result.releaseBuffer(length); + if(status==U_BUFFER_OVERFLOW_ERROR) { + status=U_ZERO_ERROR; + buffer=result.getBuffer(length); + length=unorm_internalNormalize(buffer, result.getCapacity(), + source.getBuffer(), source.length(), + mode, (options&IGNORE_HANGUL)!=0, + &status); + result.releaseBuffer(length); + } + if(U_FAILURE(status)) { result.setToBogus(); } @@ -188,7 +247,7 @@ Normalizer::quickCheck(const UnicodeString& source, return UNORM_MAYBE; } - return unorm_quickCheck(source.fArray, source.length(), + return unorm_quickCheck(source.getBuffer(), source.length(), mode, &status); } @@ -200,13 +259,22 @@ Normalizer::compose(const UnicodeString& source, if(source.isBogus() || U_FAILURE(status)) { result.setToBogus(); } else { - /* make sure that we do not operate on the same buffer in source and result */ - result.cloneArrayIfNeeded(-1, source.length()+20, FALSE); - result.fLength=unorm_compose(&result.fArray, &result.fCapacity, - source.fArray, source.fLength, + UChar *buffer=result.getBuffer(source.length()); + int32_t length=unorm_compose(buffer, result.getCapacity(), + source.getBuffer(), source.length(), compat, (options&IGNORE_HANGUL)!=0, - UnicodeString::growBuffer, &result, &status); + result.releaseBuffer(length); + if(status==U_BUFFER_OVERFLOW_ERROR) { + status=U_ZERO_ERROR; + buffer=result.getBuffer(length); + length=unorm_compose(buffer, result.getCapacity(), + source.getBuffer(), source.length(), + compat, (options&IGNORE_HANGUL)!=0, + &status); + result.releaseBuffer(length); + } + if(U_FAILURE(status)) { result.setToBogus(); } @@ -221,13 +289,22 @@ Normalizer::decompose(const UnicodeString& source, if(source.isBogus() || U_FAILURE(status)) { result.setToBogus(); } else { - /* make sure 
that we do not operate on the same buffer in source and result */ - result.cloneArrayIfNeeded(-1, source.length()+20, FALSE); - result.fLength=unorm_decompose(&result.fArray, &result.fCapacity, - source.fArray, source.fLength, - compat, (options&IGNORE_HANGUL)!=0, - UnicodeString::growBuffer, &result, - &status); + UChar *buffer=result.getBuffer(source.length()); + int32_t length=unorm_decompose(buffer, result.getCapacity(), + source.getBuffer(), source.length(), + compat, (options&IGNORE_HANGUL)!=0, + &status); + result.releaseBuffer(length); + if(status==U_BUFFER_OVERFLOW_ERROR) { + status=U_ZERO_ERROR; + buffer=result.getBuffer(length); + length=unorm_decompose(buffer, result.getCapacity(), + source.getBuffer(), source.length(), + compat, (options&IGNORE_HANGUL)!=0, + &status); + result.releaseBuffer(length); + } + if(U_FAILURE(status)) { result.setToBogus(); } @@ -239,7 +316,7 @@ Normalizer::decompose(const UnicodeString& source, //------------------------------------------------------------------------- /** - * Return the current character in the normalized text. + * Return the current character in the normalized text. */ UChar32 Normalizer::current() { if(bufferPossetToStart(); - currentIndex=nextIndex=text->getIndex(); + currentIndex=nextIndex=text->move(text, 0, UITERATOR_START); clearBuffer(); } void Normalizer::setIndexOnly(UTextOffset index) { - text->setIndex(index); - currentIndex=nextIndex=text->getIndex(); // validates index + currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index clearBuffer(); } @@ -303,7 +378,7 @@ Normalizer::setIndexOnly(UTextOffset index) { * by next and previous and the indices passed to and * returned from setIndex and {@link #getIndex}. *

- * @param index the desired index in the input text. + * @param index the desired index in the input text. * * @return the first normalized character that is the result of iterating * forward starting at the given index. @@ -317,8 +392,8 @@ UChar32 Normalizer::setIndex(UTextOffset index) { } /** - * Return the first character in the normalized text. This resets - * the Normalizer's position to the beginning of the text. + * Return the first character in the normalized text. This resets + * the Normalizer's position to the beginning of the text. */ UChar32 Normalizer::first() { reset(); @@ -326,13 +401,12 @@ UChar32 Normalizer::first() { } /** - * Return the last character in the normalized text. This resets + * Return the last character in the normalized text. This resets * the Normalizer's position to be just before the * the input text corresponding to that normalized character. */ UChar32 Normalizer::last() { - text->setToEnd(); - currentIndex=nextIndex=text->getIndex(); + currentIndex=nextIndex=text->move(text, 0, UITERATOR_END); clearBuffer(); return previous(); } @@ -360,21 +434,21 @@ UTextOffset Normalizer::getIndex() const { } /** - * Retrieve the index of the start of the input text. This is the begin index + * Retrieve the index of the start of the input text. This is the begin index * of the CharacterIterator or the start (i.e. 0) of the String * over which this Normalizer is iterating */ UTextOffset Normalizer::startIndex() const { - return text->startIndex(); + return text->move(text, 0, UITERATOR_START); } /** - * Retrieve the index of the end of the input text. 
This is the end index + * Retrieve the index of the end of the input text. This is the end index * of the CharacterIterator or the length of the String * over which this Normalizer is iterating */ UTextOffset Normalizer::endIndex() const { - return text->endIndex(); + return text->move(text, 0, UITERATOR_END); } //------------------------------------------------------------------------- @@ -412,7 +486,7 @@ Normalizer::getOption(int32_t option) const /** * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning of the input text. + * The iteration position is set to the beginning of the input text. */ void Normalizer::setText(const UnicodeString& newText, @@ -426,8 +500,8 @@ Normalizer::setText(const UnicodeString& newText, status = U_MEMORY_ALLOCATION_ERROR; return; } - delete text; - text = newIter; + delete (CharacterIterator *)(text->context); + text->context = newIter; reset(); } @@ -447,8 +521,8 @@ Normalizer::setText(const CharacterIterator& newText, status = U_MEMORY_ALLOCATION_ERROR; return; } - delete text; - text = newIter; + delete (CharacterIterator *)(text->context); + text->context = newIter; reset(); } @@ -465,8 +539,8 @@ Normalizer::setText(const UChar* newText, status = U_MEMORY_ALLOCATION_ERROR; return; } - delete text; - text = newIter; + delete (CharacterIterator *)(text->context); + text->context = newIter; reset(); } @@ -477,7 +551,7 @@ Normalizer::setText(const UChar* newText, void Normalizer::getText(UnicodeString& result) { - text->getText(result); + ((CharacterIterator *)(text->context))->getText(result); } //------------------------------------------------------------------------- @@ -491,40 +565,69 @@ void Normalizer::clearBuffer() { UBool Normalizer::nextNormalize() { - UErrorCode errorCode=U_ZERO_ERROR; + UChar *p; + int32_t length; + UErrorCode errorCode; clearBuffer(); currentIndex=nextIndex; - text->setIndex(nextIndex); - if(!text->hasNext()) { + text->move(text, nextIndex, 
UITERATOR_START); + if(!text->hasNext(text)) { return FALSE; } - buffer.fLength=unorm_nextNormalize(buffer.fArray, buffer.fCapacity, *text, - fUMode, (fOptions&IGNORE_HANGUL)!=0, - UnicodeString::growBuffer, &buffer, - &errorCode); - nextIndex=text->getIndex(); - return U_SUCCESS(errorCode) && buffer.length()>0; + errorCode=U_ZERO_ERROR; + p=buffer.getBuffer(-1); + length=unorm_nextNormalize(p, buffer.getCapacity(), text, + fUMode, (fOptions&IGNORE_HANGUL)!=0, + &errorCode); + buffer.releaseBuffer(length); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + errorCode=U_ZERO_ERROR; + text->move(text, nextIndex, UITERATOR_START); + p=buffer.getBuffer(length); + length=unorm_nextNormalize(p, buffer.getCapacity(), text, + fUMode, (fOptions&IGNORE_HANGUL)!=0, + &errorCode); + buffer.releaseBuffer(length); + } + + nextIndex=text->move(text, 0, UITERATOR_CURRENT); + return U_SUCCESS(errorCode) && !buffer.isEmpty(); } UBool Normalizer::previousNormalize() { - UErrorCode errorCode=U_ZERO_ERROR; + UChar *p; + int32_t length; + UErrorCode errorCode; clearBuffer(); nextIndex=currentIndex; - text->setIndex(currentIndex); - if(!text->hasPrevious()) { + text->move(text, currentIndex, UITERATOR_START); + if(!text->hasPrevious(text)) { return FALSE; } - buffer.fLength=unorm_previousNormalize(buffer.fArray, buffer.fCapacity, *text, - fUMode, (fOptions&IGNORE_HANGUL)!=0, - UnicodeString::growBuffer, &buffer, - &errorCode); + errorCode=U_ZERO_ERROR; + p=buffer.getBuffer(-1); + length=unorm_previousNormalize(p, buffer.getCapacity(), text, + fUMode, (fOptions&IGNORE_HANGUL)!=0, + &errorCode); + buffer.releaseBuffer(length); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + errorCode=U_ZERO_ERROR; + text->move(text, currentIndex, UITERATOR_START); + p=buffer.getBuffer(length); + length=unorm_previousNormalize(p, buffer.getCapacity(), text, + fUMode, (fOptions&IGNORE_HANGUL)!=0, + &errorCode); + buffer.releaseBuffer(length); + } + bufferPos=buffer.length(); - currentIndex=text->getIndex(); - return 
U_SUCCESS(errorCode) && buffer.length()>0; + currentIndex=text->move(text, 0, UITERATOR_CURRENT); + return U_SUCCESS(errorCode) && !buffer.isEmpty(); } + U_NAMESPACE_END diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h index 51a8f117d9..d3d3341233 100644 --- a/icu4c/source/common/unicode/normlzr.h +++ b/icu4c/source/common/unicode/normlzr.h @@ -14,6 +14,9 @@ #include "unicode/chariter.h" #include "unicode/unorm.h" +struct UCharIterator; +typedef struct UCharIterator UCharIterator; + U_NAMESPACE_BEGIN /** * \file @@ -881,7 +884,7 @@ private: UBool nextNormalize(); UBool previousNormalize(); - void checkData(); + void init(CharacterIterator *iter); void clearBuffer(void); // Helper, without UErrorCode, for easier transitional code @@ -896,7 +899,7 @@ private: int32_t fOptions; // The input text and our position in it - CharacterIterator* text; + UCharIterator *text; // The normalization buffer is the result of normalization // of the source in [currentIndex..nextIndex[ . 
diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index 8813fb7e0c..5439b4235a 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -831,10 +831,9 @@ unorm_quickCheck(const UChar *src, /* make NFD & NFKD ---------------------------------------------------------- */ static int32_t -_decompose(UChar *&dest, int32_t &destCapacity, +_decompose(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBool compat, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, uint8_t &outTrailCC, UErrorCode * /*pErrorCode*/) { UChar buffer[3]; @@ -843,7 +842,6 @@ _decompose(UChar *&dest, int32_t &destCapacity, int32_t destIndex, reorderStartIndex, length; UChar c, c2, minNoMaybe; uint8_t cc, prevCC, trailCC; - UBool canGrow; if(!compat) { minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]; @@ -862,9 +860,6 @@ _decompose(UChar *&dest, int32_t &destCapacity, norm32=0; c=0; - /* do not attempt to grow if there is no growBuffer function or if it has failed before */ - canGrow=(UBool)(growBuffer!=NULL); - if(srcLength>=0) { /* string with length */ limit=src+srcLength; @@ -893,14 +888,7 @@ _decompose(UChar *&dest, int32_t &destCapacity, /* copy these code units all at once */ if(src!=prevSrc) { length=(int32_t)(src-prevSrc); - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? - 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR); } destIndex+=length; @@ -988,14 +976,7 @@ _decompose(UChar *&dest, int32_t &destCapacity, } /* append the decomposition to the destination buffer, assume length>0 */ - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? 
- 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { UChar *reorderSplit=dest+destIndex; if(p==NULL) { /* fastpath: single code point */ @@ -1040,10 +1021,9 @@ _decompose(UChar *&dest, int32_t &destCapacity, } U_CAPI int32_t U_EXPORT2 -unorm_decompose(UChar **pDest, int32_t *pDestCapacity, +unorm_decompose(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBool compat, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode) { int32_t destIndex; uint8_t trailCC; @@ -1052,14 +1032,13 @@ unorm_decompose(UChar **pDest, int32_t *pDestCapacity, return 0; } - destIndex=_decompose(*pDest, *pDestCapacity, + destIndex=_decompose(dest, destCapacity, src, srcLength, compat, ignoreHangul, - growBuffer, context, trailCC, pErrorCode); - return u_terminateUChars(*pDest, *pDestCapacity, destIndex, pErrorCode); + return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode); } /* make FCD ----------------------------------------------------------------- */ @@ -1118,8 +1097,7 @@ _findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) { static uint8_t _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit, - UChar *&dest, int32_t &destIndex, int32_t &destCapacity, - UBool canGrow, UGrowBuffer *growBuffer, void *context) { + UChar *dest, int32_t &destIndex, int32_t destCapacity) { const UChar *p; uint32_t norm32; int32_t reorderStartIndex, length; @@ -1181,14 +1159,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit, } /* append the decomposition to the destination buffer, assume length>0 */ - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? 
- 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { UChar *reorderSplit=dest+destIndex; if(p==NULL) { /* fastpath: single code point */ @@ -1232,16 +1203,14 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit, } static int32_t -unorm_makeFCD(UChar *&dest, int32_t &destCapacity, +unorm_makeFCD(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode) { const UChar *limit, *prevSrc, *decompStart; int32_t destIndex, length; UChar c, c2; uint16_t fcd16; int16_t prevCC, cc; - UBool canGrow; if(!_haveData(*pErrorCode)) { return 0; @@ -1256,9 +1225,6 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity, c=0; fcd16=0; - /* do not attempt to grow if there is no growBuffer function or if it has failed before */ - canGrow=(UBool)(growBuffer!=NULL); - if(srcLength>=0) { /* string with length */ limit=src+srcLength; @@ -1313,14 +1279,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity, /* copy these code units all at once */ if(src!=prevSrc) { length=(int32_t)(src-prevSrc); - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? - 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR); } destIndex+=length; @@ -1384,14 +1343,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity, /* just append (c, c2) */ length= c2==0 ? 1 : 2; - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? 
- 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { dest[destIndex++]=c; if(c2!=0) { dest[destIndex++]=c2; @@ -1418,8 +1370,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity, * decompose and reorder a limited piece of the text */ prevCC=_decomposeFCD(decompStart, src, limit, - dest, destIndex, destCapacity, - canGrow, growBuffer, context); + dest, destIndex, destCapacity); decompStart=src; } } @@ -1845,9 +1796,20 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_ length=_decompose(buffer, bufferCapacity, prevStarter, src-prevStarter, (decompQCMask&_NORM_QC_NFKD)!=0, FALSE, - (UGrowBuffer*)u_growBufferFromStatic, stackBuffer, trailCC, pErrorCode); + if(length>bufferCapacity) { + if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*length, 0)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + /* _decompose() reports overflow only through its return value, so there is no error code to reset before retrying */ + length=_decompose(buffer, bufferCapacity, + prevStarter, src-prevStarter, + (decompQCMask&_NORM_QC_NFKD)!=0, FALSE, + trailCC, + pErrorCode); + } /* set the next starter */ prevStarter=src; @@ -1864,10 +1826,9 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_ } static int32_t -_compose(UChar *&dest, int32_t &destCapacity, +_compose(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBool compat, UBool /* ### TODO: need to do this? 
-- ignoreHangul -- ### */, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode) { UChar stackBuffer[_STACK_BUFFER_CAPACITY]; UChar *buffer; @@ -1878,7 +1839,6 @@ _compose(UChar *&dest, int32_t &destCapacity, int32_t destIndex, reorderStartIndex, length; UChar c, c2, minNoMaybe; uint8_t cc, prevCC; - UBool canGrow; if(!_haveData(*pErrorCode)) { return 0; @@ -1918,9 +1878,6 @@ _compose(UChar *&dest, int32_t &destCapacity, norm32=0; c=0; - /* do not attempt to grow if there is no growBuffer function or if it has failed before */ - canGrow=(UBool)(growBuffer!=NULL); - if(srcLength>=0) { /* string with length */ limit=src+srcLength; @@ -1949,14 +1906,7 @@ _compose(UChar *&dest, int32_t &destCapacity, /* copy these code units all at once */ if(src!=prevSrc) { length=(int32_t)(src-prevSrc); - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? - 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR); } destIndex+=length; @@ -2098,14 +2048,7 @@ _compose(UChar *&dest, int32_t &destCapacity, } /* append the recomposed buffer contents to the destination buffer */ - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? - 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { while(length>0) { dest[destIndex++]=*p++; --length; @@ -2122,14 +2065,7 @@ _compose(UChar *&dest, int32_t &destCapacity, } /* append the single code point (c, c2) to the destination buffer */ - if( (destIndex+length)<=destCapacity || - /* attempt to grow the buffer */ - (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity, - limit==NULL ? 
- 2*destCapacity+length+20 : - destCapacity+length+2*(limit-src)+20, - destIndex))!=FALSE) - ) { + if((destIndex+length)<=destCapacity) { if(cc!=0 && cc0 && srcLength<=destCapacity) { + uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR); } - return u_terminateUChars(*pDest, *pDestCapacity, srcLength, pErrorCode); + return u_terminateUChars(dest, destCapacity, srcLength, pErrorCode); default: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -2275,10 +2200,9 @@ unorm_normalize(const UChar *src, int32_t srcLength, return 0; } - return unorm_internalNormalize(&dest, &destCapacity, + return unorm_internalNormalize(dest, destCapacity, src, srcLength, mode, (UBool)((option&UNORM_IGNORE_HANGUL)!=0), - NULL, NULL, pErrorCode); } @@ -2288,7 +2212,7 @@ unorm_normalize(const UChar *src, int32_t srcLength, /* * These iteration functions are the core implementations of the * Normalizer class iteration API. - * They read from a CharacterIterator into their own buffer + * They read from a UCharIterator into their own buffer * and normalize into the Normalizer iteration buffer. * Normalizer itself then iterates over its buffer until that needs to be * filled again. @@ -2302,11 +2226,11 @@ unorm_normalize(const UChar *src, int32_t srcLength, * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!) 
*/ static inline uint32_t -_getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) { +_getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) { uint32_t norm32; /* need src.hasPrevious() */ - c=src.previous(); + c=src.previous(&src); c2=0; /* check for a surrogate before getting norm32 to see if we need to predecrement further */ @@ -2314,10 +2238,10 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U return 0; } else if(!UTF_IS_SURROGATE(c)) { return _getNorm32(c); - } else if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious()) { + } else if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious(&src)) { /* unpaired surrogate */ return 0; - } else if(UTF_IS_FIRST_SURROGATE(c2=src.previous())) { + } else if(UTF_IS_FIRST_SURROGATE(c2=src.previous(&src))) { norm32=_getNorm32(c2); if((norm32&mask)==0) { /* all surrogate pairs with this lead surrogate have irrelevant data */ @@ -2328,7 +2252,7 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U } } else { /* unpaired second surrogate, undo the c2=src.previous() movement */ - src.move(1, CharacterIterator::kCurrent); + src.move(&src, 1, UITERATOR_CURRENT); return 0; } } @@ -2338,14 +2262,14 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!) */ typedef UBool -IsPrevBoundaryFn(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2); +IsPrevBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2); /* * read backwards and check if the combining class is 0 * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!) 
*/ static UBool -_isPrevCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) { +_isPrevCCZero(UCharIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) { return (_getPrevNorm32(src, minC, ccMask, c, c2)&ccMask)==0; } @@ -2355,7 +2279,7 @@ _isPrevCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!) */ static UBool -_isPrevTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) { +_isPrevTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) { uint32_t norm32, decompQCMask; decompQCMask=(ccOrQCMask<<2)&0xf; /* decomposition quick check mask */ @@ -2364,7 +2288,7 @@ _isPrevTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, U } static int32_t -_findPreviousIterationBoundary(CharacterIterator &src, +_findPreviousIterationBoundary(UCharIterator &src, IsPrevBoundaryFn *isPrevBoundary, uint32_t minC, uint32_t mask, UChar *&buffer, int32_t &bufferCapacity, int32_t &startIndex, @@ -2377,7 +2301,7 @@ _findPreviousIterationBoundary(CharacterIterator &src, stackBuffer=buffer; startIndex=bufferCapacity; /* fill the buffer from the end backwards */ - while(src.hasPrevious()) { + while(src.hasPrevious(&src)) { isBoundary=isPrevBoundary(src, minC, mask, c, c2); /* always write this character to the front of the buffer */ @@ -2387,7 +2311,7 @@ _findPreviousIterationBoundary(CharacterIterator &src, if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - src.setToStart(); + src.move(&src, 0, UITERATOR_START); return 0; } @@ -2412,10 +2336,9 @@ _findPreviousIterationBoundary(CharacterIterator &src, } U_CFUNC int32_t -unorm_previousNormalize(UChar *&dest, int32_t &destCapacity, - CharacterIterator &src, +unorm_previousNormalize(UChar *dest, int32_t 
destCapacity, + UCharIterator *src, UNormalizationMode mode, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode) { UChar stackBuffer[40]; UChar *buffer; @@ -2443,15 +2366,30 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity, mask=_NORM_CC_MASK|_NORM_QC_NFKC; break; case UNORM_NONE: - if(src.hasPrevious()) { - UChar32 c=src.previous32(); + destLength=0; + if(src->hasPrevious(src)) { + UChar c, c2; - destLength=0; - UTF_APPEND_CHAR_UNSAFE(dest, destLength, c); - return destLength; - } else { - return 0; + c=src->previous(src); + destLength=1; + if(UTF_IS_TRAIL(c) && src->hasPrevious(src)) { + c2=src->previous(src); + if(UTF_IS_LEAD(c2)) { + if(destCapacity>=2) { + dest[1]=c; /* trail surrogate */ + destLength=2; + } + c=c2; /* lead surrogate to be written below */ + } else { + src->move(src, 1, UITERATOR_CURRENT); + } + } + + if(destCapacity>0) { + dest[0]=c; + } } + return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); default: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -2459,16 +2397,16 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity, buffer=stackBuffer; bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR); - bufferLength=_findPreviousIterationBoundary(src, + bufferLength=_findPreviousIterationBoundary(*src, isPreviousBoundary, minC, mask, buffer, bufferCapacity, startIndex, pErrorCode); if(bufferLength>0) { - destLength=unorm_internalNormalize(&dest, &destCapacity, + destLength=unorm_internalNormalize(dest, destCapacity, buffer+startIndex, bufferLength, mode, ignoreHangul, - growBuffer, context, pErrorCode); + pErrorCode); } else { destLength=0; } @@ -2490,11 +2428,11 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity, * always reads complete characters */ static inline uint32_t -_getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) { +_getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) { 
uint32_t norm32; - /* need src.hasNext() */ - c=src.nextPostInc(); + /* need src.hasNext() to be true */ + c=src.next(&src); c2=0; if(chasNext(src)) { + UChar c, c2; - destLength=0; - UTF_APPEND_CHAR_UNSAFE(dest, destLength, c); - return destLength; - } else { - return 0; + c=src->next(src); + destLength=1; + if(UTF_IS_LEAD(c) && src->hasNext(src)) { + c2=src->next(src); + if(UTF_IS_TRAIL(c2)) { + if(destCapacity>=2) { + dest[1]=c2; /* trail surrogate */ + destLength=2; + } + /* lead surrogate to be written below */ + } else { + src->move(src, -1, UITERATOR_CURRENT); + } + } + + if(destCapacity>0) { + dest[0]=c; + } } + return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); default: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -2648,15 +2607,15 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity, buffer=stackBuffer; bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR); - bufferLength=_findNextIterationBoundary(src, + bufferLength=_findNextIterationBoundary(*src, isNextBoundary, minC, mask, buffer, bufferCapacity, pErrorCode); if(bufferLength>0) { - destLength=unorm_internalNormalize(&dest, &destCapacity, + destLength=unorm_internalNormalize(dest, destCapacity, buffer, bufferLength, mode, ignoreHangul, - growBuffer, context, pErrorCode); + pErrorCode); } else { destLength=0; } @@ -2674,4 +2633,3 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity, * and if not, how hard it would be to improve it. * For example, see _findSafeFCD(). */ - diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h index 5142bad7d3..977200e78c 100644 --- a/icu4c/source/common/unormimp.h +++ b/icu4c/source/common/unormimp.h @@ -155,14 +155,13 @@ unorm_haveData(UErrorCode *pErrorCode); /** * Internal API for normalizing. - * Does not check for bad input and uses growBuffer. + * Does not check for bad input. 
* @internal */ U_CAPI int32_t U_EXPORT2 -unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity, +unorm_internalNormalize(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UNormalizationMode mode, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode); /** @@ -170,10 +169,9 @@ unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity, * @internal */ U_CAPI int32_t U_EXPORT2 -unorm_decompose(UChar **pDest, int32_t *pDestCapacity, +unorm_decompose(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBool compat, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode); /** @@ -181,10 +179,9 @@ unorm_decompose(UChar **pDest, int32_t *pDestCapacity, * @internal */ U_CAPI int32_t U_EXPORT2 -unorm_compose(UChar **pDest, int32_t *pDestCapacity, +unorm_compose(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBool compat, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode); /** @@ -250,15 +247,103 @@ unorm_getFCD16FromSurrogatePair(const uint16_t *fcdTrieIndex, uint16_t fcd16, UC ]; } +#endif + +U_CDECL_BEGIN + +struct UCharIterator; +typedef struct UCharIterator UCharIterator; + +enum UCharIteratorOrigin { + UITERATOR_START, UITERATOR_CURRENT, UITERATOR_END +}; + +typedef enum UCharIteratorOrigin UCharIteratorOrigin; + +/** + * C API for code unit iteration. + * This can be used as a C wrapper around + * CharacterIterator, Replaceable, or implemented using simple strings, etc. + * + * @internal for normalization + */ +struct UCharIterator { + /** + * (protected) Pointer to string or wrapped object or similar. + * Not used by caller. + */ + const void *context; + + /** + * (protected) Length of string or similar. + * Not used by caller. + */ + int32_t length; + + /** + * (protected) Current index or similar. + * Not used by caller. 
+ */ + int32_t index; + + /** + * (public) Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * + * @param delta can be positive, zero, or negative + * @param origin move relative to the start, end, or current index + * @return the new index + */ + int32_t U_CALLCONV + (*move)(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); + + /** + * (public) Check if current() and next() can still + * return another code unit. + */ + UBool U_CALLCONV + (*hasNext)(UCharIterator *iter); + + /** + * (public) Check if previous() can still return another code unit. + */ + UBool U_CALLCONV + (*hasPrevious)(UCharIterator *iter); + + /** + * (public) Return the code unit at the current position, + * or 0xffff if there is none (index is at the end). + */ + UChar U_CALLCONV + (*current)(UCharIterator *iter); + + /** + * (public) Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return 0xffff if there is none (index is at the end). + */ + UChar U_CALLCONV + (*next)(UCharIterator *iter); + + /** + * (public) Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return 0xffff if there is none (index is at the start). + */ + UChar U_CALLCONV + (*previous)(UCharIterator *iter); +}; + /** * Internal API for iterative normalizing - see Normalizer. 
* @internal */ U_CFUNC int32_t -unorm_nextNormalize(UChar *&dest, int32_t &destCapacity, - U_NAMESPACE_QUALIFIER CharacterIterator &src, +unorm_nextNormalize(UChar *dest, int32_t destCapacity, + UCharIterator *src, UNormalizationMode mode, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode); /** @@ -266,13 +351,12 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity, * @internal */ U_CFUNC int32_t -unorm_previousNormalize(UChar *&dest, int32_t &destCapacity, - U_NAMESPACE_QUALIFIER CharacterIterator &src, +unorm_previousNormalize(UChar *dest, int32_t destCapacity, + UCharIterator *src, UNormalizationMode mode, UBool ignoreHangul, - UGrowBuffer *growBuffer, void *context, UErrorCode *pErrorCode); -#endif +U_CDECL_END /** * Description of the format of unorm.dat. diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 299d8e2714..7518663f6c 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -760,30 +760,33 @@ void collIterNormalize(collIterate *collationSource) UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */ int32_t normLen; - normLen = unorm_decompose(&collationSource->writableBuffer, (int32_t *)&collationSource->writableBufSize, + normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize, srcP, (int32_t)(endP - srcP), FALSE, FALSE, - u_growBufferFromStatic, collationSource->stackWritableBuffer, &status); - if (U_FAILURE(status)) { -#ifdef UCOL_DEBUG - fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status)); -#endif - return; - } - if(status == U_STRING_NOT_TERMINATED_WARNING) { + if(status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) { // reallocate and terminate if(!u_growBufferFromStatic(collationSource->stackWritableBuffer, &collationSource->writableBuffer, (int32_t *)&collationSource->writableBufSize, normLen + 1, - normLen) + 0) ) { 
#ifdef UCOL_DEBUG fprintf(stderr, "collIterNormalize(), out of memory\n"); #endif return; } - collationSource->writableBuffer[normLen] = 0; + status = U_ZERO_ERROR; + normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize, + srcP, (int32_t)(endP - srcP), + FALSE, FALSE, + &status); + } + if (U_FAILURE(status)) { +#ifdef UCOL_DEBUG + fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status)); +#endif + return; } if(collationSource->writableBuffer != collationSource->stackWritableBuffer) { @@ -3343,11 +3346,24 @@ ucol_calcSortKey(const UCollator *coll, } if(normMode != UNORM_NONE && UNORM_YES != unorm_quickCheck(source, len, normMode, status)) { - len = unorm_internalNormalize(&normSource, &normSourceLen, + len = unorm_internalNormalize(normSource, normSourceLen, source, len, normMode, FALSE, - u_growBufferFromStatic, normBuffer, status); + if(*status == U_BUFFER_OVERFLOW_ERROR) { + normSourceLen = len; + normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR); + if(normSource == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + *status = U_ZERO_ERROR; + len = unorm_internalNormalize(normSource, normSourceLen, + source, len, + normMode, FALSE, + status); + } + if(U_FAILURE(*status)) { return 0; } @@ -3835,11 +3851,24 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, /* If we need to normalize, we'll do it all at once at the beginning! 
*/ if(coll->normalizationMode != UCOL_OFF && UNORM_YES != unorm_quickCheck(source, len, UNORM_FCD, status)) { - len = unorm_internalNormalize(&normSource, &normSourceLen, + len = unorm_internalNormalize(normSource, normSourceLen, source, len, UNORM_FCD, FALSE, - u_growBufferFromStatic, normBuffer, status); + if(*status == U_BUFFER_OVERFLOW_ERROR) { + normSourceLen = len; + normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR); + if(normSource == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + *status = U_ZERO_ERROR; + len = unorm_internalNormalize(normSource, normSourceLen, + source, len, + UNORM_FCD, FALSE, + status); + } + if(U_FAILURE(*status)) { return 0; } @@ -4678,11 +4707,24 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo status = U_ZERO_ERROR; if (unorm_quickCheck(sColl->string, sLen, UNORM_NFD, &status) != UNORM_YES) { - sLen = unorm_decompose(&sColl->writableBuffer, (int32_t *)&sColl->writableBufSize, + sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize, sBuf, sLen, FALSE, FALSE, - u_growBufferFromStatic, sColl->stackWritableBuffer, &status); + if(status == U_BUFFER_OVERFLOW_ERROR) { + if(!u_growBufferFromStatic(sColl->stackWritableBuffer, + &sColl->writableBuffer, + (int32_t *)&sColl->writableBufSize, sLen, + 0) + ) { + return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */ + } + status = U_ZERO_ERROR; + sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize, + sBuf, sLen, + FALSE, FALSE, + &status); + } sBuf = sColl->writableBuffer; if (sBuf != sColl->stackWritableBuffer) { sColl->flags |= UCOL_ITER_ALLOCATED; @@ -4691,11 +4733,24 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo status = U_ZERO_ERROR; if (unorm_quickCheck(tColl->string, tLen, UNORM_NFD, &status) != UNORM_YES) { - tLen = unorm_decompose(&tColl->writableBuffer, (int32_t *)&tColl->writableBufSize, + tLen = unorm_decompose(tColl->writableBuffer, 
(int32_t)tColl->writableBufSize, tBuf, tLen, FALSE, FALSE, - u_growBufferFromStatic, tColl->stackWritableBuffer, &status); + if(status == U_BUFFER_OVERFLOW_ERROR) { + if(!u_growBufferFromStatic(tColl->stackWritableBuffer, + &tColl->writableBuffer, + (int32_t *)&tColl->writableBufSize, tLen, + 0) + ) { + return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */ + } + status = U_ZERO_ERROR; + tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize, + tBuf, tLen, + FALSE, FALSE, + &status); + } tBuf = tColl->writableBuffer; if (tBuf != tColl->stackWritableBuffer) { tColl->flags |= UCOL_ITER_ALLOCATED;