ICU-1007 simplify internal normalization api (no growBuffers, c UCharIterator)

X-SVN-Rev: 6339
This commit is contained in:
Markus Scherer 2001-10-19 17:36:02 +00:00
parent b464fd7971
commit 237bf8e91e
5 changed files with 501 additions and 298 deletions

View File

@ -12,8 +12,61 @@
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/normlzr.h"
#include "cmemory.h"
#include "unormimp.h"
U_CDECL_BEGIN
/*
* This is wrapper code around a C++ CharacterIterator to
* look like a C UCharIterator for the internal API
* for incremental normalization.
*
* The UCharIterator.context field holds a pointer to the CharacterIterator.
*/
/*
 * UCharIterator "move" callback: forwards the request to the C++
 * CharacterIterator stored in iter->context. The C origin value is cast
 * directly to CharacterIterator::EOrigin.
 */
static int32_t U_CALLCONV
characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    CharacterIterator::EOrigin from=(CharacterIterator::EOrigin)origin;
    return wrapped->move(delta, from);
}
/*
 * UCharIterator "hasNext" callback: asks the CharacterIterator stored in
 * iter->context whether it has a next code unit.
 */
static UBool U_CALLCONV
characterIteratorHasNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->hasNext();
}
/*
 * UCharIterator "hasPrevious" callback: asks the CharacterIterator stored in
 * iter->context whether it has a previous code unit.
 */
static UBool U_CALLCONV
characterIteratorHasPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->hasPrevious();
}
/*
 * UCharIterator "current" callback: returns current() of the
 * CharacterIterator stored in iter->context.
 */
static UChar U_CALLCONV
characterIteratorCurrent(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->current();
}
/*
 * UCharIterator "next" callback: returns nextPostInc() of the
 * CharacterIterator stored in iter->context.
 */
static UChar U_CALLCONV
characterIteratorNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->nextPostInc();
}
/*
 * UCharIterator "previous" callback: returns previous() of the
 * CharacterIterator stored in iter->context.
 */
static UChar U_CALLCONV
characterIteratorPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->previous();
}
/*
 * Template UCharIterator whose function pointers all dispatch to the
 * characterIterator* callbacks defined above.
 * Normalizer::init() copies this struct with uprv_memcpy() and then stores
 * the per-instance CharacterIterator in the copy's context field.
 */
static const UCharIterator characterIteratorWrapper={
0, 0, 0, /* leading data members left zero; NOTE(review): confirm their meaning/order against the UCharIterator declaration */
characterIteratorMove,
characterIteratorHasNext,
characterIteratorHasPrevious,
characterIteratorCurrent,
characterIteratorNext,
characterIteratorPrevious
};
U_CDECL_END
U_NAMESPACE_BEGIN
//-------------------------------------------------------------------------
@ -22,29 +75,26 @@ U_NAMESPACE_BEGIN
/*
 * Construct a Normalizer for the given mode with options 0.
 * The input is read through a newly created StringCharacterIterator over str;
 * both indexes and the buffer position start at 0 with an empty buffer.
 */
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
fUMode(mode), fOptions(0),
text(new StringCharacterIterator(str)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(new StringCharacterIterator(str));
}
/*
 * Construct a Normalizer for the given mode with options 0.
 * The input is read through a newly created UCharCharacterIterator over
 * (str, length); both indexes and the buffer position start at 0.
 */
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
fUMode(mode), fOptions(0),
text(new UCharCharacterIterator(str, length)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(new UCharCharacterIterator(str, length));
}
/*
 * Construct a Normalizer for the given mode with options 0.
 * The caller's iterator is not used directly; a clone of iter supplies the input.
 */
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
fUMode(mode), fOptions(0),
text(iter.clone()),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(iter.clone());
}
// deprecated constructors
@ -52,71 +102,71 @@ Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
/*
 * Deprecated constructor taking the old EMode; the mode is converted with
 * getUMode() and options are 0. Input comes from a new
 * StringCharacterIterator over str.
 */
Normalizer::Normalizer(const UnicodeString& str,
EMode mode) :
fUMode(getUMode(mode)), fOptions(0),
text(new StringCharacterIterator(str)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(new StringCharacterIterator(str));
}
/*
 * Deprecated constructor taking the old EMode plus caller-supplied options;
 * the mode is converted with getUMode(). Input comes from a new
 * StringCharacterIterator over str.
 */
Normalizer::Normalizer(const UnicodeString& str,
EMode mode,
int32_t options) :
fUMode(getUMode(mode)), fOptions(options),
text(new StringCharacterIterator(str)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(new StringCharacterIterator(str));
}
/*
 * Deprecated constructor taking the old EMode; the mode is converted with
 * getUMode() and options are 0. Input comes from a new
 * UCharCharacterIterator over (str, length).
 */
Normalizer::Normalizer(const UChar *str, int32_t length, EMode mode) :
fUMode(getUMode(mode)), fOptions(0),
text(new UCharCharacterIterator(str, length)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(new UCharCharacterIterator(str, length));
}
/*
 * Deprecated constructor taking the old EMode; the mode is converted with
 * getUMode() and options are 0. A clone of iter supplies the input.
 */
Normalizer::Normalizer(const CharacterIterator& iter,
EMode mode) :
fUMode(getUMode(mode)), fOptions(0),
text(iter.clone()),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(iter.clone());
}
/*
 * Deprecated constructor taking the old EMode plus caller-supplied options;
 * the mode is converted with getUMode(). A clone of iter supplies the input.
 */
Normalizer::Normalizer(const CharacterIterator& iter,
EMode mode,
int32_t options) :
fUMode(getUMode(mode)), fOptions(options),
text(iter.clone()),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
init(iter.clone());
}
/*
 * Copy constructor: duplicates the mode, options, both source indexes, the
 * normalization buffer and the position within it, and clones the source
 * object's underlying CharacterIterator.
 *
 * currentIndex must come from copy.currentIndex (not copy.nextIndex): the
 * copied buffer is the normalization of the source range
 * [currentIndex..nextIndex[, so both ends of that range have to be preserved
 * for the copy to report the same index and iterate backwards correctly.
 */
Normalizer::Normalizer(const Normalizer &copy) :
fUMode(copy.fUMode), fOptions(copy.fOptions),
text(copy.text->clone()),
currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
buffer(copy.buffer), bufferPos(copy.bufferPos)
{
checkData();
init(((CharacterIterator *)(copy.text->context))->clone());
}
static const UChar _NUL=0;
/*
 * Set up the UCharIterator used to read the input text.
 * A new UCharIterator is allocated and filled from characterIteratorWrapper.
 * If unorm_haveData() succeeds, iter becomes its context; otherwise iter is
 * deleted and the context is an iterator over an empty string (&_NUL, 0).
 * In both paths the pointer passed in as iter is consumed by this function.
 */
void
Normalizer::checkData() {
Normalizer::init(CharacterIterator *iter) {
UErrorCode errorCode=U_ZERO_ERROR;
if(!unorm_haveData(&errorCode)) {
delete text;
text=new UCharCharacterIterator(&_NUL, 0);
text=new UCharIterator;
/* start from the shared callback table, then attach the per-instance context */
uprv_memcpy(text, &characterIteratorWrapper, sizeof(UCharIterator));
if(unorm_haveData(&errorCode)) {
text->context=iter;
} else {
/* no normalization data: iterate over nothing instead of the caller's text */
delete iter;
text->context=new UCharCharacterIterator(&_NUL, 0);
}
}
@ -140,7 +190,7 @@ Normalizer::clone() const
*/
int32_t Normalizer::hashCode() const
{
return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
/* the text contribution is the hash of the CharacterIterator held in text->context; the rest is a plain sum of the state fields */
return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
}
UBool Normalizer::operator==(const Normalizer& that) const
@ -149,7 +199,7 @@ UBool Normalizer::operator==(const Normalizer& that) const
this==&that ||
fUMode==that.fUMode &&
fOptions==that.fOptions &&
*text==*(that.text) &&
*((CharacterIterator *)(text->context))==*((CharacterIterator *)(that.text->context)) &&
buffer==that.buffer &&
bufferPos==that.bufferPos &&
nextIndex==that.nextIndex;
@ -167,13 +217,22 @@ Normalizer::normalize(const UnicodeString& source,
if(source.isBogus() || U_FAILURE(status)) {
result.setToBogus();
} else {
/* make sure that we do not operate on the same buffer in source and result */
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
result.fLength=unorm_internalNormalize(&result.fArray, &result.fCapacity,
source.fArray, source.fLength,
UChar *buffer=result.getBuffer(source.length());
int32_t length=unorm_internalNormalize(buffer, result.getCapacity(),
source.getBuffer(), source.length(),
mode, (options&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &result,
&status);
result.releaseBuffer(length);
if(status==U_BUFFER_OVERFLOW_ERROR) {
status=U_ZERO_ERROR;
buffer=result.getBuffer(length);
length=unorm_internalNormalize(buffer, result.getCapacity(),
source.getBuffer(), source.length(),
mode, (options&IGNORE_HANGUL)!=0,
&status);
result.releaseBuffer(length);
}
if(U_FAILURE(status)) {
result.setToBogus();
}
@ -188,7 +247,7 @@ Normalizer::quickCheck(const UnicodeString& source,
return UNORM_MAYBE;
}
return unorm_quickCheck(source.fArray, source.length(),
return unorm_quickCheck(source.getBuffer(), source.length(),
mode, &status);
}
@ -200,13 +259,22 @@ Normalizer::compose(const UnicodeString& source,
if(source.isBogus() || U_FAILURE(status)) {
result.setToBogus();
} else {
/* make sure that we do not operate on the same buffer in source and result */
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
result.fLength=unorm_compose(&result.fArray, &result.fCapacity,
source.fArray, source.fLength,
UChar *buffer=result.getBuffer(source.length());
int32_t length=unorm_compose(buffer, result.getCapacity(),
source.getBuffer(), source.length(),
compat, (options&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &result,
&status);
result.releaseBuffer(length);
if(status==U_BUFFER_OVERFLOW_ERROR) {
status=U_ZERO_ERROR;
buffer=result.getBuffer(length);
length=unorm_compose(buffer, result.getCapacity(),
source.getBuffer(), source.length(),
compat, (options&IGNORE_HANGUL)!=0,
&status);
result.releaseBuffer(length);
}
if(U_FAILURE(status)) {
result.setToBogus();
}
@ -221,13 +289,22 @@ Normalizer::decompose(const UnicodeString& source,
if(source.isBogus() || U_FAILURE(status)) {
result.setToBogus();
} else {
/* make sure that we do not operate on the same buffer in source and result */
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
result.fLength=unorm_decompose(&result.fArray, &result.fCapacity,
source.fArray, source.fLength,
compat, (options&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &result,
&status);
/*
 * First attempt: decompose into a buffer sized from the source length.
 * This must call unorm_decompose(), the same function the overflow retry
 * below uses; calling unorm_compose() here would return composed text
 * whenever the first buffer happens to be large enough.
 */
UChar *buffer=result.getBuffer(source.length());
int32_t length=unorm_decompose(buffer, result.getCapacity(),
source.getBuffer(), source.length(),
compat, (options&IGNORE_HANGUL)!=0,
&status);
result.releaseBuffer(length);
if(status==U_BUFFER_OVERFLOW_ERROR) {
/* the first call reported the length it needs; retry once with a buffer requested for that length */
status=U_ZERO_ERROR;
buffer=result.getBuffer(length);
length=unorm_decompose(buffer, result.getCapacity(),
source.getBuffer(), source.length(),
compat, (options&IGNORE_HANGUL)!=0,
&status);
result.releaseBuffer(length);
}
if(U_FAILURE(status)) {
result.setToBogus();
}
@ -239,7 +316,7 @@ Normalizer::decompose(const UnicodeString& source,
//-------------------------------------------------------------------------
/**
* Return the current character in the normalized text.
* Return the current character in the normalized text.
*/
UChar32 Normalizer::current() {
if(bufferPos<buffer.length() || nextNormalize()) {
@ -280,15 +357,13 @@ UChar32 Normalizer::previous() {
}
/*
 * Reposition to the start of the input: both source indexes are set to the
 * index the iterator reports after moving to its start, and clearBuffer()
 * is called.
 */
void Normalizer::reset() {
text->setToStart();
currentIndex=nextIndex=text->getIndex();
currentIndex=nextIndex=text->move(text, 0, UITERATOR_START);
clearBuffer();
}
/*
 * Set the source position to index without normalizing anything.
 * The stored indexes are whatever the iterator reports after the move,
 * not the raw argument, and clearBuffer() is called afterwards.
 */
void
Normalizer::setIndexOnly(UTextOffset index) {
text->setIndex(index);
currentIndex=nextIndex=text->getIndex(); // validates index
currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index
clearBuffer();
}
@ -303,7 +378,7 @@ Normalizer::setIndexOnly(UTextOffset index) {
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
* returned from <tt>setIndex</tt> and {@link #getIndex}.
* <p>
* @param index the desired index in the input text.
* @param index the desired index in the input text.
*
* @return the first normalized character that is the result of iterating
* forward starting at the given index.
@ -317,8 +392,8 @@ UChar32 Normalizer::setIndex(UTextOffset index) {
}
/**
* Return the first character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to the beginning of the text.
* Return the first character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to the beginning of the text.
*/
UChar32 Normalizer::first() {
reset();
@ -326,13 +401,12 @@ UChar32 Normalizer::first() {
}
/**
* Return the last character in the normalized text. This resets
* Return the last character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to be just before the
* the input text corresponding to that normalized character.
*/
UChar32 Normalizer::last() {
text->setToEnd();
currentIndex=nextIndex=text->getIndex();
/* move to the end of the input and record that position in both indexes */
currentIndex=nextIndex=text->move(text, 0, UITERATOR_END);
clearBuffer();
/* the result is obtained by stepping backwards from the end via previous() */
return previous();
}
@ -360,21 +434,21 @@ UTextOffset Normalizer::getIndex() const {
}
/**
* Retrieve the index of the start of the input text. This is the begin index
* Retrieve the index of the start of the input text. This is the begin index
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
* over which this <tt>Normalizer</tt> is iterating
*/
UTextOffset Normalizer::startIndex() const {
return text->startIndex();
/* NOTE(review): the index is obtained by moving the underlying iterator to its start, so this const method repositions it; nextNormalize()/previousNormalize() re-seek before reading */
return text->move(text, 0, UITERATOR_START);
}
/**
* Retrieve the index of the end of the input text. This is the end index
* Retrieve the index of the end of the input text. This is the end index
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
* over which this <tt>Normalizer</tt> is iterating
*/
UTextOffset Normalizer::endIndex() const {
return text->endIndex();
/* NOTE(review): the index is obtained by moving the underlying iterator to its end, so this const method repositions it; nextNormalize()/previousNormalize() re-seek before reading */
return text->move(text, 0, UITERATOR_END);
}
//-------------------------------------------------------------------------
@ -412,7 +486,7 @@ Normalizer::getOption(int32_t option) const
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning of the input text.
* The iteration position is set to the beginning of the input text.
*/
void
Normalizer::setText(const UnicodeString& newText,
@ -426,8 +500,8 @@ Normalizer::setText(const UnicodeString& newText,
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete text;
text = newIter;
delete (CharacterIterator *)(text->context);
text->context = newIter;
reset();
}
@ -447,8 +521,8 @@ Normalizer::setText(const CharacterIterator& newText,
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete text;
text = newIter;
delete (CharacterIterator *)(text->context);
text->context = newIter;
reset();
}
@ -465,8 +539,8 @@ Normalizer::setText(const UChar* newText,
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete text;
text = newIter;
delete (CharacterIterator *)(text->context);
text->context = newIter;
reset();
}
@ -477,7 +551,7 @@ Normalizer::setText(const UChar* newText,
/*
 * Copy the input text into result by delegating to getText() of the
 * CharacterIterator held in text->context.
 */
void
Normalizer::getText(UnicodeString& result)
{
text->getText(result);
((CharacterIterator *)(text->context))->getText(result);
}
//-------------------------------------------------------------------------
@ -491,40 +565,69 @@ void Normalizer::clearBuffer() {
/*
 * Refill the normalization buffer with the next piece of normalized text,
 * reading forward from nextIndex.
 * Returns FALSE when there is no more input, when normalization failed,
 * or when the resulting buffer is empty; TRUE otherwise.
 * On return currentIndex is the old nextIndex and nextIndex is the
 * iterator position after the consumed input.
 */
UBool
Normalizer::nextNormalize() {
UErrorCode errorCode=U_ZERO_ERROR;
UChar *p;
int32_t length;
UErrorCode errorCode;
clearBuffer();
currentIndex=nextIndex;
text->setIndex(nextIndex);
if(!text->hasNext()) {
/* seek the input iterator to where the previous piece ended */
text->move(text, nextIndex, UITERATOR_START);
if(!text->hasNext(text)) {
return FALSE;
}
buffer.fLength=unorm_nextNormalize(buffer.fArray, buffer.fCapacity, *text,
fUMode, (fOptions&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &buffer,
&errorCode);
nextIndex=text->getIndex();
return U_SUCCESS(errorCode) && buffer.length()>0;
/* first attempt: normalize into whatever capacity the buffer currently has */
errorCode=U_ZERO_ERROR;
p=buffer.getBuffer(-1);
length=unorm_nextNormalize(p, buffer.getCapacity(), text,
fUMode, (fOptions&IGNORE_HANGUL)!=0,
&errorCode);
buffer.releaseBuffer(length);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
/* the first attempt advanced the iterator, so rewind to nextIndex and retry with a buffer requested for the reported length */
errorCode=U_ZERO_ERROR;
text->move(text, nextIndex, UITERATOR_START);
p=buffer.getBuffer(length);
length=unorm_nextNormalize(p, buffer.getCapacity(), text,
fUMode, (fOptions&IGNORE_HANGUL)!=0,
&errorCode);
buffer.releaseBuffer(length);
}
/* a zero-delta move relative to the current position just reports the index */
nextIndex=text->move(text, 0, UITERATOR_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty();
}
/*
 * Refill the normalization buffer with the piece of normalized text that
 * precedes currentIndex, reading backwards.
 * Returns FALSE when there is no preceding input, when normalization failed,
 * or when the resulting buffer is empty; TRUE otherwise.
 * On return nextIndex is the old currentIndex, currentIndex is the iterator
 * position before the consumed input, and bufferPos is at the end of the buffer.
 */
UBool
Normalizer::previousNormalize() {
UErrorCode errorCode=U_ZERO_ERROR;
UChar *p;
int32_t length;
UErrorCode errorCode;
clearBuffer();
nextIndex=currentIndex;
text->setIndex(currentIndex);
if(!text->hasPrevious()) {
/* seek the input iterator to the start of the piece currently in the buffer */
text->move(text, currentIndex, UITERATOR_START);
if(!text->hasPrevious(text)) {
return FALSE;
}
buffer.fLength=unorm_previousNormalize(buffer.fArray, buffer.fCapacity, *text,
fUMode, (fOptions&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &buffer,
&errorCode);
/* first attempt: normalize into whatever capacity the buffer currently has */
errorCode=U_ZERO_ERROR;
p=buffer.getBuffer(-1);
length=unorm_previousNormalize(p, buffer.getCapacity(), text,
fUMode, (fOptions&IGNORE_HANGUL)!=0,
&errorCode);
buffer.releaseBuffer(length);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
/* the first attempt moved the iterator backwards, so return to currentIndex and retry with a buffer requested for the reported length */
errorCode=U_ZERO_ERROR;
text->move(text, currentIndex, UITERATOR_START);
p=buffer.getBuffer(length);
length=unorm_previousNormalize(p, buffer.getCapacity(), text,
fUMode, (fOptions&IGNORE_HANGUL)!=0,
&errorCode);
buffer.releaseBuffer(length);
}
/* backwards iteration consumes the buffer from its end */
bufferPos=buffer.length();
currentIndex=text->getIndex();
return U_SUCCESS(errorCode) && buffer.length()>0;
/* a zero-delta move relative to the current position just reports the index */
currentIndex=text->move(text, 0, UITERATOR_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty();
}
U_NAMESPACE_END

View File

@ -14,6 +14,9 @@
#include "unicode/chariter.h"
#include "unicode/unorm.h"
struct UCharIterator;
typedef struct UCharIterator UCharIterator;
U_NAMESPACE_BEGIN
/**
* \file
@ -881,7 +884,7 @@ private:
UBool nextNormalize();
UBool previousNormalize();
void checkData();
void init(CharacterIterator *iter);
void clearBuffer(void);
// Helper, without UErrorCode, for easier transitional code
@ -896,7 +899,7 @@ private:
int32_t fOptions;
// The input text and our position in it
CharacterIterator* text;
UCharIterator *text;
// The normalization buffer is the result of normalization
// of the source in [currentIndex..nextIndex[ .

View File

@ -831,10 +831,9 @@ unorm_quickCheck(const UChar *src,
/* make NFD & NFKD ---------------------------------------------------------- */
static int32_t
_decompose(UChar *&dest, int32_t &destCapacity,
_decompose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
uint8_t &outTrailCC,
UErrorCode * /*pErrorCode*/) {
UChar buffer[3];
@ -843,7 +842,6 @@ _decompose(UChar *&dest, int32_t &destCapacity,
int32_t destIndex, reorderStartIndex, length;
UChar c, c2, minNoMaybe;
uint8_t cc, prevCC, trailCC;
UBool canGrow;
if(!compat) {
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
@ -862,9 +860,6 @@ _decompose(UChar *&dest, int32_t &destCapacity,
norm32=0;
c=0;
/* do not attempt to grow if there is no growBuffer function or if it has failed before */
canGrow=(UBool)(growBuffer!=NULL);
if(srcLength>=0) {
/* string with length */
limit=src+srcLength;
@ -893,14 +888,7 @@ _decompose(UChar *&dest, int32_t &destCapacity,
/* copy these code units all at once */
if(src!=prevSrc) {
length=(int32_t)(src-prevSrc);
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
}
destIndex+=length;
@ -988,14 +976,7 @@ _decompose(UChar *&dest, int32_t &destCapacity,
}
/* append the decomposition to the destination buffer, assume length>0 */
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
UChar *reorderSplit=dest+destIndex;
if(p==NULL) {
/* fastpath: single code point */
@ -1040,10 +1021,9 @@ _decompose(UChar *&dest, int32_t &destCapacity,
}
U_CAPI int32_t U_EXPORT2
unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
unorm_decompose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
int32_t destIndex;
uint8_t trailCC;
@ -1052,14 +1032,13 @@ unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
return 0;
}
destIndex=_decompose(*pDest, *pDestCapacity,
destIndex=_decompose(dest, destCapacity,
src, srcLength,
compat, ignoreHangul,
growBuffer, context,
trailCC,
pErrorCode);
return u_terminateUChars(*pDest, *pDestCapacity, destIndex, pErrorCode);
return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
}
/* make FCD ----------------------------------------------------------------- */
@ -1118,8 +1097,7 @@ _findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) {
static uint8_t
_decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
UChar *&dest, int32_t &destIndex, int32_t &destCapacity,
UBool canGrow, UGrowBuffer *growBuffer, void *context) {
UChar *dest, int32_t &destIndex, int32_t destCapacity) {
const UChar *p;
uint32_t norm32;
int32_t reorderStartIndex, length;
@ -1181,14 +1159,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
}
/* append the decomposition to the destination buffer, assume length>0 */
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
UChar *reorderSplit=dest+destIndex;
if(p==NULL) {
/* fastpath: single code point */
@ -1232,16 +1203,14 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
}
static int32_t
unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
unorm_makeFCD(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
const UChar *limit, *prevSrc, *decompStart;
int32_t destIndex, length;
UChar c, c2;
uint16_t fcd16;
int16_t prevCC, cc;
UBool canGrow;
if(!_haveData(*pErrorCode)) {
return 0;
@ -1256,9 +1225,6 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
c=0;
fcd16=0;
/* do not attempt to grow if there is no growBuffer function or if it has failed before */
canGrow=(UBool)(growBuffer!=NULL);
if(srcLength>=0) {
/* string with length */
limit=src+srcLength;
@ -1313,14 +1279,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
/* copy these code units all at once */
if(src!=prevSrc) {
length=(int32_t)(src-prevSrc);
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
}
destIndex+=length;
@ -1384,14 +1343,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
/* just append (c, c2) */
length= c2==0 ? 1 : 2;
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
@ -1418,8 +1370,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
* decompose and reorder a limited piece of the text
*/
prevCC=_decomposeFCD(decompStart, src, limit,
dest, destIndex, destCapacity,
canGrow, growBuffer, context);
dest, destIndex, destCapacity);
decompStart=src;
}
}
@ -1845,9 +1796,20 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
length=_decompose(buffer, bufferCapacity,
prevStarter, src-prevStarter,
(decompQCMask&_NORM_QC_NFKD)!=0, FALSE,
(UGrowBuffer*)u_growBufferFromStatic, stackBuffer,
trailCC,
pErrorCode);
/*
 * _decompose() never sets *pErrorCode (its UErrorCode parameter is unnamed
 * and unused), and it skips writes that would not fit while still counting
 * them. Overflow therefore has to be detected from the returned length
 * exceeding the capacity; testing for U_BUFFER_OVERFLOW_ERROR here would
 * never trigger the retry.
 */
if(length>bufferCapacity) {
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*length, 0)) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/* decompose again, this time into the enlarged buffer */
length=_decompose(buffer, bufferCapacity,
prevStarter, src-prevStarter,
(decompQCMask&_NORM_QC_NFKD)!=0, FALSE,
trailCC,
pErrorCode);
}
/* set the next starter */
prevStarter=src;
@ -1864,10 +1826,9 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
}
static int32_t
_compose(UChar *&dest, int32_t &destCapacity,
_compose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, UBool /* ### TODO: need to do this? -- ignoreHangul -- ### */,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
UChar stackBuffer[_STACK_BUFFER_CAPACITY];
UChar *buffer;
@ -1878,7 +1839,6 @@ _compose(UChar *&dest, int32_t &destCapacity,
int32_t destIndex, reorderStartIndex, length;
UChar c, c2, minNoMaybe;
uint8_t cc, prevCC;
UBool canGrow;
if(!_haveData(*pErrorCode)) {
return 0;
@ -1918,9 +1878,6 @@ _compose(UChar *&dest, int32_t &destCapacity,
norm32=0;
c=0;
/* do not attempt to grow if there is no growBuffer function or if it has failed before */
canGrow=(UBool)(growBuffer!=NULL);
if(srcLength>=0) {
/* string with length */
limit=src+srcLength;
@ -1949,14 +1906,7 @@ _compose(UChar *&dest, int32_t &destCapacity,
/* copy these code units all at once */
if(src!=prevSrc) {
length=(int32_t)(src-prevSrc);
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
}
destIndex+=length;
@ -2098,14 +2048,7 @@ _compose(UChar *&dest, int32_t &destCapacity,
}
/* append the recomposed buffer contents to the destination buffer */
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
while(length>0) {
dest[destIndex++]=*p++;
--length;
@ -2122,14 +2065,7 @@ _compose(UChar *&dest, int32_t &destCapacity,
}
/* append the single code point (c, c2) to the destination buffer */
if( (destIndex+length)<=destCapacity ||
/* attempt to grow the buffer */
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
limit==NULL ?
2*destCapacity+length+20 :
destCapacity+length+2*(limit-src)+20,
destIndex))!=FALSE)
) {
if((destIndex+length)<=destCapacity) {
if(cc!=0 && cc<prevCC) {
/* (c, c2) is out of order with respect to the preceding text */
UChar *reorderSplit=dest+destIndex;
@ -2160,10 +2096,9 @@ _compose(UChar *&dest, int32_t &destCapacity,
}
U_CAPI int32_t U_EXPORT2
unorm_compose(UChar **pDest, int32_t *pDestCapacity,
unorm_compose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
int32_t destIndex;
@ -2171,13 +2106,12 @@ unorm_compose(UChar **pDest, int32_t *pDestCapacity,
return 0;
}
destIndex=_compose(*pDest, *pDestCapacity,
destIndex=_compose(dest, destCapacity,
src, srcLength,
compat, ignoreHangul,
growBuffer, context,
pErrorCode);
return u_terminateUChars(*pDest, *pDestCapacity, destIndex, pErrorCode);
return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
}
/*
@ -2191,57 +2125,48 @@ unorm_compose(UChar **pDest, int32_t *pDestCapacity,
/**
* Internal API for normalizing.
* Does not check for bad input and uses growBuffer.
* Does not check for bad input.
* @internal
*/
U_CAPI int32_t U_EXPORT2
unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity,
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UNormalizationMode mode, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
switch(mode) {
case UNORM_NFD:
return unorm_decompose(pDest, pDestCapacity,
return unorm_decompose(dest, destCapacity,
src, srcLength,
FALSE, ignoreHangul,
growBuffer, context,
pErrorCode);
case UNORM_NFKD:
return unorm_decompose(pDest, pDestCapacity,
return unorm_decompose(dest, destCapacity,
src, srcLength,
TRUE, ignoreHangul,
growBuffer, context,
pErrorCode);
case UNORM_NFC:
return unorm_compose(pDest, pDestCapacity,
return unorm_compose(dest, destCapacity,
src, srcLength,
FALSE, ignoreHangul,
growBuffer, context,
pErrorCode);
case UNORM_NFKC:
return unorm_compose(pDest, pDestCapacity,
return unorm_compose(dest, destCapacity,
src, srcLength,
TRUE, ignoreHangul,
growBuffer, context,
pErrorCode);
case UNORM_FCD:
return unorm_makeFCD(*pDest, *pDestCapacity,
return unorm_makeFCD(dest, destCapacity,
src, srcLength,
growBuffer, context,
pErrorCode);
case UNORM_NONE:
/* just copy the string */
if(srcLength==-1) {
srcLength=u_strlen(src);
}
if( srcLength<=*pDestCapacity ||
/* attempt to grow the buffer */
(growBuffer!=NULL && growBuffer(context, pDest, pDestCapacity, srcLength+1, 0))
) {
uprv_memcpy(*pDest, src, srcLength*U_SIZEOF_UCHAR);
if(srcLength>0 && srcLength<=destCapacity) {
uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR);
}
return u_terminateUChars(*pDest, *pDestCapacity, srcLength, pErrorCode);
return u_terminateUChars(dest, destCapacity, srcLength, pErrorCode);
default:
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -2275,10 +2200,9 @@ unorm_normalize(const UChar *src, int32_t srcLength,
return 0;
}
return unorm_internalNormalize(&dest, &destCapacity,
return unorm_internalNormalize(dest, destCapacity,
src, srcLength,
mode, (UBool)((option&UNORM_IGNORE_HANGUL)!=0),
NULL, NULL,
pErrorCode);
}
@ -2288,7 +2212,7 @@ unorm_normalize(const UChar *src, int32_t srcLength,
/*
* These iteration functions are the core implementations of the
* Normalizer class iteration API.
* They read from a CharacterIterator into their own buffer
* They read from a UCharIterator into their own buffer
* and normalize into the Normalizer iteration buffer.
* Normalizer itself then iterates over its buffer until that needs to be
* filled again.
@ -2302,11 +2226,11 @@ unorm_normalize(const UChar *src, int32_t srcLength,
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
*/
static inline uint32_t
_getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
_getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
uint32_t norm32;
/* need src.hasPrevious() */
c=src.previous();
c=src.previous(&src);
c2=0;
/* check for a surrogate before getting norm32 to see if we need to predecrement further */
@ -2314,10 +2238,10 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
return 0;
} else if(!UTF_IS_SURROGATE(c)) {
return _getNorm32(c);
} else if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious()) {
} else if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious(&src)) {
/* unpaired surrogate */
return 0;
} else if(UTF_IS_FIRST_SURROGATE(c2=src.previous())) {
} else if(UTF_IS_FIRST_SURROGATE(c2=src.previous(&src))) {
norm32=_getNorm32(c2);
if((norm32&mask)==0) {
/* all surrogate pairs with this lead surrogate have irrelevant data */
@ -2328,7 +2252,7 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
}
} else {
/* unpaired second surrogate, undo the c2=src.previous() movement */
src.move(1, CharacterIterator::kCurrent);
src.move(&src, 1, UITERATOR_CURRENT);
return 0;
}
}
@ -2338,14 +2262,14 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
*/
typedef UBool
IsPrevBoundaryFn(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
IsPrevBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
/*
* read backwards and check if the combining class is 0
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
*/
static UBool
_isPrevCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
_isPrevCCZero(UCharIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
/* reads one code point backwards via _getPrevNorm32() (filling c/c2) and reports TRUE when its norm32 has no bits set under ccMask */
return (_getPrevNorm32(src, minC, ccMask, c, c2)&ccMask)==0;
}
@ -2355,7 +2279,7 @@ _isPrevCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c,
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
*/
static UBool
_isPrevTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
_isPrevTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
uint32_t norm32, decompQCMask;
decompQCMask=(ccOrQCMask<<2)&0xf; /* decomposition quick check mask */
@ -2364,7 +2288,7 @@ _isPrevTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, U
}
static int32_t
_findPreviousIterationBoundary(CharacterIterator &src,
_findPreviousIterationBoundary(UCharIterator &src,
IsPrevBoundaryFn *isPrevBoundary, uint32_t minC, uint32_t mask,
UChar *&buffer, int32_t &bufferCapacity,
int32_t &startIndex,
@ -2377,7 +2301,7 @@ _findPreviousIterationBoundary(CharacterIterator &src,
stackBuffer=buffer;
startIndex=bufferCapacity; /* fill the buffer from the end backwards */
while(src.hasPrevious()) {
while(src.hasPrevious(&src)) {
isBoundary=isPrevBoundary(src, minC, mask, c, c2);
/* always write this character to the front of the buffer */
@ -2387,7 +2311,7 @@ _findPreviousIterationBoundary(CharacterIterator &src,
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.setToStart();
src.move(&src, 0, UITERATOR_START);
return 0;
}
@ -2412,10 +2336,9 @@ _findPreviousIterationBoundary(CharacterIterator &src,
}
U_CFUNC int32_t
unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
CharacterIterator &src,
unorm_previousNormalize(UChar *dest, int32_t destCapacity,
UCharIterator *src,
UNormalizationMode mode, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
UChar stackBuffer[40];
UChar *buffer;
@ -2443,15 +2366,30 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
mask=_NORM_CC_MASK|_NORM_QC_NFKC;
break;
case UNORM_NONE:
if(src.hasPrevious()) {
UChar32 c=src.previous32();
destLength=0;
if(src->hasPrevious(src)) {
UChar c, c2;
destLength=0;
UTF_APPEND_CHAR_UNSAFE(dest, destLength, c);
return destLength;
} else {
return 0;
c=src->previous(src);
destLength=1;
if(UTF_IS_TRAIL(c) && src->hasPrevious(src)) {
c2=src->previous(src);
if(UTF_IS_LEAD(c2)) {
if(destCapacity>=2) {
dest[1]=c; /* trail surrogate */
destLength=2;
}
c=c2; /* lead surrogate to be written below */
} else {
src->move(src, 1, UITERATOR_CURRENT);
}
}
if(destCapacity>0) {
dest[0]=c;
}
}
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
default:
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -2459,16 +2397,16 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
buffer=stackBuffer;
bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);
bufferLength=_findPreviousIterationBoundary(src,
bufferLength=_findPreviousIterationBoundary(*src,
isPreviousBoundary, minC, mask,
buffer, bufferCapacity,
startIndex,
pErrorCode);
if(bufferLength>0) {
destLength=unorm_internalNormalize(&dest, &destCapacity,
destLength=unorm_internalNormalize(dest, destCapacity,
buffer+startIndex, bufferLength,
mode, ignoreHangul,
growBuffer, context, pErrorCode);
pErrorCode);
} else {
destLength=0;
}
@ -2490,11 +2428,11 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
* always reads complete characters
*/
static inline uint32_t
_getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
_getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
uint32_t norm32;
/* need src.hasNext() */
c=src.nextPostInc();
/* need src.hasNext() to be true */
c=src.next(&src);
c2=0;
if(c<minC) {
@ -2502,14 +2440,19 @@ _getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
}
norm32=_getNorm32(c);
if(UTF_IS_FIRST_SURROGATE(c) && src.hasNext() && UTF_IS_SECOND_SURROGATE(c2=src.current())) {
src.move(1, CharacterIterator::kCurrent); /* skip the c2 surrogate */
if((norm32&mask)==0) {
/* irrelevant data */
return 0;
if(UTF_IS_FIRST_SURROGATE(c)) {
if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=src.current(&src))) {
src.move(&src, 1, UITERATOR_CURRENT); /* skip the c2 surrogate */
if((norm32&mask)==0) {
/* irrelevant data */
return 0;
} else {
/* norm32 must be a surrogate special */
return _getNorm32FromSurrogatePair(norm32, c2);
}
} else {
/* norm32 must be a surrogate special */
return _getNorm32FromSurrogatePair(norm32, c2);
/* unmatched surrogate */
return 0;
}
}
return norm32;
@ -2520,14 +2463,14 @@ _getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
* if c2!=0 then (c, c2) is a surrogate pair
*/
typedef UBool
IsNextBoundaryFn(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
IsNextBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
/*
* read forward and check if the combining class is 0
* if c2!=0 then (c, c2) is a surrogate pair
*/
static UBool
_isNextCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
_isNextCCZero(UCharIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
return (_getNextNorm32(src, minC, ccMask, c, c2)&ccMask)==0;
}
@ -2537,7 +2480,7 @@ _isNextCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c,
* if c2!=0 then (c, c2) is a surrogate pair
*/
static UBool
_isNextTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
_isNextTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
uint32_t norm32, decompQCMask;
decompQCMask=(ccOrQCMask<<2)&0xf; /* decomposition quick check mask */
@ -2546,7 +2489,7 @@ _isNextTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, U
}
static int32_t
_findNextIterationBoundary(CharacterIterator &src,
_findNextIterationBoundary(UCharIterator &src,
IsNextBoundaryFn *isNextBoundary, uint32_t minC, uint32_t mask,
UChar *&buffer, int32_t &bufferCapacity,
UErrorCode *pErrorCode) {
@ -2554,7 +2497,7 @@ _findNextIterationBoundary(CharacterIterator &src,
int32_t bufferIndex;
UChar c, c2;
if(!src.hasNext()) {
if(!src.hasNext(&src)) {
return 0;
}
@ -2562,20 +2505,22 @@ _findNextIterationBoundary(CharacterIterator &src,
stackBuffer=buffer;
/* get one character and ignore its properties */
buffer[0]=c=src.current();
buffer[0]=c=src.next(&src);
bufferIndex=1;
c2=src.next();
if(UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(c2)) {
buffer[bufferIndex++]=c2;
src.move(1, CharacterIterator::kCurrent); /* skip the c2 surrogate */
if(UTF_IS_FIRST_SURROGATE(c) && src.hasNext(&src)) {
if(UTF_IS_SECOND_SURROGATE(c2=src.next(&src))) {
buffer[bufferIndex++]=c2;
} else {
src.move(&src, -1, UITERATOR_CURRENT); /* back out the non-trail-surrogate */
}
}
/* get all following characters until we see a boundary */
/* checking hasNext() instead of c!=DONE on the off-chance that U+ffff is part of the string */
while(src.hasNext()) {
while(src.hasNext(&src)) {
if(isNextBoundary(src, minC, mask, c, c2)) {
/* back out the latest movement to stop at the boundary */
src.move(c2==0 ? -1 : -2, CharacterIterator::kCurrent);
src.move(&src, c2==0 ? -1 : -2, UITERATOR_CURRENT);
break;
} else {
if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity ||
@ -2590,7 +2535,7 @@ _findNextIterationBoundary(CharacterIterator &src,
}
} else {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.setToEnd();
src.move(&src, 0, UITERATOR_END);
return 0;
}
}
@ -2601,10 +2546,9 @@ _findNextIterationBoundary(CharacterIterator &src,
}
U_CFUNC int32_t
unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
CharacterIterator &src,
unorm_nextNormalize(UChar *dest, int32_t destCapacity,
UCharIterator *src,
UNormalizationMode mode, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode) {
UChar stackBuffer[40];
UChar *buffer;
@ -2632,15 +2576,30 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
mask=_NORM_CC_MASK|_NORM_QC_NFKC;
break;
case UNORM_NONE:
if(src.hasNext()) {
UChar32 c=src.next32PostInc();
destLength=0;
if(src->hasNext(src)) {
UChar c, c2;
destLength=0;
UTF_APPEND_CHAR_UNSAFE(dest, destLength, c);
return destLength;
} else {
return 0;
c=src->next(src);
destLength=1;
if(UTF_IS_LEAD(c) && src->hasNext(src)) {
c2=src->next(src);
if(UTF_IS_TRAIL(c2)) {
if(destCapacity>=2) {
dest[1]=c2; /* trail surrogate */
destLength=2;
}
/* lead surrogate to be written below */
} else {
src->move(src, -1, UITERATOR_CURRENT);
}
}
if(destCapacity>0) {
dest[0]=c;
}
}
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
default:
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -2648,15 +2607,15 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
buffer=stackBuffer;
bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);
bufferLength=_findNextIterationBoundary(src,
bufferLength=_findNextIterationBoundary(*src,
isNextBoundary, minC, mask,
buffer, bufferCapacity,
pErrorCode);
if(bufferLength>0) {
destLength=unorm_internalNormalize(&dest, &destCapacity,
destLength=unorm_internalNormalize(dest, destCapacity,
buffer, bufferLength,
mode, ignoreHangul,
growBuffer, context, pErrorCode);
pErrorCode);
} else {
destLength=0;
}
@ -2674,4 +2633,3 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
* and if not, how hard it would be to improve it.
* For example, see _findSafeFCD().
*/

View File

@ -155,14 +155,13 @@ unorm_haveData(UErrorCode *pErrorCode);
/**
* Internal API for normalizing.
* Does not check for bad input and uses growBuffer.
* Does not check for bad input.
* @internal
*/
U_CAPI int32_t U_EXPORT2
unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity,
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UNormalizationMode mode, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode);
/**
@ -170,10 +169,9 @@ unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity,
* @internal
*/
U_CAPI int32_t U_EXPORT2
unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
unorm_decompose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode);
/**
@ -181,10 +179,9 @@ unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
* @internal
*/
U_CAPI int32_t U_EXPORT2
unorm_compose(UChar **pDest, int32_t *pDestCapacity,
unorm_compose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode);
/**
@ -250,15 +247,103 @@ unorm_getFCD16FromSurrogatePair(const uint16_t *fcdTrieIndex, uint16_t fcd16, UC
];
}
#endif
U_CDECL_BEGIN
struct UCharIterator;
typedef struct UCharIterator UCharIterator;
/**
 * Reference point for UCharIterator.move().
 * The enumerators keep their implicit values 0, 1, 2; the C++ wrapper casts
 * this value directly to CharacterIterator::EOrigin, so the order of the
 * constants must not be changed.
 *
 * @internal for normalization
 */
typedef enum UCharIteratorOrigin {
    /** delta is relative to the start of the iteration range */
    UITERATOR_START,
    /** delta is relative to the current index */
    UITERATOR_CURRENT,
    /** delta is relative to the end of the iteration range */
    UITERATOR_END
} UCharIteratorOrigin;
/**
* C API for code unit iteration.
* This can be used as a C wrapper around
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
*
* An implementation fills in the three data fields as it sees fit and
* supplies the six functions; callers only ever invoke the functions,
* always passing the iterator itself as the first argument
* (for example iter->next(iter)).
*
* Note: the members are filled in positionally by static initializers
* (context, length, index, then the functions in declaration order),
* so the order of the members must not be changed.
*
* @internal for normalization
*/
struct UCharIterator {
/**
* (protected) Pointer to string or wrapped object or similar.
* Not used by caller.
* The CharacterIterator wrapper stores the pointer to the wrapped
* CharacterIterator here.
*/
const void *context;
/**
* (protected) Length of string or similar.
* Not used by caller.
* May be left at 0 by implementations that do not need it.
*/
int32_t length;
/**
* (protected) Current index or similar.
* Not used by caller.
* May be left at 0 by implementations that do not need it.
*/
int32_t index;
/**
* (public) Moves the current position relative to the start or end of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
* A delta of 0 with UITERATOR_START or UITERATOR_END positions the
* iterator at the start or end of the range.
*
* @param iter the iterator object itself
* @param delta can be positive, zero, or negative
* @param origin move relative to the start, end, or current index
* @return the new index
*/
int32_t U_CALLCONV
(*move)(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
/**
* (public) Check if current() and next() can still
* return another code unit.
* Callers should test this rather than comparing the result of next()
* with 0xffff, because U+ffff may itself be part of the text.
*
* @param iter the iterator object itself
* @return TRUE if there is another code unit at or after the current index
*/
UBool U_CALLCONV
(*hasNext)(UCharIterator *iter);
/**
* (public) Check if previous() can still return another code unit.
*
* @param iter the iterator object itself
* @return TRUE if there is a code unit before the current index
*/
UBool U_CALLCONV
(*hasPrevious)(UCharIterator *iter);
/**
* (public) Return the code unit at the current position,
* or 0xffff if there is none (index is at the end).
* Does not move the index.
*
* @param iter the iterator object itself
* @return the current code unit, or 0xffff
*/
UChar U_CALLCONV
(*current)(UCharIterator *iter);
/**
* (public) Return the code unit at the current index and increment
* the index (post-increment, like s[i++]),
* or return 0xffff if there is none (index is at the end).
*
* @param iter the iterator object itself
* @return the code unit that was at the current index, or 0xffff
*/
UChar U_CALLCONV
(*next)(UCharIterator *iter);
/**
* (public) Decrement the index and return the code unit from there
* (pre-decrement, like s[--i]),
* or return 0xffff if there is none (index is at the start).
*
* @param iter the iterator object itself
* @return the code unit before the previous index, or 0xffff
*/
UChar U_CALLCONV
(*previous)(UCharIterator *iter);
};
/**
* Internal API for iterative normalizing - see Normalizer.
* @internal
*/
U_CFUNC int32_t
unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
U_NAMESPACE_QUALIFIER CharacterIterator &src,
unorm_nextNormalize(UChar *dest, int32_t destCapacity,
UCharIterator *src,
UNormalizationMode mode, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode);
/**
@ -266,13 +351,12 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
* @internal
*/
U_CFUNC int32_t
unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
U_NAMESPACE_QUALIFIER CharacterIterator &src,
unorm_previousNormalize(UChar *dest, int32_t destCapacity,
UCharIterator *src,
UNormalizationMode mode, UBool ignoreHangul,
UGrowBuffer *growBuffer, void *context,
UErrorCode *pErrorCode);
#endif
U_CDECL_END
/**
* Description of the format of unorm.dat.

View File

@ -760,30 +760,33 @@ void collIterNormalize(collIterate *collationSource)
UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */
int32_t normLen;
normLen = unorm_decompose(&collationSource->writableBuffer, (int32_t *)&collationSource->writableBufSize,
normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize,
srcP, (int32_t)(endP - srcP),
FALSE, FALSE,
u_growBufferFromStatic, collationSource->stackWritableBuffer,
&status);
if (U_FAILURE(status)) {
#ifdef UCOL_DEBUG
fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status));
#endif
return;
}
if(status == U_STRING_NOT_TERMINATED_WARNING) {
if(status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
// reallocate and terminate
if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,
&collationSource->writableBuffer,
(int32_t *)&collationSource->writableBufSize, normLen + 1,
normLen)
0)
) {
#ifdef UCOL_DEBUG
fprintf(stderr, "collIterNormalize(), out of memory\n");
#endif
return;
}
collationSource->writableBuffer[normLen] = 0;
status = U_ZERO_ERROR;
normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize,
srcP, (int32_t)(endP - srcP),
FALSE, FALSE,
&status);
}
if (U_FAILURE(status)) {
#ifdef UCOL_DEBUG
fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status));
#endif
return;
}
if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
@ -3343,11 +3346,24 @@ ucol_calcSortKey(const UCollator *coll,
}
if(normMode != UNORM_NONE && UNORM_YES != unorm_quickCheck(source, len, normMode, status)) {
len = unorm_internalNormalize(&normSource, &normSourceLen,
len = unorm_internalNormalize(normSource, normSourceLen,
source, len,
normMode, FALSE,
u_growBufferFromStatic, normBuffer,
status);
if(*status == U_BUFFER_OVERFLOW_ERROR) {
normSourceLen = len;
normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR);
if(normSource == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
*status = U_ZERO_ERROR;
len = unorm_internalNormalize(normSource, normSourceLen,
source, len,
normMode, FALSE,
status);
}
if(U_FAILURE(*status)) {
return 0;
}
@ -3835,11 +3851,24 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
/* If we need to normalize, we'll do it all at once at the beginning! */
if(coll->normalizationMode != UCOL_OFF && UNORM_YES != unorm_quickCheck(source, len, UNORM_FCD, status)) {
len = unorm_internalNormalize(&normSource, &normSourceLen,
len = unorm_internalNormalize(normSource, normSourceLen,
source, len,
UNORM_FCD, FALSE,
u_growBufferFromStatic, normBuffer,
status);
if(*status == U_BUFFER_OVERFLOW_ERROR) {
normSourceLen = len;
normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR);
if(normSource == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
*status = U_ZERO_ERROR;
len = unorm_internalNormalize(normSource, normSourceLen,
source, len,
UNORM_FCD, FALSE,
status);
}
if(U_FAILURE(*status)) {
return 0;
}
@ -4678,11 +4707,24 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
status = U_ZERO_ERROR;
if (unorm_quickCheck(sColl->string, sLen, UNORM_NFD, &status) != UNORM_YES) {
sLen = unorm_decompose(&sColl->writableBuffer, (int32_t *)&sColl->writableBufSize,
sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize,
sBuf, sLen,
FALSE, FALSE,
u_growBufferFromStatic, sColl->stackWritableBuffer,
&status);
if(status == U_BUFFER_OVERFLOW_ERROR) {
if(!u_growBufferFromStatic(sColl->stackWritableBuffer,
&sColl->writableBuffer,
(int32_t *)&sColl->writableBufSize, sLen,
0)
) {
return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */
}
status = U_ZERO_ERROR;
sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize,
sBuf, sLen,
FALSE, FALSE,
&status);
}
sBuf = sColl->writableBuffer;
if (sBuf != sColl->stackWritableBuffer) {
sColl->flags |= UCOL_ITER_ALLOCATED;
@ -4691,11 +4733,24 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
status = U_ZERO_ERROR;
if (unorm_quickCheck(tColl->string, tLen, UNORM_NFD, &status) != UNORM_YES) {
tLen = unorm_decompose(&tColl->writableBuffer, (int32_t *)&tColl->writableBufSize,
tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize,
tBuf, tLen,
FALSE, FALSE,
u_growBufferFromStatic, tColl->stackWritableBuffer,
&status);
if(status == U_BUFFER_OVERFLOW_ERROR) {
if(!u_growBufferFromStatic(tColl->stackWritableBuffer,
&tColl->writableBuffer,
(int32_t *)&tColl->writableBufSize, tLen,
0)
) {
return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */
}
status = U_ZERO_ERROR;
tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize,
tBuf, tLen,
FALSE, FALSE,
&status);
}
tBuf = tColl->writableBuffer;
if (tBuf != tColl->stackWritableBuffer) {
tColl->flags |= UCOL_ITER_ALLOCATED;