ICU-1007 simplify internal normalization api (no growBuffers, c UCharIterator)
X-SVN-Rev: 6339
This commit is contained in:
parent
b464fd7971
commit
237bf8e91e
@ -12,8 +12,61 @@
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "cmemory.h"
|
||||
#include "unormimp.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/*
|
||||
* This is wrapper code around a C++ CharacterIterator to
|
||||
* look like a C UCharIterator for the internal API
|
||||
* for incremental normalization.
|
||||
*
|
||||
* The UCharIterator.context field holds a pointer to the CharacterIterator.
|
||||
*/
|
||||
|
||||
/*
 * UCharIterator "move" callback.
 * Forwards the request to the C++ CharacterIterator stored in iter->context
 * and returns whatever CharacterIterator::move() returns.
 * The C origin value is cast directly to CharacterIterator::EOrigin.
 */
static int32_t U_CALLCONV
characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    CharacterIterator::EOrigin cppOrigin=(CharacterIterator::EOrigin)origin;
    return wrapped->move(delta, cppOrigin);
}
|
||||
|
||||
/*
 * UCharIterator "hasNext" callback.
 * Delegates to hasNext() of the CharacterIterator stored in iter->context.
 */
static UBool U_CALLCONV
characterIteratorHasNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->hasNext();
}
|
||||
|
||||
/*
 * UCharIterator "hasPrevious" callback.
 * Delegates to hasPrevious() of the CharacterIterator stored in iter->context.
 */
static UBool U_CALLCONV
characterIteratorHasPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->hasPrevious();
}
|
||||
|
||||
/*
 * UCharIterator "current" callback.
 * Delegates to current() of the CharacterIterator stored in iter->context.
 */
static UChar U_CALLCONV
characterIteratorCurrent(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->current();
}
|
||||
|
||||
/*
 * UCharIterator "next" callback.
 * Delegates to nextPostInc() (not next()) of the CharacterIterator
 * stored in iter->context and returns its result.
 */
static UChar U_CALLCONV
characterIteratorNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->nextPostInc();
}
|
||||
|
||||
/*
 * UCharIterator "previous" callback.
 * Delegates to previous() of the CharacterIterator stored in iter->context.
 */
static UChar U_CALLCONV
characterIteratorPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->previous();
}
|
||||
|
||||
/*
 * Template UCharIterator whose callbacks all forward to a C++
 * CharacterIterator. Users copy this struct and then set the context
 * field to the CharacterIterator that the copy should wrap.
 */
static const UCharIterator characterIteratorWrapper={
    /* leading data fields are zeroed here; presumably the first is context -- TODO confirm against the struct declaration */
    0,
    0,
    0,
    /* callbacks, in struct order */
    characterIteratorMove,
    characterIteratorHasNext,
    characterIteratorHasPrevious,
    characterIteratorCurrent,
    characterIteratorNext,
    characterIteratorPrevious
};
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
@ -22,29 +75,26 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
|
||||
fUMode(mode), fOptions(0),
|
||||
text(new StringCharacterIterator(str)),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(new StringCharacterIterator(str));
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
|
||||
fUMode(mode), fOptions(0),
|
||||
text(new UCharCharacterIterator(str, length)),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(new UCharCharacterIterator(str, length));
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
|
||||
fUMode(mode), fOptions(0),
|
||||
text(iter.clone()),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(iter.clone());
|
||||
}
|
||||
|
||||
// deprecated constructors
|
||||
@ -52,71 +102,71 @@ Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
|
||||
Normalizer::Normalizer(const UnicodeString& str,
|
||||
EMode mode) :
|
||||
fUMode(getUMode(mode)), fOptions(0),
|
||||
text(new StringCharacterIterator(str)),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(new StringCharacterIterator(str));
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const UnicodeString& str,
|
||||
EMode mode,
|
||||
int32_t options) :
|
||||
fUMode(getUMode(mode)), fOptions(options),
|
||||
text(new StringCharacterIterator(str)),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(new StringCharacterIterator(str));
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const UChar *str, int32_t length, EMode mode) :
|
||||
fUMode(getUMode(mode)), fOptions(0),
|
||||
text(new UCharCharacterIterator(str, length)),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(new UCharCharacterIterator(str, length));
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const CharacterIterator& iter,
|
||||
EMode mode) :
|
||||
fUMode(getUMode(mode)), fOptions(0),
|
||||
text(iter.clone()),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(iter.clone());
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const CharacterIterator& iter,
|
||||
EMode mode,
|
||||
int32_t options) :
|
||||
fUMode(getUMode(mode)), fOptions(options),
|
||||
text(iter.clone()),
|
||||
currentIndex(0), nextIndex(0),
|
||||
buffer(), bufferPos(0)
|
||||
{
|
||||
checkData();
|
||||
init(iter.clone());
|
||||
}
|
||||
|
||||
Normalizer::Normalizer(const Normalizer ©) :
|
||||
fUMode(copy.fUMode), fOptions(copy.fOptions),
|
||||
text(copy.text->clone()),
|
||||
currentIndex(copy.nextIndex), nextIndex(copy.nextIndex),
|
||||
buffer(copy.buffer), bufferPos(copy.bufferPos)
|
||||
{
|
||||
checkData();
|
||||
init(((CharacterIterator *)(copy.text->context))->clone());
|
||||
}
|
||||
|
||||
static const UChar _NUL=0;
|
||||
|
||||
void
|
||||
Normalizer::checkData() {
|
||||
Normalizer::init(CharacterIterator *iter) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
if(!unorm_haveData(&errorCode)) {
|
||||
delete text;
|
||||
text=new UCharCharacterIterator(&_NUL, 0);
|
||||
|
||||
text=new UCharIterator;
|
||||
uprv_memcpy(text, &characterIteratorWrapper, sizeof(UCharIterator));
|
||||
|
||||
if(unorm_haveData(&errorCode)) {
|
||||
text->context=iter;
|
||||
} else {
|
||||
delete iter;
|
||||
text->context=new UCharCharacterIterator(&_NUL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,7 +190,7 @@ Normalizer::clone() const
|
||||
*/
|
||||
int32_t Normalizer::hashCode() const
|
||||
{
|
||||
return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
|
||||
return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
|
||||
}
|
||||
|
||||
UBool Normalizer::operator==(const Normalizer& that) const
|
||||
@ -149,7 +199,7 @@ UBool Normalizer::operator==(const Normalizer& that) const
|
||||
this==&that ||
|
||||
fUMode==that.fUMode &&
|
||||
fOptions==that.fOptions &&
|
||||
*text==*(that.text) &&
|
||||
*((CharacterIterator *)(text->context))==*((CharacterIterator *)(that.text->context)) &&
|
||||
buffer==that.buffer &&
|
||||
bufferPos==that.bufferPos &&
|
||||
nextIndex==that.nextIndex;
|
||||
@ -167,13 +217,22 @@ Normalizer::normalize(const UnicodeString& source,
|
||||
if(source.isBogus() || U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_internalNormalize(&result.fArray, &result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
UChar *buffer=result.getBuffer(source.length());
|
||||
int32_t length=unorm_internalNormalize(buffer, result.getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
mode, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
result.releaseBuffer(length);
|
||||
if(status==U_BUFFER_OVERFLOW_ERROR) {
|
||||
status=U_ZERO_ERROR;
|
||||
buffer=result.getBuffer(length);
|
||||
length=unorm_internalNormalize(buffer, result.getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
mode, (options&IGNORE_HANGUL)!=0,
|
||||
&status);
|
||||
result.releaseBuffer(length);
|
||||
}
|
||||
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
@ -188,7 +247,7 @@ Normalizer::quickCheck(const UnicodeString& source,
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
|
||||
return unorm_quickCheck(source.fArray, source.length(),
|
||||
return unorm_quickCheck(source.getBuffer(), source.length(),
|
||||
mode, &status);
|
||||
}
|
||||
|
||||
@ -200,13 +259,22 @@ Normalizer::compose(const UnicodeString& source,
|
||||
if(source.isBogus() || U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_compose(&result.fArray, &result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
UChar *buffer=result.getBuffer(source.length());
|
||||
int32_t length=unorm_compose(buffer, result.getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
result.releaseBuffer(length);
|
||||
if(status==U_BUFFER_OVERFLOW_ERROR) {
|
||||
status=U_ZERO_ERROR;
|
||||
buffer=result.getBuffer(length);
|
||||
length=unorm_compose(buffer, result.getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
&status);
|
||||
result.releaseBuffer(length);
|
||||
}
|
||||
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
@ -221,13 +289,22 @@ Normalizer::decompose(const UnicodeString& source,
|
||||
if(source.isBogus() || U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_decompose(&result.fArray, &result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
UChar *buffer=result.getBuffer(source.length());
|
||||
int32_t length=unorm_compose(buffer, result.getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
&status);
|
||||
result.releaseBuffer(length);
|
||||
if(status==U_BUFFER_OVERFLOW_ERROR) {
|
||||
status=U_ZERO_ERROR;
|
||||
buffer=result.getBuffer(length);
|
||||
length=unorm_decompose(buffer, result.getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
&status);
|
||||
result.releaseBuffer(length);
|
||||
}
|
||||
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
@ -239,7 +316,7 @@ Normalizer::decompose(const UnicodeString& source,
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return the current character in the normalized text.
|
||||
* Return the current character in the normalized text.
|
||||
*/
|
||||
UChar32 Normalizer::current() {
|
||||
if(bufferPos<buffer.length() || nextNormalize()) {
|
||||
@ -280,15 +357,13 @@ UChar32 Normalizer::previous() {
|
||||
}
|
||||
|
||||
void Normalizer::reset() {
|
||||
text->setToStart();
|
||||
currentIndex=nextIndex=text->getIndex();
|
||||
currentIndex=nextIndex=text->move(text, 0, UITERATOR_START);
|
||||
clearBuffer();
|
||||
}
|
||||
|
||||
void
|
||||
Normalizer::setIndexOnly(UTextOffset index) {
|
||||
text->setIndex(index);
|
||||
currentIndex=nextIndex=text->getIndex(); // validates index
|
||||
currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index
|
||||
clearBuffer();
|
||||
}
|
||||
|
||||
@ -303,7 +378,7 @@ Normalizer::setIndexOnly(UTextOffset index) {
|
||||
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
|
||||
* returned from <tt>setIndex</tt> and {@link #getIndex}.
|
||||
* <p>
|
||||
* @param index the desired index in the input text.
|
||||
* @param index the desired index in the input text.
|
||||
*
|
||||
* @return the first normalized character that is the result of iterating
|
||||
* forward starting at the given index.
|
||||
@ -317,8 +392,8 @@ UChar32 Normalizer::setIndex(UTextOffset index) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the first character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to the beginning of the text.
|
||||
* Return the first character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to the beginning of the text.
|
||||
*/
|
||||
UChar32 Normalizer::first() {
|
||||
reset();
|
||||
@ -326,13 +401,12 @@ UChar32 Normalizer::first() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the last character in the normalized text. This resets
|
||||
* Return the last character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to be just before the
|
||||
* the input text corresponding to that normalized character.
|
||||
*/
|
||||
UChar32 Normalizer::last() {
|
||||
text->setToEnd();
|
||||
currentIndex=nextIndex=text->getIndex();
|
||||
currentIndex=nextIndex=text->move(text, 0, UITERATOR_END);
|
||||
clearBuffer();
|
||||
return previous();
|
||||
}
|
||||
@ -360,21 +434,21 @@ UTextOffset Normalizer::getIndex() const {
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the index of the start of the input text. This is the begin index
|
||||
* Retrieve the index of the start of the input text. This is the begin index
|
||||
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
UTextOffset Normalizer::startIndex() const {
|
||||
return text->startIndex();
|
||||
return text->move(text, 0, UITERATOR_START);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the index of the end of the input text. This is the end index
|
||||
* Retrieve the index of the end of the input text. This is the end index
|
||||
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
UTextOffset Normalizer::endIndex() const {
|
||||
return text->endIndex();
|
||||
return text->move(text, 0, UITERATOR_END);
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
@ -412,7 +486,7 @@ Normalizer::getOption(int32_t option) const
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning of the input text.
|
||||
* The iteration position is set to the beginning of the input text.
|
||||
*/
|
||||
void
|
||||
Normalizer::setText(const UnicodeString& newText,
|
||||
@ -426,8 +500,8 @@ Normalizer::setText(const UnicodeString& newText,
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
delete text;
|
||||
text = newIter;
|
||||
delete (CharacterIterator *)(text->context);
|
||||
text->context = newIter;
|
||||
reset();
|
||||
}
|
||||
|
||||
@ -447,8 +521,8 @@ Normalizer::setText(const CharacterIterator& newText,
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
delete text;
|
||||
text = newIter;
|
||||
delete (CharacterIterator *)(text->context);
|
||||
text->context = newIter;
|
||||
reset();
|
||||
}
|
||||
|
||||
@ -465,8 +539,8 @@ Normalizer::setText(const UChar* newText,
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
delete text;
|
||||
text = newIter;
|
||||
delete (CharacterIterator *)(text->context);
|
||||
text->context = newIter;
|
||||
reset();
|
||||
}
|
||||
|
||||
@ -477,7 +551,7 @@ Normalizer::setText(const UChar* newText,
|
||||
void
|
||||
Normalizer::getText(UnicodeString& result)
|
||||
{
|
||||
text->getText(result);
|
||||
((CharacterIterator *)(text->context))->getText(result);
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
@ -491,40 +565,69 @@ void Normalizer::clearBuffer() {
|
||||
|
||||
UBool
|
||||
Normalizer::nextNormalize() {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UChar *p;
|
||||
int32_t length;
|
||||
UErrorCode errorCode;
|
||||
|
||||
clearBuffer();
|
||||
currentIndex=nextIndex;
|
||||
text->setIndex(nextIndex);
|
||||
if(!text->hasNext()) {
|
||||
text->move(text, nextIndex, UITERATOR_START);
|
||||
if(!text->hasNext(text)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
buffer.fLength=unorm_nextNormalize(buffer.fArray, buffer.fCapacity, *text,
|
||||
fUMode, (fOptions&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &buffer,
|
||||
&errorCode);
|
||||
nextIndex=text->getIndex();
|
||||
return U_SUCCESS(errorCode) && buffer.length()>0;
|
||||
errorCode=U_ZERO_ERROR;
|
||||
p=buffer.getBuffer(-1);
|
||||
length=unorm_nextNormalize(p, buffer.getCapacity(), text,
|
||||
fUMode, (fOptions&IGNORE_HANGUL)!=0,
|
||||
&errorCode);
|
||||
buffer.releaseBuffer(length);
|
||||
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
text->move(text, nextIndex, UITERATOR_START);
|
||||
p=buffer.getBuffer(length);
|
||||
length=unorm_nextNormalize(p, buffer.getCapacity(), text,
|
||||
fUMode, (fOptions&IGNORE_HANGUL)!=0,
|
||||
&errorCode);
|
||||
buffer.releaseBuffer(length);
|
||||
}
|
||||
|
||||
nextIndex=text->move(text, 0, UITERATOR_CURRENT);
|
||||
return U_SUCCESS(errorCode) && !buffer.isEmpty();
|
||||
}
|
||||
|
||||
UBool
|
||||
Normalizer::previousNormalize() {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UChar *p;
|
||||
int32_t length;
|
||||
UErrorCode errorCode;
|
||||
|
||||
clearBuffer();
|
||||
nextIndex=currentIndex;
|
||||
text->setIndex(currentIndex);
|
||||
if(!text->hasPrevious()) {
|
||||
text->move(text, currentIndex, UITERATOR_START);
|
||||
if(!text->hasPrevious(text)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
buffer.fLength=unorm_previousNormalize(buffer.fArray, buffer.fCapacity, *text,
|
||||
fUMode, (fOptions&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &buffer,
|
||||
&errorCode);
|
||||
errorCode=U_ZERO_ERROR;
|
||||
p=buffer.getBuffer(-1);
|
||||
length=unorm_previousNormalize(p, buffer.getCapacity(), text,
|
||||
fUMode, (fOptions&IGNORE_HANGUL)!=0,
|
||||
&errorCode);
|
||||
buffer.releaseBuffer(length);
|
||||
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
text->move(text, currentIndex, UITERATOR_START);
|
||||
p=buffer.getBuffer(length);
|
||||
length=unorm_previousNormalize(p, buffer.getCapacity(), text,
|
||||
fUMode, (fOptions&IGNORE_HANGUL)!=0,
|
||||
&errorCode);
|
||||
buffer.releaseBuffer(length);
|
||||
}
|
||||
|
||||
bufferPos=buffer.length();
|
||||
currentIndex=text->getIndex();
|
||||
return U_SUCCESS(errorCode) && buffer.length()>0;
|
||||
currentIndex=text->move(text, 0, UITERATOR_CURRENT);
|
||||
return U_SUCCESS(errorCode) && !buffer.isEmpty();
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -14,6 +14,9 @@
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/unorm.h"
|
||||
|
||||
struct UCharIterator;
|
||||
typedef struct UCharIterator UCharIterator;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* \file
|
||||
@ -881,7 +884,7 @@ private:
|
||||
UBool nextNormalize();
|
||||
UBool previousNormalize();
|
||||
|
||||
void checkData();
|
||||
void init(CharacterIterator *iter);
|
||||
void clearBuffer(void);
|
||||
|
||||
// Helper, without UErrorCode, for easier transitional code
|
||||
@ -896,7 +899,7 @@ private:
|
||||
int32_t fOptions;
|
||||
|
||||
// The input text and our position in it
|
||||
CharacterIterator* text;
|
||||
UCharIterator *text;
|
||||
|
||||
// The normalization buffer is the result of normalization
|
||||
// of the source in [currentIndex..nextIndex[ .
|
||||
|
@ -831,10 +831,9 @@ unorm_quickCheck(const UChar *src,
|
||||
/* make NFD & NFKD ---------------------------------------------------------- */
|
||||
|
||||
static int32_t
|
||||
_decompose(UChar *&dest, int32_t &destCapacity,
|
||||
_decompose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
uint8_t &outTrailCC,
|
||||
UErrorCode * /*pErrorCode*/) {
|
||||
UChar buffer[3];
|
||||
@ -843,7 +842,6 @@ _decompose(UChar *&dest, int32_t &destCapacity,
|
||||
int32_t destIndex, reorderStartIndex, length;
|
||||
UChar c, c2, minNoMaybe;
|
||||
uint8_t cc, prevCC, trailCC;
|
||||
UBool canGrow;
|
||||
|
||||
if(!compat) {
|
||||
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
|
||||
@ -862,9 +860,6 @@ _decompose(UChar *&dest, int32_t &destCapacity,
|
||||
norm32=0;
|
||||
c=0;
|
||||
|
||||
/* do not attempt to grow if there is no growBuffer function or if it has failed before */
|
||||
canGrow=(UBool)(growBuffer!=NULL);
|
||||
|
||||
if(srcLength>=0) {
|
||||
/* string with length */
|
||||
limit=src+srcLength;
|
||||
@ -893,14 +888,7 @@ _decompose(UChar *&dest, int32_t &destCapacity,
|
||||
/* copy these code units all at once */
|
||||
if(src!=prevSrc) {
|
||||
length=(int32_t)(src-prevSrc);
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
|
||||
}
|
||||
destIndex+=length;
|
||||
@ -988,14 +976,7 @@ _decompose(UChar *&dest, int32_t &destCapacity,
|
||||
}
|
||||
|
||||
/* append the decomposition to the destination buffer, assume length>0 */
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
UChar *reorderSplit=dest+destIndex;
|
||||
if(p==NULL) {
|
||||
/* fastpath: single code point */
|
||||
@ -1040,10 +1021,9 @@ _decompose(UChar *&dest, int32_t &destCapacity,
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
|
||||
unorm_decompose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t destIndex;
|
||||
uint8_t trailCC;
|
||||
@ -1052,14 +1032,13 @@ unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
|
||||
return 0;
|
||||
}
|
||||
|
||||
destIndex=_decompose(*pDest, *pDestCapacity,
|
||||
destIndex=_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
compat, ignoreHangul,
|
||||
growBuffer, context,
|
||||
trailCC,
|
||||
pErrorCode);
|
||||
|
||||
return u_terminateUChars(*pDest, *pDestCapacity, destIndex, pErrorCode);
|
||||
return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
|
||||
}
|
||||
|
||||
/* make FCD ----------------------------------------------------------------- */
|
||||
@ -1118,8 +1097,7 @@ _findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) {
|
||||
|
||||
static uint8_t
|
||||
_decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
|
||||
UChar *&dest, int32_t &destIndex, int32_t &destCapacity,
|
||||
UBool canGrow, UGrowBuffer *growBuffer, void *context) {
|
||||
UChar *dest, int32_t &destIndex, int32_t destCapacity) {
|
||||
const UChar *p;
|
||||
uint32_t norm32;
|
||||
int32_t reorderStartIndex, length;
|
||||
@ -1181,14 +1159,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
|
||||
}
|
||||
|
||||
/* append the decomposition to the destination buffer, assume length>0 */
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
UChar *reorderSplit=dest+destIndex;
|
||||
if(p==NULL) {
|
||||
/* fastpath: single code point */
|
||||
@ -1232,16 +1203,14 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
|
||||
}
|
||||
|
||||
static int32_t
|
||||
unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
|
||||
unorm_makeFCD(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *limit, *prevSrc, *decompStart;
|
||||
int32_t destIndex, length;
|
||||
UChar c, c2;
|
||||
uint16_t fcd16;
|
||||
int16_t prevCC, cc;
|
||||
UBool canGrow;
|
||||
|
||||
if(!_haveData(*pErrorCode)) {
|
||||
return 0;
|
||||
@ -1256,9 +1225,6 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
|
||||
c=0;
|
||||
fcd16=0;
|
||||
|
||||
/* do not attempt to grow if there is no growBuffer function or if it has failed before */
|
||||
canGrow=(UBool)(growBuffer!=NULL);
|
||||
|
||||
if(srcLength>=0) {
|
||||
/* string with length */
|
||||
limit=src+srcLength;
|
||||
@ -1313,14 +1279,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
|
||||
/* copy these code units all at once */
|
||||
if(src!=prevSrc) {
|
||||
length=(int32_t)(src-prevSrc);
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
|
||||
}
|
||||
destIndex+=length;
|
||||
@ -1384,14 +1343,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
|
||||
|
||||
/* just append (c, c2) */
|
||||
length= c2==0 ? 1 : 2;
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
dest[destIndex++]=c;
|
||||
if(c2!=0) {
|
||||
dest[destIndex++]=c2;
|
||||
@ -1418,8 +1370,7 @@ unorm_makeFCD(UChar *&dest, int32_t &destCapacity,
|
||||
* decompose and reorder a limited piece of the text
|
||||
*/
|
||||
prevCC=_decomposeFCD(decompStart, src, limit,
|
||||
dest, destIndex, destCapacity,
|
||||
canGrow, growBuffer, context);
|
||||
dest, destIndex, destCapacity);
|
||||
decompStart=src;
|
||||
}
|
||||
}
|
||||
@ -1845,9 +1796,20 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
|
||||
length=_decompose(buffer, bufferCapacity,
|
||||
prevStarter, src-prevStarter,
|
||||
(decompQCMask&_NORM_QC_NFKD)!=0, FALSE,
|
||||
(UGrowBuffer*)u_growBufferFromStatic, stackBuffer,
|
||||
trailCC,
|
||||
pErrorCode);
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*length, 0)) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
length=_decompose(buffer, bufferCapacity,
|
||||
prevStarter, src-prevStarter,
|
||||
(decompQCMask&_NORM_QC_NFKD)!=0, FALSE,
|
||||
trailCC,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
/* set the next starter */
|
||||
prevStarter=src;
|
||||
@ -1864,10 +1826,9 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
|
||||
}
|
||||
|
||||
static int32_t
|
||||
_compose(UChar *&dest, int32_t &destCapacity,
|
||||
_compose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool /* ### TODO: need to do this? -- ignoreHangul -- ### */,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar stackBuffer[_STACK_BUFFER_CAPACITY];
|
||||
UChar *buffer;
|
||||
@ -1878,7 +1839,6 @@ _compose(UChar *&dest, int32_t &destCapacity,
|
||||
int32_t destIndex, reorderStartIndex, length;
|
||||
UChar c, c2, minNoMaybe;
|
||||
uint8_t cc, prevCC;
|
||||
UBool canGrow;
|
||||
|
||||
if(!_haveData(*pErrorCode)) {
|
||||
return 0;
|
||||
@ -1918,9 +1878,6 @@ _compose(UChar *&dest, int32_t &destCapacity,
|
||||
norm32=0;
|
||||
c=0;
|
||||
|
||||
/* do not attempt to grow if there is no growBuffer function or if it has failed before */
|
||||
canGrow=(UBool)(growBuffer!=NULL);
|
||||
|
||||
if(srcLength>=0) {
|
||||
/* string with length */
|
||||
limit=src+srcLength;
|
||||
@ -1949,14 +1906,7 @@ _compose(UChar *&dest, int32_t &destCapacity,
|
||||
/* copy these code units all at once */
|
||||
if(src!=prevSrc) {
|
||||
length=(int32_t)(src-prevSrc);
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
|
||||
}
|
||||
destIndex+=length;
|
||||
@ -2098,14 +2048,7 @@ _compose(UChar *&dest, int32_t &destCapacity,
|
||||
}
|
||||
|
||||
/* append the recomposed buffer contents to the destination buffer */
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
while(length>0) {
|
||||
dest[destIndex++]=*p++;
|
||||
--length;
|
||||
@ -2122,14 +2065,7 @@ _compose(UChar *&dest, int32_t &destCapacity,
|
||||
}
|
||||
|
||||
/* append the single code point (c, c2) to the destination buffer */
|
||||
if( (destIndex+length)<=destCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
|
||||
limit==NULL ?
|
||||
2*destCapacity+length+20 :
|
||||
destCapacity+length+2*(limit-src)+20,
|
||||
destIndex))!=FALSE)
|
||||
) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
if(cc!=0 && cc<prevCC) {
|
||||
/* (c, c2) is out of order with respect to the preceding text */
|
||||
UChar *reorderSplit=dest+destIndex;
|
||||
@ -2160,10 +2096,9 @@ _compose(UChar *&dest, int32_t &destCapacity,
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_compose(UChar **pDest, int32_t *pDestCapacity,
|
||||
unorm_compose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t destIndex;
|
||||
|
||||
@ -2171,13 +2106,12 @@ unorm_compose(UChar **pDest, int32_t *pDestCapacity,
|
||||
return 0;
|
||||
}
|
||||
|
||||
destIndex=_compose(*pDest, *pDestCapacity,
|
||||
destIndex=_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
compat, ignoreHangul,
|
||||
growBuffer, context,
|
||||
pErrorCode);
|
||||
|
||||
return u_terminateUChars(*pDest, *pDestCapacity, destIndex, pErrorCode);
|
||||
return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2191,57 +2125,48 @@ unorm_compose(UChar **pDest, int32_t *pDestCapacity,
|
||||
|
||||
/**
|
||||
* Internal API for normalizing.
|
||||
* Does not check for bad input and uses growBuffer.
|
||||
* Does not check for bad input.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity,
|
||||
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
switch(mode) {
|
||||
case UNORM_NFD:
|
||||
return unorm_decompose(pDest, pDestCapacity,
|
||||
return unorm_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
FALSE, ignoreHangul,
|
||||
growBuffer, context,
|
||||
pErrorCode);
|
||||
case UNORM_NFKD:
|
||||
return unorm_decompose(pDest, pDestCapacity,
|
||||
return unorm_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
TRUE, ignoreHangul,
|
||||
growBuffer, context,
|
||||
pErrorCode);
|
||||
case UNORM_NFC:
|
||||
return unorm_compose(pDest, pDestCapacity,
|
||||
return unorm_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
FALSE, ignoreHangul,
|
||||
growBuffer, context,
|
||||
pErrorCode);
|
||||
case UNORM_NFKC:
|
||||
return unorm_compose(pDest, pDestCapacity,
|
||||
return unorm_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
TRUE, ignoreHangul,
|
||||
growBuffer, context,
|
||||
pErrorCode);
|
||||
case UNORM_FCD:
|
||||
return unorm_makeFCD(*pDest, *pDestCapacity,
|
||||
return unorm_makeFCD(dest, destCapacity,
|
||||
src, srcLength,
|
||||
growBuffer, context,
|
||||
pErrorCode);
|
||||
case UNORM_NONE:
|
||||
/* just copy the string */
|
||||
if(srcLength==-1) {
|
||||
srcLength=u_strlen(src);
|
||||
}
|
||||
if( srcLength<=*pDestCapacity ||
|
||||
/* attempt to grow the buffer */
|
||||
(growBuffer!=NULL && growBuffer(context, pDest, pDestCapacity, srcLength+1, 0))
|
||||
) {
|
||||
uprv_memcpy(*pDest, src, srcLength*U_SIZEOF_UCHAR);
|
||||
if(srcLength>0 && srcLength<=destCapacity) {
|
||||
uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR);
|
||||
}
|
||||
return u_terminateUChars(*pDest, *pDestCapacity, srcLength, pErrorCode);
|
||||
return u_terminateUChars(dest, destCapacity, srcLength, pErrorCode);
|
||||
default:
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
@ -2275,10 +2200,9 @@ unorm_normalize(const UChar *src, int32_t srcLength,
|
||||
return 0;
|
||||
}
|
||||
|
||||
return unorm_internalNormalize(&dest, &destCapacity,
|
||||
return unorm_internalNormalize(dest, destCapacity,
|
||||
src, srcLength,
|
||||
mode, (UBool)((option&UNORM_IGNORE_HANGUL)!=0),
|
||||
NULL, NULL,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
@ -2288,7 +2212,7 @@ unorm_normalize(const UChar *src, int32_t srcLength,
|
||||
/*
|
||||
* These iteration functions are the core implementations of the
|
||||
* Normalizer class iteration API.
|
||||
* They read from a CharacterIterator into their own buffer
|
||||
* They read from a UCharIterator into their own buffer
|
||||
* and normalize into the Normalizer iteration buffer.
|
||||
* Normalizer itself then iterates over its buffer until that needs to be
|
||||
* filled again.
|
||||
@ -2302,11 +2226,11 @@ unorm_normalize(const UChar *src, int32_t srcLength,
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
|
||||
*/
|
||||
static inline uint32_t
|
||||
_getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
|
||||
_getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
|
||||
uint32_t norm32;
|
||||
|
||||
/* need src.hasPrevious() */
|
||||
c=src.previous();
|
||||
c=src.previous(&src);
|
||||
c2=0;
|
||||
|
||||
/* check for a surrogate before getting norm32 to see if we need to predecrement further */
|
||||
@ -2314,10 +2238,10 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
|
||||
return 0;
|
||||
} else if(!UTF_IS_SURROGATE(c)) {
|
||||
return _getNorm32(c);
|
||||
} else if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious()) {
|
||||
} else if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious(&src)) {
|
||||
/* unpaired surrogate */
|
||||
return 0;
|
||||
} else if(UTF_IS_FIRST_SURROGATE(c2=src.previous())) {
|
||||
} else if(UTF_IS_FIRST_SURROGATE(c2=src.previous(&src))) {
|
||||
norm32=_getNorm32(c2);
|
||||
if((norm32&mask)==0) {
|
||||
/* all surrogate pairs with this lead surrogate have irrelevant data */
|
||||
@ -2328,7 +2252,7 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
|
||||
}
|
||||
} else {
|
||||
/* unpaired second surrogate, undo the c2=src.previous() movement */
|
||||
src.move(1, CharacterIterator::kCurrent);
|
||||
src.move(&src, 1, UITERATOR_CURRENT);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -2338,14 +2262,14 @@ _getPrevNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
|
||||
*/
|
||||
typedef UBool
|
||||
IsPrevBoundaryFn(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
|
||||
IsPrevBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
|
||||
|
||||
/*
|
||||
* read backwards and check if the combining class is 0
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
|
||||
*/
|
||||
static UBool
|
||||
_isPrevCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
|
||||
_isPrevCCZero(UCharIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
|
||||
return (_getPrevNorm32(src, minC, ccMask, c, c2)&ccMask)==0;
|
||||
}
|
||||
|
||||
@ -2355,7 +2279,7 @@ _isPrevCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c,
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first surrogate but read second!)
|
||||
*/
|
||||
static UBool
|
||||
_isPrevTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
|
||||
_isPrevTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
|
||||
uint32_t norm32, decompQCMask;
|
||||
|
||||
decompQCMask=(ccOrQCMask<<2)&0xf; /* decomposition quick check mask */
|
||||
@ -2364,7 +2288,7 @@ _isPrevTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, U
|
||||
}
|
||||
|
||||
static int32_t
|
||||
_findPreviousIterationBoundary(CharacterIterator &src,
|
||||
_findPreviousIterationBoundary(UCharIterator &src,
|
||||
IsPrevBoundaryFn *isPrevBoundary, uint32_t minC, uint32_t mask,
|
||||
UChar *&buffer, int32_t &bufferCapacity,
|
||||
int32_t &startIndex,
|
||||
@ -2377,7 +2301,7 @@ _findPreviousIterationBoundary(CharacterIterator &src,
|
||||
stackBuffer=buffer;
|
||||
startIndex=bufferCapacity; /* fill the buffer from the end backwards */
|
||||
|
||||
while(src.hasPrevious()) {
|
||||
while(src.hasPrevious(&src)) {
|
||||
isBoundary=isPrevBoundary(src, minC, mask, c, c2);
|
||||
|
||||
/* always write this character to the front of the buffer */
|
||||
@ -2387,7 +2311,7 @@ _findPreviousIterationBoundary(CharacterIterator &src,
|
||||
|
||||
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
src.setToStart();
|
||||
src.move(&src, 0, UITERATOR_START);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2412,10 +2336,9 @@ _findPreviousIterationBoundary(CharacterIterator &src,
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
CharacterIterator &src,
|
||||
unorm_previousNormalize(UChar *dest, int32_t destCapacity,
|
||||
UCharIterator *src,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar stackBuffer[40];
|
||||
UChar *buffer;
|
||||
@ -2443,15 +2366,30 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
mask=_NORM_CC_MASK|_NORM_QC_NFKC;
|
||||
break;
|
||||
case UNORM_NONE:
|
||||
if(src.hasPrevious()) {
|
||||
UChar32 c=src.previous32();
|
||||
destLength=0;
|
||||
if(src->hasPrevious(src)) {
|
||||
UChar c, c2;
|
||||
|
||||
destLength=0;
|
||||
UTF_APPEND_CHAR_UNSAFE(dest, destLength, c);
|
||||
return destLength;
|
||||
} else {
|
||||
return 0;
|
||||
c=src->previous(src);
|
||||
destLength=1;
|
||||
if(UTF_IS_TRAIL(c) && src->hasPrevious(src)) {
|
||||
c2=src->previous(src);
|
||||
if(UTF_IS_LEAD(c2)) {
|
||||
if(destCapacity>=2) {
|
||||
dest[1]=c; /* trail surrogate */
|
||||
destLength=2;
|
||||
}
|
||||
c=c2; /* lead surrogate to be written below */
|
||||
} else {
|
||||
src->move(src, 1, UITERATOR_CURRENT);
|
||||
}
|
||||
}
|
||||
|
||||
if(destCapacity>0) {
|
||||
dest[0]=c;
|
||||
}
|
||||
}
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
default:
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
@ -2459,16 +2397,16 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
|
||||
buffer=stackBuffer;
|
||||
bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);
|
||||
bufferLength=_findPreviousIterationBoundary(src,
|
||||
bufferLength=_findPreviousIterationBoundary(*src,
|
||||
isPreviousBoundary, minC, mask,
|
||||
buffer, bufferCapacity,
|
||||
startIndex,
|
||||
pErrorCode);
|
||||
if(bufferLength>0) {
|
||||
destLength=unorm_internalNormalize(&dest, &destCapacity,
|
||||
destLength=unorm_internalNormalize(dest, destCapacity,
|
||||
buffer+startIndex, bufferLength,
|
||||
mode, ignoreHangul,
|
||||
growBuffer, context, pErrorCode);
|
||||
pErrorCode);
|
||||
} else {
|
||||
destLength=0;
|
||||
}
|
||||
@ -2490,11 +2428,11 @@ unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
* always reads complete characters
|
||||
*/
|
||||
static inline uint32_t
|
||||
_getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
|
||||
_getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
|
||||
uint32_t norm32;
|
||||
|
||||
/* need src.hasNext() */
|
||||
c=src.nextPostInc();
|
||||
/* need src.hasNext() to be true */
|
||||
c=src.next(&src);
|
||||
c2=0;
|
||||
|
||||
if(c<minC) {
|
||||
@ -2502,14 +2440,19 @@ _getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
|
||||
}
|
||||
|
||||
norm32=_getNorm32(c);
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && src.hasNext() && UTF_IS_SECOND_SURROGATE(c2=src.current())) {
|
||||
src.move(1, CharacterIterator::kCurrent); /* skip the c2 surrogate */
|
||||
if((norm32&mask)==0) {
|
||||
/* irrelevant data */
|
||||
return 0;
|
||||
if(UTF_IS_FIRST_SURROGATE(c)) {
|
||||
if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=src.current(&src))) {
|
||||
src.move(&src, 1, UITERATOR_CURRENT); /* skip the c2 surrogate */
|
||||
if((norm32&mask)==0) {
|
||||
/* irrelevant data */
|
||||
return 0;
|
||||
} else {
|
||||
/* norm32 must be a surrogate special */
|
||||
return _getNorm32FromSurrogatePair(norm32, c2);
|
||||
}
|
||||
} else {
|
||||
/* norm32 must be a surrogate special */
|
||||
return _getNorm32FromSurrogatePair(norm32, c2);
|
||||
/* unmatched surrogate */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return norm32;
|
||||
@ -2520,14 +2463,14 @@ _getNextNorm32(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, U
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
*/
|
||||
typedef UBool
|
||||
IsNextBoundaryFn(CharacterIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
|
||||
IsNextBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
|
||||
|
||||
/*
|
||||
* read forward and check if the combining class is 0
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
*/
|
||||
static UBool
|
||||
_isNextCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
|
||||
_isNextCCZero(UCharIterator &src, uint32_t minC, uint32_t ccMask, UChar &c, UChar &c2) {
|
||||
return (_getNextNorm32(src, minC, ccMask, c, c2)&ccMask)==0;
|
||||
}
|
||||
|
||||
@ -2537,7 +2480,7 @@ _isNextCCZero(CharacterIterator &src, uint32_t minC, uint32_t ccMask, UChar &c,
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
*/
|
||||
static UBool
|
||||
_isNextTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
|
||||
_isNextTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
|
||||
uint32_t norm32, decompQCMask;
|
||||
|
||||
decompQCMask=(ccOrQCMask<<2)&0xf; /* decomposition quick check mask */
|
||||
@ -2546,7 +2489,7 @@ _isNextTrueStarter(CharacterIterator &src, uint32_t minC, uint32_t ccOrQCMask, U
|
||||
}
|
||||
|
||||
static int32_t
|
||||
_findNextIterationBoundary(CharacterIterator &src,
|
||||
_findNextIterationBoundary(UCharIterator &src,
|
||||
IsNextBoundaryFn *isNextBoundary, uint32_t minC, uint32_t mask,
|
||||
UChar *&buffer, int32_t &bufferCapacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
@ -2554,7 +2497,7 @@ _findNextIterationBoundary(CharacterIterator &src,
|
||||
int32_t bufferIndex;
|
||||
UChar c, c2;
|
||||
|
||||
if(!src.hasNext()) {
|
||||
if(!src.hasNext(&src)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2562,20 +2505,22 @@ _findNextIterationBoundary(CharacterIterator &src,
|
||||
stackBuffer=buffer;
|
||||
|
||||
/* get one character and ignore its properties */
|
||||
buffer[0]=c=src.current();
|
||||
buffer[0]=c=src.next(&src);
|
||||
bufferIndex=1;
|
||||
c2=src.next();
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(c2)) {
|
||||
buffer[bufferIndex++]=c2;
|
||||
src.move(1, CharacterIterator::kCurrent); /* skip the c2 surrogate */
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && src.hasNext(&src)) {
|
||||
if(UTF_IS_SECOND_SURROGATE(c2=src.next(&src))) {
|
||||
buffer[bufferIndex++]=c2;
|
||||
} else {
|
||||
src.move(&src, -1, UITERATOR_CURRENT); /* back out the non-trail-surrogate */
|
||||
}
|
||||
}
|
||||
|
||||
/* get all following characters until we see a boundary */
|
||||
/* checking hasNext() instead of c!=DONE on the off-chance that U+ffff is part of the string */
|
||||
while(src.hasNext()) {
|
||||
while(src.hasNext(&src)) {
|
||||
if(isNextBoundary(src, minC, mask, c, c2)) {
|
||||
/* back out the latest movement to stop at the boundary */
|
||||
src.move(c2==0 ? -1 : -2, CharacterIterator::kCurrent);
|
||||
src.move(&src, c2==0 ? -1 : -2, UITERATOR_CURRENT);
|
||||
break;
|
||||
} else {
|
||||
if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity ||
|
||||
@ -2590,7 +2535,7 @@ _findNextIterationBoundary(CharacterIterator &src,
|
||||
}
|
||||
} else {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
src.setToEnd();
|
||||
src.move(&src, 0, UITERATOR_END);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -2601,10 +2546,9 @@ _findNextIterationBoundary(CharacterIterator &src,
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
CharacterIterator &src,
|
||||
unorm_nextNormalize(UChar *dest, int32_t destCapacity,
|
||||
UCharIterator *src,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar stackBuffer[40];
|
||||
UChar *buffer;
|
||||
@ -2632,15 +2576,30 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
mask=_NORM_CC_MASK|_NORM_QC_NFKC;
|
||||
break;
|
||||
case UNORM_NONE:
|
||||
if(src.hasNext()) {
|
||||
UChar32 c=src.next32PostInc();
|
||||
destLength=0;
|
||||
if(src->hasNext(src)) {
|
||||
UChar c, c2;
|
||||
|
||||
destLength=0;
|
||||
UTF_APPEND_CHAR_UNSAFE(dest, destLength, c);
|
||||
return destLength;
|
||||
} else {
|
||||
return 0;
|
||||
c=src->next(src);
|
||||
destLength=1;
|
||||
if(UTF_IS_LEAD(c) && src->hasNext(src)) {
|
||||
c2=src->next(src);
|
||||
if(UTF_IS_TRAIL(c2)) {
|
||||
if(destCapacity>=2) {
|
||||
dest[1]=c2; /* trail surrogate */
|
||||
destLength=2;
|
||||
}
|
||||
/* lead surrogate to be written below */
|
||||
} else {
|
||||
src->move(src, -1, UITERATOR_CURRENT);
|
||||
}
|
||||
}
|
||||
|
||||
if(destCapacity>0) {
|
||||
dest[0]=c;
|
||||
}
|
||||
}
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
default:
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
@ -2648,15 +2607,15 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
|
||||
buffer=stackBuffer;
|
||||
bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);
|
||||
bufferLength=_findNextIterationBoundary(src,
|
||||
bufferLength=_findNextIterationBoundary(*src,
|
||||
isNextBoundary, minC, mask,
|
||||
buffer, bufferCapacity,
|
||||
pErrorCode);
|
||||
if(bufferLength>0) {
|
||||
destLength=unorm_internalNormalize(&dest, &destCapacity,
|
||||
destLength=unorm_internalNormalize(dest, destCapacity,
|
||||
buffer, bufferLength,
|
||||
mode, ignoreHangul,
|
||||
growBuffer, context, pErrorCode);
|
||||
pErrorCode);
|
||||
} else {
|
||||
destLength=0;
|
||||
}
|
||||
@ -2674,4 +2633,3 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
* and if not, how hard it would be to improve it.
|
||||
* For example, see _findSafeFCD().
|
||||
*/
|
||||
|
||||
|
@ -155,14 +155,13 @@ unorm_haveData(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Internal API for normalizing.
|
||||
* Does not check for bad input and uses growBuffer.
|
||||
* Does not check for bad input.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity,
|
||||
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
@ -170,10 +169,9 @@ unorm_internalNormalize(UChar **pDest, int32_t *pDestCapacity,
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
|
||||
unorm_decompose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
@ -181,10 +179,9 @@ unorm_decompose(UChar **pDest, int32_t *pDestCapacity,
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_compose(UChar **pDest, int32_t *pDestCapacity,
|
||||
unorm_compose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
@ -250,15 +247,103 @@ unorm_getFCD16FromSurrogatePair(const uint16_t *fcdTrieIndex, uint16_t fcd16, UC
|
||||
];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
struct UCharIterator;
|
||||
typedef struct UCharIterator UCharIterator;
|
||||
|
||||
enum UCharIteratorOrigin {
|
||||
UITERATOR_START, UITERATOR_CURRENT, UITERATOR_END
|
||||
};
|
||||
|
||||
typedef enum UCharIteratorOrigin UCharIteratorOrigin;
|
||||
|
||||
/**
|
||||
* C API for code unit iteration.
|
||||
* This can be used as a C wrapper around
|
||||
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
|
||||
*
|
||||
* @internal for normalization
|
||||
*/
|
||||
struct UCharIterator {
|
||||
/**
|
||||
* (protected) Pointer to string or wrapped object or similar.
|
||||
* Not used by caller.
|
||||
*/
|
||||
const void *context;
|
||||
|
||||
/**
|
||||
* (protected) Length of string or similar.
|
||||
* Not used by caller.
|
||||
*/
|
||||
int32_t length;
|
||||
|
||||
/**
|
||||
* (protected) Current index or similar.
|
||||
* Not used by caller.
|
||||
*/
|
||||
int32_t index;
|
||||
|
||||
/**
|
||||
* (public) Moves the current position relative to the start or end of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
*
|
||||
* @param delta can be positive, zero, or negative
|
||||
* @param origin move relative to the start, end, or current index
|
||||
* @return the new index
|
||||
*/
|
||||
int32_t U_CALLCONV
|
||||
(*move)(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* (public) Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*/
|
||||
UBool U_CALLCONV
|
||||
(*hasNext)(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* (public) Check if previous() can still return another code unit.
|
||||
*/
|
||||
UBool U_CALLCONV
|
||||
(*hasPrevious)(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current position,
|
||||
* or 0xffff if there is none (index is at the end).
|
||||
*/
|
||||
UChar U_CALLCONV
|
||||
(*current)(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return 0xffff if there is none (index is at the end).
|
||||
*/
|
||||
UChar U_CALLCONV
|
||||
(*next)(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* (public) Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return 0xffff if there is none (index is at the start).
|
||||
*/
|
||||
UChar U_CALLCONV
|
||||
(*previous)(UCharIterator *iter);
|
||||
};
|
||||
|
||||
/**
|
||||
* Internal API for iterative normalizing - see Normalizer.
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
U_NAMESPACE_QUALIFIER CharacterIterator &src,
|
||||
unorm_nextNormalize(UChar *dest, int32_t destCapacity,
|
||||
UCharIterator *src,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
@ -266,13 +351,12 @@ unorm_nextNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
unorm_previousNormalize(UChar *&dest, int32_t &destCapacity,
|
||||
U_NAMESPACE_QUALIFIER CharacterIterator &src,
|
||||
unorm_previousNormalize(UChar *dest, int32_t destCapacity,
|
||||
UCharIterator *src,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
U_CDECL_END
|
||||
|
||||
/**
|
||||
* Description of the format of unorm.dat.
|
||||
|
@ -760,30 +760,33 @@ void collIterNormalize(collIterate *collationSource)
|
||||
UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */
|
||||
int32_t normLen;
|
||||
|
||||
normLen = unorm_decompose(&collationSource->writableBuffer, (int32_t *)&collationSource->writableBufSize,
|
||||
normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize,
|
||||
srcP, (int32_t)(endP - srcP),
|
||||
FALSE, FALSE,
|
||||
u_growBufferFromStatic, collationSource->stackWritableBuffer,
|
||||
&status);
|
||||
if (U_FAILURE(status)) {
|
||||
#ifdef UCOL_DEBUG
|
||||
fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status));
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
if(status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
// reallocate and terminate
|
||||
if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,
|
||||
&collationSource->writableBuffer,
|
||||
(int32_t *)&collationSource->writableBufSize, normLen + 1,
|
||||
normLen)
|
||||
0)
|
||||
) {
|
||||
#ifdef UCOL_DEBUG
|
||||
fprintf(stderr, "collIterNormalize(), out of memory\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
collationSource->writableBuffer[normLen] = 0;
|
||||
status = U_ZERO_ERROR;
|
||||
normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize,
|
||||
srcP, (int32_t)(endP - srcP),
|
||||
FALSE, FALSE,
|
||||
&status);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
#ifdef UCOL_DEBUG
|
||||
fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status));
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
|
||||
@ -3343,11 +3346,24 @@ ucol_calcSortKey(const UCollator *coll,
|
||||
}
|
||||
|
||||
if(normMode != UNORM_NONE && UNORM_YES != unorm_quickCheck(source, len, normMode, status)) {
|
||||
len = unorm_internalNormalize(&normSource, &normSourceLen,
|
||||
len = unorm_internalNormalize(normSource, normSourceLen,
|
||||
source, len,
|
||||
normMode, FALSE,
|
||||
u_growBufferFromStatic, normBuffer,
|
||||
status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
normSourceLen = len;
|
||||
normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR);
|
||||
if(normSource == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
*status = U_ZERO_ERROR;
|
||||
len = unorm_internalNormalize(normSource, normSourceLen,
|
||||
source, len,
|
||||
normMode, FALSE,
|
||||
status);
|
||||
}
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
@ -3835,11 +3851,24 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
||||
|
||||
/* If we need to normalize, we'll do it all at once at the beginning! */
|
||||
if(coll->normalizationMode != UCOL_OFF && UNORM_YES != unorm_quickCheck(source, len, UNORM_FCD, status)) {
|
||||
len = unorm_internalNormalize(&normSource, &normSourceLen,
|
||||
len = unorm_internalNormalize(normSource, normSourceLen,
|
||||
source, len,
|
||||
UNORM_FCD, FALSE,
|
||||
u_growBufferFromStatic, normBuffer,
|
||||
status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
normSourceLen = len;
|
||||
normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR);
|
||||
if(normSource == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
*status = U_ZERO_ERROR;
|
||||
len = unorm_internalNormalize(normSource, normSourceLen,
|
||||
source, len,
|
||||
UNORM_FCD, FALSE,
|
||||
status);
|
||||
}
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
@ -4678,11 +4707,24 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
if (unorm_quickCheck(sColl->string, sLen, UNORM_NFD, &status) != UNORM_YES) {
|
||||
sLen = unorm_decompose(&sColl->writableBuffer, (int32_t *)&sColl->writableBufSize,
|
||||
sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize,
|
||||
sBuf, sLen,
|
||||
FALSE, FALSE,
|
||||
u_growBufferFromStatic, sColl->stackWritableBuffer,
|
||||
&status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
if(!u_growBufferFromStatic(sColl->stackWritableBuffer,
|
||||
&sColl->writableBuffer,
|
||||
(int32_t *)&sColl->writableBufSize, sLen,
|
||||
0)
|
||||
) {
|
||||
return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize,
|
||||
sBuf, sLen,
|
||||
FALSE, FALSE,
|
||||
&status);
|
||||
}
|
||||
sBuf = sColl->writableBuffer;
|
||||
if (sBuf != sColl->stackWritableBuffer) {
|
||||
sColl->flags |= UCOL_ITER_ALLOCATED;
|
||||
@ -4691,11 +4733,24 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
if (unorm_quickCheck(tColl->string, tLen, UNORM_NFD, &status) != UNORM_YES) {
|
||||
tLen = unorm_decompose(&tColl->writableBuffer, (int32_t *)&tColl->writableBufSize,
|
||||
tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize,
|
||||
tBuf, tLen,
|
||||
FALSE, FALSE,
|
||||
u_growBufferFromStatic, tColl->stackWritableBuffer,
|
||||
&status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
if(!u_growBufferFromStatic(tColl->stackWritableBuffer,
|
||||
&tColl->writableBuffer,
|
||||
(int32_t *)&tColl->writableBufSize, tLen,
|
||||
0)
|
||||
) {
|
||||
return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize,
|
||||
tBuf, tLen,
|
||||
FALSE, FALSE,
|
||||
&status);
|
||||
}
|
||||
tBuf = tColl->writableBuffer;
|
||||
if (tBuf != tColl->stackWritableBuffer) {
|
||||
tColl->flags |= UCOL_ITER_ALLOCATED;
|
||||
|
Loading…
Reference in New Issue
Block a user