ICU-1107 redesign/fix Normalizer iteration index movement

X-SVN-Rev: 5731
This commit is contained in:
Markus Scherer 2001-09-11 01:29:36 +00:00
parent 6be4499096
commit b15ef62e32
2 changed files with 48 additions and 59 deletions

View File

@ -21,12 +21,12 @@
* fix the very broken iteration API
*/
#include "unicode/normlzr.h"
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/normlzr.h"
#include "unormimp.h"
//-------------------------------------------------------------------------
@ -36,7 +36,8 @@
Normalizer::Normalizer(const UnicodeString& str,
EMode mode) :
fMode(mode), fOptions(0),
text(new StringCharacterIterator(str)), nextIndex(-1),
text(new StringCharacterIterator(str)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
@ -46,7 +47,8 @@ Normalizer::Normalizer(const UnicodeString& str,
EMode mode,
int32_t options) :
fMode(mode), fOptions(options),
text(new StringCharacterIterator(str)), nextIndex(-1),
text(new StringCharacterIterator(str)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
@ -54,7 +56,8 @@ Normalizer::Normalizer(const UnicodeString& str,
Normalizer::Normalizer(const UChar *str, int32_t length, EMode mode) :
fMode(mode), fOptions(0),
text(new UCharCharacterIterator(str, length)), nextIndex(-1),
text(new UCharCharacterIterator(str, length)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
@ -63,7 +66,8 @@ Normalizer::Normalizer(const UChar *str, int32_t length, EMode mode) :
Normalizer::Normalizer(const CharacterIterator& iter,
EMode mode) :
fMode(mode), fOptions(0),
text(iter.clone()), nextIndex(-1),
text(iter.clone()),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
@ -73,7 +77,8 @@ Normalizer::Normalizer(const CharacterIterator& iter,
EMode mode,
int32_t options) :
fMode(mode), fOptions(options),
text(iter.clone()), nextIndex(-1),
text(iter.clone()),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
checkData();
@ -81,7 +86,8 @@ Normalizer::Normalizer(const CharacterIterator& iter,
// Copy constructor: clones the source iterator and duplicates the complete
// iteration state of `copy` (mode, options, both source indexes, and the
// normalization buffer with the read position inside it).
Normalizer::Normalizer(const Normalizer &copy) :
    fMode(copy.fMode), fOptions(copy.fOptions),
    text(copy.text->clone()),
    // buffer and bufferPos are copied verbatim, so both ends of the source
    // range they were produced from must be copied as well. Seeding
    // currentIndex from copy.nextIndex would make getIndex() and
    // previousNormalize() on the copy start from the wrong source position
    // whenever the copy is taken in the middle of a buffered segment.
    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
    buffer(copy.buffer), bufferPos(copy.bufferPos)
{
    checkData();
@ -118,7 +124,7 @@ Normalizer::clone() const
*/
int32_t Normalizer::hashCode() const
{
    // Combine every piece of iteration state: the source iterator, the mode
    // and options, the normalization buffer and the read position in it, and
    // both source indexes delimiting the buffered segment. A single return
    // is used; an earlier return that left out currentIndex made this one
    // unreachable.
    return text->hashCode() + fMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
}
UBool Normalizer::operator==(const Normalizer& that) const
@ -223,27 +229,10 @@ Normalizer::decompose(const UnicodeString& source,
* Return the current character in the normalized text.
*/
UChar32 Normalizer::current() {
if(bufferPos<buffer.length()) {
if(bufferPos<buffer.length() || nextNormalize()) {
return buffer.char32At(bufferPos);
} else {
/*
* Normalize from the current index,
* return the first character from there, and
* reset the character iterator to the original index.
* Set nextIndex to where the iterator stopped so
* that next() can later continue from there.
*/
UTextOffset currentIndex=text->getIndex();
UChar32 c;
if(nextNormalize()) {
c=buffer.char32At(bufferPos);
nextIndex=text->getIndex();
} else {
c=DONE;
}
text->setIndex(currentIndex);
return c;
return DONE;
}
}
@ -253,30 +242,12 @@ UChar32 Normalizer::current() {
* of the text has already been reached, {@link #DONE} is returned.
*/
UChar32 Normalizer::next() {
UChar32 c;
if(bufferPos<buffer.length()) {
c=buffer.char32At(bufferPos);
if(bufferPos<buffer.length() || nextNormalize()) {
UChar32 c=buffer.char32At(bufferPos);
bufferPos+=UTF_CHAR_LENGTH(c);
return c;
} else {
/*
* If the buffer (which is now exhausted) was normalized
* during current() or setIndex() then the character iterator
* must be set to behind what was normalized then
* in order to continue with the following text.
* That "position behind what was normalized" is nextIndex.
*/
if(nextIndex>=0) {
text->setIndex(nextIndex);
}
if(text->hasNext() && nextNormalize()) {
c=buffer.char32At(bufferPos);
bufferPos+=UTF_CHAR_LENGTH(c);
return c;
} else {
return DONE;
}
return DONE;
}
}
@ -286,10 +257,8 @@ UChar32 Normalizer::next() {
* of the text has already been reached, {@link #DONE} is returned.
*/
UChar32 Normalizer::previous() {
UChar32 c;
if(bufferPos>0 || text->hasPrevious() && previousNormalize()) {
c=buffer.char32At(bufferPos-1);
if(bufferPos>0 || previousNormalize()) {
UChar32 c=buffer.char32At(bufferPos-1);
bufferPos-=UTF_CHAR_LENGTH(c);
return c;
} else {
@ -299,12 +268,14 @@ UChar32 Normalizer::previous() {
void Normalizer::reset() {
    // Rewind the source iterator to its start position.
    text->setToStart();

    // Collapse the buffered source range onto the iterator's new position.
    nextIndex=text->getIndex();
    currentIndex=nextIndex;

    // Discard any normalized text left over from the previous position.
    clearBuffer();
}
// Move to source position `index` without normalizing anything there:
// the buffer is emptied and both source indexes are set to the new position.
void
Normalizer::setIndexOnly(UTextOffset index) {
    text->setIndex(index);

    // Read the position back from the iterator instead of trusting `index`;
    // this is what validates the index.
    const UTextOffset position=text->getIndex();
    currentIndex=position;
    nextIndex=position;

    clearBuffer();
}
@ -337,8 +308,7 @@ UChar32 Normalizer::setIndex(UTextOffset index) {
* the <tt>Normalizer's</tt> position to the beginning of the text.
*/
UChar32 Normalizer::first() {
    // reset() already rewinds the source iterator, re-synchronizes
    // currentIndex/nextIndex with it and empties the buffer, so the
    // iterator and buffer are not touched separately here.
    reset();
    // Normalize the first segment and return its first character
    // (or DONE if the text is empty).
    return next();
}
@ -349,6 +319,7 @@ UChar32 Normalizer::first() {
*/
UChar32 Normalizer::last() {
    // Jump the source iterator to the end of the text.
    text->setToEnd();

    // Collapse the buffered source range onto that end position so that
    // previous() normalizes backwards from there.
    nextIndex=text->getIndex();
    currentIndex=nextIndex;

    // Drop any normalized text belonging to the old position.
    clearBuffer();

    return previous();
}
@ -368,7 +339,11 @@ UChar32 Normalizer::last() {
*
*/
UTextOffset Normalizer::getIndex() const {
    // The buffer holds the normalization of the source range
    // [currentIndex, nextIndex). While characters of it remain unread the
    // position is the start of that range; once the buffer is exhausted
    // (or empty) it is the end of the range. The raw iterator index is not
    // used: after a forward normalization it already sits at nextIndex even
    // though the segment has only been partly consumed.
    return bufferPos<buffer.length() ? currentIndex : nextIndex;
}
/**
@ -497,7 +472,6 @@ Normalizer::getText(UnicodeString& result)
//-------------------------------------------------------------------------
void Normalizer::clearBuffer() {
    // Only the normalized text and the read position inside it are reset.
    // currentIndex/nextIndex are deliberately left alone: callers such as
    // nextNormalize() read nextIndex right after clearing the buffer in
    // order to know where to resume in the source text.
    buffer.remove();
    bufferPos=0;
}
@ -507,11 +481,18 @@ Normalizer::nextNormalize() {
UErrorCode errorCode=U_ZERO_ERROR;
clearBuffer();
currentIndex=nextIndex;
text->setIndex(nextIndex);
if(!text->hasNext()) {
return FALSE;
}
buffer.fLength=unorm_nextNormalize(buffer.fArray, buffer.fCapacity, *text,
getUNormalizationMode(fMode, errorCode),
(fOptions&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &buffer,
&errorCode);
nextIndex=text->getIndex();
return U_SUCCESS(errorCode) && buffer.length()>0;
}
@ -520,11 +501,18 @@ Normalizer::previousNormalize() {
UErrorCode errorCode=U_ZERO_ERROR;
clearBuffer();
nextIndex=currentIndex;
text->setIndex(currentIndex);
if(!text->hasPrevious()) {
return FALSE;
}
buffer.fLength=unorm_previousNormalize(buffer.fArray, buffer.fCapacity, *text,
getUNormalizationMode(fMode, errorCode),
(fOptions&IGNORE_HANGUL)!=0,
UnicodeString::growBuffer, &buffer,
&errorCode);
bufferPos=buffer.length();
currentIndex=text->getIndex();
return U_SUCCESS(errorCode) && buffer.length()>0;
}

View File

@ -755,9 +755,10 @@ private:
// The input text and our position in it
CharacterIterator* text;
// The next index (if >= 0) to set in text for next(), which is
// necessary to make current() and setIndex() work reasonably.
UTextOffset nextIndex;
// The normalization buffer holds the normalized form of the source text
// in the half-open range [currentIndex, nextIndex).
UTextOffset currentIndex, nextIndex;
// A buffer for holding intermediate results
UnicodeString buffer;