ICU-1007 completely replace the old with the new implementation

X-SVN-Rev: 5614
This commit is contained in:
Markus Scherer 2001-08-29 23:57:15 +00:00
parent bed1f92331
commit 974bfd3519
2 changed files with 222 additions and 937 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -27,9 +27,6 @@
#include "unicode/chariter.h" #include "unicode/chariter.h"
#include "unicode/unorm.h" #include "unicode/unorm.h"
/* forward declaration */
class ComposedCharIter;
/** /**
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or * <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
* decomposed form, allowing for easier sorting and searching of text. * decomposed form, allowing for easier sorting and searching of text.
@@ -246,6 +243,7 @@ class U_COMMON_API Normalizer
* Unicode Normalization Forms</a>. * Unicode Normalization Forms</a>.
* <p> * <p>
* @see #setOption * @see #setOption
* @deprecated To be removed (or moved to private for documentation) after 2002-aug-31. Obsolete option.
*/ */
IGNORE_HANGUL = 0x001 IGNORE_HANGUL = 0x001
}; };
@@ -500,14 +498,14 @@ class U_COMMON_API Normalizer
UErrorCode& status); UErrorCode& status);
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
// CharacterIterator overrides // Iteration API
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
/** /**
* Return the current character in the normalized text. * Return the current character in the normalized text.
* @draft * @draft
*/ */
UChar32 current(void) const; UChar32 current(void);
/** /**
* Return the first character in the normalized text. This resets * Return the first character in the normalized text. This resets
@@ -555,10 +553,12 @@ class U_COMMON_API Normalizer
* *
* @return the first normalized character that is the result of iterating * @return the first normalized character that is the result of iterating
* forward starting at the given index. * forward starting at the given index.
* @draft * @deprecated To be removed after 2002-aug-31. Use setIndexOnly().
*/ */
UChar32 setIndex(UTextOffset index); UChar32 setIndex(UTextOffset index);
void setIndexOnly(UTextOffset index);
/** /**
* Reset the iterator so that it is in the same state that it was just after * Reset the iterator so that it is in the same state that it was just after
* it was constructed. A subsequent call to <tt>next</tt> will return the first * it was constructed. A subsequent call to <tt>next</tt> will return the first
@@ -740,98 +740,28 @@ class U_COMMON_API Normalizer
private: private:
// Private utility methods for iteration // Private utility methods for iteration
// For documentation, see the source code // For documentation, see the source code
UChar nextCompose(void); UBool nextNormalize();
UChar prevCompose(void); UBool previousNormalize();
UChar nextDecomp(void);
UChar prevDecomp(void);
UChar curForward(void); void init(CharacterIterator* iter, EMode mode, int32_t option);
UChar curBackward(void);
void init(CharacterIterator* iter,
EMode mode,
int32_t option);
void initBuffer(void);
void clearBuffer(void); void clearBuffer(void);
// Utilities used by Compose
static void bubbleAppend(UnicodeString& target,
UChar ch,
uint32_t cclass);
static uint32_t getComposeClass(UChar ch);
static uint16_t composeLookup(UChar ch);
static uint16_t composeAction(uint16_t baseIndex,
uint16_t comIndex);
static void explode(UnicodeString& target,
uint16_t index);
static UChar pairExplode(UnicodeString& target,
uint16_t action);
// Utilities used by Decompose
static void fixCanonical(UnicodeString& result); // Reorders combining marks
static uint8_t getClass(UChar ch); // Gets char's combining class
// Other static utility methods
static void doAppend(const UChar source[],
uint16_t offset,
UnicodeString& dest);
static void doInsert(const UChar source[],
uint16_t offset,
UnicodeString& dest,
UTextOffset pos);
static uint16_t doReplace(const UChar source[],
uint16_t offset,
UnicodeString& dest,
UTextOffset pos);
static void hangulToJamo(UChar ch,
UnicodeString& result,
uint16_t decompLimit);
static void jamoAppend(UChar ch,
uint16_t decompLimit,
UnicodeString& dest);
static void jamoToHangul(UnicodeString& buffer,
UTextOffset start);
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
// Private data // Private data
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
EMode fMode; EMode fMode;
int32_t fOptions; int32_t fOptions;
int16_t minDecomp;
// The input text and our position in it // The input text and our position in it
CharacterIterator* text; CharacterIterator* text;
// The next index (if >= 0) to set in text for next(), which is
// necessary to make current() and setIndex() work reasonably.
UTextOffset nextIndex;
// A buffer for holding intermediate results // A buffer for holding intermediate results
UnicodeString buffer; UnicodeString buffer;
UTextOffset bufferPos; UTextOffset bufferPos;
UTextOffset bufferLimit;
UChar currentChar;
// Another buffer for use during iterative composition
UnicodeString explodeBuf;
enum {
EMPTY = -1,
STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
STR_LENGTH_MASK = 0x0003
};
enum {
HANGUL_BASE = 0xac00,
HANGUL_LIMIT = 0xd7a4,
JAMO_LBASE = 0x1100,
JAMO_VBASE = 0x1161,
JAMO_TBASE = 0x11a7,
JAMO_LCOUNT = 19,
JAMO_VCOUNT = 21,
JAMO_TCOUNT = 28,
JAMO_NCOUNT = JAMO_VCOUNT * JAMO_TCOUNT
};
friend class ComposedCharIter;
}; };
inline UBool inline UBool