ICU-1007 completely replace the old with the new implementation
X-SVN-Rev: 5614
This commit is contained in:
parent
bed1f92331
commit
974bfd3519
File diff suppressed because it is too large
Load Diff
@ -27,9 +27,6 @@
|
|||||||
#include "unicode/chariter.h"
|
#include "unicode/chariter.h"
|
||||||
#include "unicode/unorm.h"
|
#include "unicode/unorm.h"
|
||||||
|
|
||||||
/* forward declaration */
|
|
||||||
class ComposedCharIter;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
|
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
|
||||||
* decomposed form, allowing for easier sorting and searching of text.
|
* decomposed form, allowing for easier sorting and searching of text.
|
||||||
@ -246,6 +243,7 @@ class U_COMMON_API Normalizer
|
|||||||
* Unicode Normalization Forms</a>.
|
* Unicode Normalization Forms</a>.
|
||||||
* <p>
|
* <p>
|
||||||
* @see #setOption
|
* @see #setOption
|
||||||
|
* @deprecated To be removed (or moved to private for documentation) after 2002-aug-31. Obsolete option.
|
||||||
*/
|
*/
|
||||||
IGNORE_HANGUL = 0x001
|
IGNORE_HANGUL = 0x001
|
||||||
};
|
};
|
||||||
@ -500,14 +498,14 @@ class U_COMMON_API Normalizer
|
|||||||
UErrorCode& status);
|
UErrorCode& status);
|
||||||
|
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
// CharacterIterator overrides
|
// Iteration API
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the current character in the normalized text.
|
* Return the current character in the normalized text.
|
||||||
* @draft
|
* @draft
|
||||||
*/
|
*/
|
||||||
UChar32 current(void) const;
|
UChar32 current(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the first character in the normalized text. This resets
|
* Return the first character in the normalized text. This resets
|
||||||
@ -555,10 +553,12 @@ class U_COMMON_API Normalizer
|
|||||||
*
|
*
|
||||||
* @return the first normalized character that is the result of iterating
|
* @return the first normalized character that is the result of iterating
|
||||||
* forward starting at the given index.
|
* forward starting at the given index.
|
||||||
* @draft
|
* @deprecated To be removed after 2002-aug-31. Use setIndexOnly().
|
||||||
*/
|
*/
|
||||||
UChar32 setIndex(UTextOffset index);
|
UChar32 setIndex(UTextOffset index);
|
||||||
|
|
||||||
|
void setIndexOnly(UTextOffset index);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset the iterator so that it is in the same state that it was just after
|
* Reset the iterator so that it is in the same state that it was just after
|
||||||
* it was constructed. A subsequent call to <tt>next</tt> will return the first
|
* it was constructed. A subsequent call to <tt>next</tt> will return the first
|
||||||
@ -740,98 +740,28 @@ class U_COMMON_API Normalizer
|
|||||||
private:
|
private:
|
||||||
// Private utility methods for iteration
|
// Private utility methods for iteration
|
||||||
// For documentation, see the source code
|
// For documentation, see the source code
|
||||||
UChar nextCompose(void);
|
UBool nextNormalize();
|
||||||
UChar prevCompose(void);
|
UBool previousNormalize();
|
||||||
UChar nextDecomp(void);
|
|
||||||
UChar prevDecomp(void);
|
|
||||||
|
|
||||||
UChar curForward(void);
|
void init(CharacterIterator* iter, EMode mode, int32_t option);
|
||||||
UChar curBackward(void);
|
|
||||||
|
|
||||||
void init(CharacterIterator* iter,
|
|
||||||
EMode mode,
|
|
||||||
int32_t option);
|
|
||||||
void initBuffer(void);
|
|
||||||
void clearBuffer(void);
|
void clearBuffer(void);
|
||||||
|
|
||||||
// Utilities used by Compose
|
|
||||||
static void bubbleAppend(UnicodeString& target,
|
|
||||||
UChar ch,
|
|
||||||
uint32_t cclass);
|
|
||||||
static uint32_t getComposeClass(UChar ch);
|
|
||||||
static uint16_t composeLookup(UChar ch);
|
|
||||||
static uint16_t composeAction(uint16_t baseIndex,
|
|
||||||
uint16_t comIndex);
|
|
||||||
static void explode(UnicodeString& target,
|
|
||||||
uint16_t index);
|
|
||||||
static UChar pairExplode(UnicodeString& target,
|
|
||||||
uint16_t action);
|
|
||||||
|
|
||||||
// Utilities used by Decompose
|
|
||||||
static void fixCanonical(UnicodeString& result); // Reorders combining marks
|
|
||||||
static uint8_t getClass(UChar ch); // Gets char's combining class
|
|
||||||
|
|
||||||
// Other static utility methods
|
|
||||||
static void doAppend(const UChar source[],
|
|
||||||
uint16_t offset,
|
|
||||||
UnicodeString& dest);
|
|
||||||
static void doInsert(const UChar source[],
|
|
||||||
uint16_t offset,
|
|
||||||
UnicodeString& dest,
|
|
||||||
UTextOffset pos);
|
|
||||||
static uint16_t doReplace(const UChar source[],
|
|
||||||
uint16_t offset,
|
|
||||||
UnicodeString& dest,
|
|
||||||
UTextOffset pos);
|
|
||||||
|
|
||||||
static void hangulToJamo(UChar ch,
|
|
||||||
UnicodeString& result,
|
|
||||||
uint16_t decompLimit);
|
|
||||||
static void jamoAppend(UChar ch,
|
|
||||||
uint16_t decompLimit,
|
|
||||||
UnicodeString& dest);
|
|
||||||
static void jamoToHangul(UnicodeString& buffer,
|
|
||||||
UTextOffset start);
|
|
||||||
|
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
// Private data
|
// Private data
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
|
|
||||||
EMode fMode;
|
EMode fMode;
|
||||||
int32_t fOptions;
|
int32_t fOptions;
|
||||||
int16_t minDecomp;
|
|
||||||
|
|
||||||
// The input text and our position in it
|
// The input text and our position in it
|
||||||
CharacterIterator* text;
|
CharacterIterator* text;
|
||||||
|
// The next index (if >= 0) to set in text for next(), which is
|
||||||
|
// necessary to make current() and setIndex() work reasonably.
|
||||||
|
UTextOffset nextIndex;
|
||||||
|
|
||||||
// A buffer for holding intermediate results
|
// A buffer for holding intermediate results
|
||||||
UnicodeString buffer;
|
UnicodeString buffer;
|
||||||
UTextOffset bufferPos;
|
UTextOffset bufferPos;
|
||||||
UTextOffset bufferLimit;
|
|
||||||
UChar currentChar;
|
|
||||||
|
|
||||||
// Another buffer for use during iterative composition
|
|
||||||
UnicodeString explodeBuf;
|
|
||||||
|
|
||||||
enum {
|
|
||||||
EMPTY = -1,
|
|
||||||
STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
|
|
||||||
STR_LENGTH_MASK = 0x0003
|
|
||||||
};
|
|
||||||
|
|
||||||
enum {
|
|
||||||
HANGUL_BASE = 0xac00,
|
|
||||||
HANGUL_LIMIT = 0xd7a4,
|
|
||||||
JAMO_LBASE = 0x1100,
|
|
||||||
JAMO_VBASE = 0x1161,
|
|
||||||
JAMO_TBASE = 0x11a7,
|
|
||||||
JAMO_LCOUNT = 19,
|
|
||||||
JAMO_VCOUNT = 21,
|
|
||||||
JAMO_TCOUNT = 28,
|
|
||||||
JAMO_NCOUNT = JAMO_VCOUNT * JAMO_TCOUNT
|
|
||||||
};
|
|
||||||
|
|
||||||
friend class ComposedCharIter;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
inline UBool
|
inline UBool
|
||||||
|
Loading…
Reference in New Issue
Block a user