/****************************************************************************** * Copyright © {1996-1999}, International Business Machines Corporation and others. All Rights Reserved. ****************************************************************************** */ //============================================================================= // // File mergecol.h // // // // Created by: Helena Shih // // Modification History: // // Date Name Description // 3/5/97 mark Cleaned up fixEntry(). Added constants BYTEPOWER // and BYTEMASK to replace BYTESIZE. // 6/17/97 helena In getPattern, added the queue-up list for entries // with the same extension chars. // 8/18/97 helena Added internal API documentation. // 8/13/98 erm Synched up with 1.2 version of MergeCollation.java // 04/23/99 stephen Removed EDecompositionMode, merged with // Normalizer::EMode //============================================================================= #ifndef MERGECOL_H #define MERGECOL_H #include "unicode/unistr.h" #include "ptnentry.h" #include "tables.h" #include "unicode/coll.h" #include "unicode/normlzr.h" /** * Utility class for normalizing and merging patterns for collation. * Patterns are strings of the form *, where has the * form: *
 *  := *
 *  := {"/"}
 *  := "=", ",", ";", "<", "&"
 * , and  are both arbitrary strings.
 * 
*

Unquoted whitespaces are ignored. * 'xxx' can be used to quote characters. *

* One difference from Collation is that & is used to reset to a current * point. Or, in other words, it introduces a new sequence which is to * be added to the old. *

* That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR * "a < b < d & b < c" * XXX: make '' be a single quote. * @see PatternEntry * @version 1.4 1/7/97 * @author Mark Davis, Helena Shih */ class MergeCollation { public: /** * Creates a merged collation table from a pattern string. * @param pattern the pattern string. * @param status the error code status. If the input pattern is incorrect, * this will be set to U_INVALID_FORMAT_ERROR. */ MergeCollation( const UnicodeString& pattern, Normalizer::EMode decompMode, UErrorCode& success); /** * Copy constructor. */ MergeCollation( const MergeCollation& other); /** * Destructor. */ ~MergeCollation(); /** Assignment operator */ const MergeCollation& operator=(const MergeCollation& other); /** * Recovers current pattern from this merged collation object. * @param pattern the result buffer. * @return the recovered result. */ UnicodeString& getPattern(UnicodeString& pattern) const; /** * Recovers current pattern with white spaces. * @param pattern the result buffer. * @param withWhiteSpace puts spacing around the entries, and \n * before & and < * @return the recovered result. */ UnicodeString& getPattern(UnicodeString& pattern, UBool withWhiteSpace) const; /** * Emits the pattern for collation builder. * @param pattern the result buffer. * @return Emits the string in the format understable to the collation * builder. */ UnicodeString& emitPattern(UnicodeString& pattern) const; /** * Emits the pattern for collation builder. * @param pattern the result buffer. * @param withWhiteSpace puts spacing around the entries, and \n * before & and < * @return Emits the string in the format understable to the collation * builder. */ UnicodeString& emitPattern(UnicodeString& pattern, UBool withWhiteSpace) const; /** * Sets the pattern. * @param pattern string. * @param status the error code status, it will be set to U_INVALID_FORMAT_ERROR * if the pattern is incorrect. */ void setPattern(const UnicodeString& pattern, Normalizer::EMode decompMode, UErrorCode& status); /** * Adds a pattern to the current merge collation object. * @param pattern the new pattern to be added. * @param status the error code status, it will be set to U_INVALID_FORMAT_ERROR * if the pattern is incorrect. */ void addPattern(const UnicodeString& pattern, Normalizer::EMode decompMode, UErrorCode& status); /** * Gets count of separate entries in the merge collation object. * @return the number of pattern entries */ int32_t getCount(void) const; /** * Gets the specified pattern entry out of the merge collation object. * @param index the offset of the desired pattern entry * @return the requested pattern entry */ const PatternEntry* getItemAt(UTextOffset index) const; private: //============================================================ // privates //============================================================ VectorOfPointersToPatternEntry* patterns; // a vector of PatternEntries static const int32_t BITARRAYSIZE; static const uint8_t BITARRAYMASK; static const int32_t BYTEPOWER; static const int32_t BYTEMASK; PatternEntry* lastEntry; PatternEntry* saveEntry; uint8_t* statusArray; /** * Finds the last pattern entry before the specified offset that does not have * extension chars. * @param i the offset. * @return the pattern entry. */ const PatternEntry* findLastWithNoExtension(int32_t i) const; /** * Fixes the new pattern entry in the merge collation table. * If the strength is RESET, then just change the lastEntry to * be the current. (If the current is not in patterns, signal an error). * If not, then remove the current entry, and add it after lastEntry * (which is usually at the end). Strength indicates the text order * weight for an entry. * @param newEntry the new pattern entry * @param status the error code status, it will be set to U_INVALID_FORMAT_ERROR * if the strength is RESET and a previous entry can't be found. */ void fixEntry( PatternEntry* newEntry, UErrorCode& status); /** * Finds the offset of the specified entry that was previously installed in the * merge collation object. * @param lastEntry the entry that was previously installed. * @param excess the extra characters * @param status the error code status, it will be set to U_INVALID_FORMAT_ERROR * if the strength is RESET and a previous entry can't be found. * @return the offset of the found entry */ int32_t findLastEntry( const PatternEntry* lastEntry, UnicodeString& excess, UErrorCode& success) const; }; inline UnicodeString& MergeCollation::getPattern(UnicodeString& result) const { return getPattern(result, TRUE); } inline UnicodeString& MergeCollation::emitPattern(UnicodeString& result) const { return emitPattern(result, TRUE); } #endif // _MERGECOL