/* ******************************************************************************* * Copyright (C) 2016 and later: Unicode, Inc. and others. * License & terms of use: http://www.unicode.org/copyright.html ******************************************************************************* * collationsets.h * * created on: 2013feb09 * created by: Markus W. Scherer */ #ifndef __COLLATIONSETS_H__ #define __COLLATIONSETS_H__ #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/uniset.h" #include "collation.h" U_NAMESPACE_BEGIN struct CollationData; /** * Finds the set of characters and strings that sort differently in the tailoring * from the base data. * * Every mapping in the tailoring needs to be compared to the base, * because some mappings are copied for optimization, and * all contractions for a character are copied if any contractions for that character * are added, modified or removed. * * It might be simpler to re-parse the rule string, but: * - That would require duplicating some of the from-rules builder code. * - That would make the runtime code depend on the builder. * - That would only work if we have the rule string, and we allow users to * omit the rule string from data files. */ class TailoredSet : public UMemory { public: TailoredSet(UnicodeSet *t) : data(NULL), baseData(NULL), tailored(t), suffix(NULL), errorCode(U_ZERO_ERROR) {} void forData(const CollationData *d, UErrorCode &errorCode); /** * @return U_SUCCESS(errorCode) in C++, void in Java * @internal only public for access by callback */ UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); private: void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); void compareContractions(UChar32 c, const UChar *p, const UChar *q); void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); void addContractions(UChar32 c, const UChar *p); void addSuffix(UChar32 c, const UnicodeString &sfx); void add(UChar32 c); /** Prefixes are reversed in the data structure. */ void setPrefix(const UnicodeString &pfx) { unreversedPrefix = pfx; unreversedPrefix.reverse(); } void resetPrefix() { unreversedPrefix.remove(); } const CollationData *data; const CollationData *baseData; UnicodeSet *tailored; UnicodeString unreversedPrefix; const UnicodeString *suffix; UErrorCode errorCode; }; class ContractionsAndExpansions : public UMemory { public: class CESink : public UMemory { public: virtual ~CESink(); virtual void handleCE(int64_t ce) = 0; virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; }; ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) : data(NULL), contractions(con), expansions(exp), sink(s), addPrefixes(prefixes), checkTailored(0), suffix(NULL), errorCode(U_ZERO_ERROR) {} void forData(const CollationData *d, UErrorCode &errorCode); void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); // all following: @internal, only public for access by callback void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); void addExpansions(UChar32 start, UChar32 end); void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); /** Prefixes are reversed in the data structure. */ void setPrefix(const UnicodeString &pfx) { unreversedPrefix = pfx; unreversedPrefix.reverse(); } void resetPrefix() { unreversedPrefix.remove(); } const CollationData *data; UnicodeSet *contractions; UnicodeSet *expansions; CESink *sink; UBool addPrefixes; int8_t checkTailored; // -1: collected tailored +1: exclude tailored UnicodeSet tailored; UnicodeSet ranges; UnicodeString unreversedPrefix; const UnicodeString *suffix; int64_t ces[Collation::MAX_EXPANSION_LENGTH]; UErrorCode errorCode; }; U_NAMESPACE_END #endif // !UCONFIG_NO_COLLATION #endif // __COLLATIONSETS_H__