/* * Copyright (C) {1999}, International Business Machines Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 11/17/99 aliu Creation. ********************************************************************** */ #ifndef RBT_SET_H #define RBT_SET_H #include "uvector.h" #include "unicode/utrans.h" class Replaceable; class TransliterationRule; class TransliterationRuleData; class UnicodeFilter; class UnicodeString; /** * A set of rules for a RuleBasedTransliterator. This set encodes * the transliteration in one direction from one set of characters or short * strings to another. A RuleBasedTransliterator consists of up to * two such sets, one for the forward direction, and one for the reverse. * *

A TransliterationRuleSet has one important operation, that of * finding a matching rule at a given point in the text. This is accomplished * by the findMatch() method. * * @author Alan Liu */ class TransliterationRuleSet { /** * Vector of rules, in the order added. This is only used while the rule * set is getting built. After that, freeze() reorders and indexes the * rules into rules[]. However, the vector is kept until destruction. */ UVector* ruleVector; /** * Length of the longest preceding context */ int32_t maxContextLength; /** * Sorted and indexed table of rules. This is created by freeze() from * the rules in ruleVector. */ TransliterationRule** rules; /** * Index table. For text having a first character c, compute x = c&0xFF. * Now use rules[index[x]..index[x+1]-1]. This index table is created by * freeze(). */ int32_t index[257]; public: /** * Construct a new empty rule set. */ TransliterationRuleSet(); /** * Copy constructor. */ TransliterationRuleSet(const TransliterationRuleSet&); /** * Destructor. */ virtual ~TransliterationRuleSet(); /** * Return the maximum context length. * @return the length of the longest preceding context. */ virtual int32_t getMaximumContextLength(void) const; /** * Add a rule to this set. Rules are added in order, and order is * significant. The last call to this method must be followed by * a call to freeze() before the rule set is used. * * @param adoptedRule the rule to add */ virtual void addRule(TransliterationRule* adoptedRule, UErrorCode& status); /** * Check this for masked rules and index it to optimize performance. * The sequence of operations is: (1) add rules to a set using * addRule(); (2) freeze the set using * freeze(); (3) use the rule set. If * addRule() is called after calling this method, it * invalidates this object, and this method must be called again. * That is, freeze() may be called multiple times, * although for optimal performance it shouldn't be. */ virtual void freeze(const TransliterationRuleData& data, UErrorCode& status); /** * Attempt to find a matching rule at the specified point in the text. * @param text the text, both translated and untranslated * @param start the beginning index, inclusive; 0 <= start * <= limit. * @param limit the ending index, exclusive; start <= limit * <= text.length(). * @param cursor position at which to translate next, representing offset * into text. This value must be between start and * limit. * @param data a dictionary mapping variables to the sets they * represent (maps Character to UnicodeSet) * @param filter the filter. Any character for which * filter.isIn() returns false will not be * altered by this transliterator. If filter is * null then no filtering is applied. * @return the matching rule, or null if none found. */ virtual TransliterationRule* findMatch(const Replaceable& text, const UTransPosition& pos, const TransliterationRuleData& data, const UnicodeFilter* filter) const; /** * Attempt to find a matching rule at the specified point in the text. * Unlike findMatch(), this method does an incremental match. * An incremental match requires that there be no partial matches that might * pre-empt the full match that is found. If there are partial matches, * then null is returned. A non-null result indicates that a full match has * been found, and that it cannot be pre-empted by a partial match * regardless of what additional text is added to the translation buffer. * @param text the text, both translated and untranslated * @param start the beginning index, inclusive; 0 <= start * <= limit. * @param limit the ending index, exclusive; start <= limit * <= text.length(). * @param cursor position at which to translate next, representing offset * into text. This value must be between start and * limit. * @param data a dictionary mapping variables to the sets they * represent (maps Character to UnicodeSet) * @param partial output parameter. partial[0] is set to * true if a partial match is returned. * @param filter the filter. Any character for which * filter.isIn() returns false will not be * altered by this transliterator. If filter is * null then no filtering is applied. * @return the matching rule, or null if none found, or if the text buffer * does not have enough text yet to unambiguously match a rule. */ virtual TransliterationRule* findIncrementalMatch(const Replaceable& text, const UTransPosition& pos, const TransliterationRuleData& data, UBool& isPartial, const UnicodeFilter* filter) const; }; #endif