/*
* Copyright (C) {1999}, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
#ifndef RBT_SET_H
#define RBT_SET_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utrans.h"
#include "uvector.h"
U_NAMESPACE_BEGIN
/*
* Forward declarations. The class declared in this header names these
* types only as pointer or reference parameters/members, so their full
* definitions are not required here. (UnicodeFilter is declared but is
* not referenced by the declarations visible in this header.)
*/
class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeString;
class UnicodeSet;
/**
* A set of rules for a RuleBasedTransliterator.
* @author Alan Liu
*/
class U_I18N_API TransliterationRuleSet : public UObject {
    /**
     * Vector of rules, in the order added.  This is used while the
     * rule set is getting built.  After that, freeze() reorders and
     * indexes the rules into rules[].  Any given rule is stored once
     * in ruleVector, and one or more times in rules[].  ruleVector
     * owns and deletes the rules.
     */
    UVector* ruleVector;

    /**
     * Sorted and indexed table of rules.  This is created by freeze()
     * from the rules in ruleVector.  It contains alias (non-owning)
     * pointers to the rules in ruleVector.  It is zero before freeze()
     * is called and non-zero thereafter.
     */
    TransliterationRule** rules;

    /**
     * Index table.  For text having a first character c, compute
     * x = c&0xFF.  Now use rules[index[x]..index[x+1]-1].  Because x
     * ranges over 0..255 and index[x+1] is read, the table needs 257
     * entries.  This index table is created by freeze().  Before
     * freeze() is called it contains garbage.
     */
    int32_t index[257];

    /**
     * Length of the longest preceding context.
     */
    int32_t maxContextLength;

public:

    /**
     * Construct a new empty rule set.
     * @param status Output parameter filled in with success or failure status.
     */
    TransliterationRuleSet(UErrorCode& status);

    /**
     * Copy constructor.
     */
    TransliterationRuleSet(const TransliterationRuleSet&);

    /**
     * Destructor.
     */
    virtual ~TransliterationRuleSet();

    /**
     * Change the data object that this rule belongs to.  Used
     * internally by the TransliterationRuleData copy constructor.
     * @param data the new data value to be set.
     */
    void setData(const TransliterationRuleData* data);

    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context.
     */
    virtual int32_t getMaximumContextLength(void) const;

    /**
     * Add a rule to this set.  Rules are added in order, and order is
     * significant.  The last call to this method must be followed by
     * a call to freeze() before the rule set is used.
     * This method must not be called after freeze() has been
     * called.
     *
     * NOTE(review): the documentation of freeze() describes calling
     * addRule() after freeze() as allowed provided freeze() is called
     * again; confirm which of the two contracts is intended.
     *
     * @param adoptedRule the rule to add; the name and the ownership
     *                    note on ruleVector indicate that this set
     *                    takes ownership of it
     * @param status      Output parameter filled in with success or
     *                    failure status.
     */
    virtual void addRule(TransliterationRule* adoptedRule,
                         UErrorCode& status);

    /**
     * Check this for masked rules and index it to optimize performance.
     * The sequence of operations is: (1) add rules to a set using
     * addRule(); (2) freeze the set using freeze(); (3) use the rule
     * set.  If addRule() is called after calling this method, it
     * invalidates this object, and this method must be called again.
     * That is, freeze() may be called multiple times, although for
     * optimal performance it shouldn't be.
     * @param parseError A reference to a UParseError that receives
     *                   information about any errors that occurred.
     * @param status     Output parameter filled in with success or
     *                   failure status.
     */
    virtual void freeze(UParseError& parseError, UErrorCode& status);

    /**
     * Transliterate the given text with the given UTransPosition
     * indices.  Return TRUE if the transliteration should continue
     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
     * Note that FALSE is only ever returned if isIncremental is TRUE.
     * @param text the text to be transliterated
     * @param index the position indices, which will be updated
     * @param isIncremental if TRUE, assume new text may be inserted
     * at index.limit, and return FALSE if there is a partial match.
     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
     * indicating that transliteration should stop until more text
     * arrives.
     */
    UBool transliterate(Replaceable& text,
                        UTransPosition& index,
                        UBool isIncremental);

    /**
     * Create rule strings that represent this rule set.
     * @param result string to receive the rule strings.  Current
     * contents will be deleted.
     * @param escapeUnprintable if TRUE, unprintable characters are
     * escaped in the output
     * @return A reference to 'result'.
     */
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @draft ICU 2.2
     */
    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     * The ID is the address of the private static member fgClassID.
     *
     * @draft ICU 2.2
     */
    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }

    /**
     * Return the set of all characters that may be modified
     * (getTarget=false) or emitted (getTarget=true) by this set.
     * @param result    the set that receives the characters
     * @param getTarget selects which of the two sets is computed
     * @return a UnicodeSet reference (presumably 'result'; confirm
     *         against the implementation)
     */
    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
                                   UBool getTarget) const;

private:

    /**
     * The address of this static class variable serves as this class's ID
     * for ICU "poor man's RTTI".
     */
    static const char fgClassID;

    /**
     * Copy assignment is forbidden.  This class owns the rules held in
     * ruleVector and declares its own destructor and copy constructor,
     * so the compiler-generated member-wise assignment would leave two
     * objects pointing at (and later deleting) the same storage.  The
     * operator is declared private and is intentionally not meant to
     * be defined, so accidental use fails at compile or link time.
     */
    TransliterationRuleSet& operator=(const TransliterationRuleSet&);
};
U_NAMESPACE_END
#endif