scuffed-code/icu4c/source/i18n/rbt_set.h

/*
* Copyright (C) {1999}, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_SET_H
#define RBT_SET_H

#include "uvector.h"
#include "unicode/utrans.h"

class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeString;

/**
 * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
 * the transliteration in one direction from one set of characters or short
 * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
 * two such sets, one for the forward direction, and one for the reverse.
 *
 * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
 * finding a matching rule at a given point in the text.  This is accomplished
 * by the <code>findMatch()</code> method.
 *
 * @author Alan Liu
 */
class TransliterationRuleSet {
    // The data members below precede the first access specifier, so they are
    // private (the default access for a class).
    //
    // NOTE(review): a copy constructor and a destructor are declared, but no
    // copy assignment operator.  The implicitly generated one would copy the
    // raw pointers ruleVector and rules memberwise -- confirm that objects of
    // this class are never assigned.

    /**
     * Vector of rules, in the order added.  This is only used while the rule
     * set is getting built.  After that, freeze() reorders and indexes the
     * rules into rules[].  However, the vector is kept until destruction.
     */
    UVector* ruleVector;

    /**
     * Length of the longest preceding context
     */
    int32_t maxContextLength;

    /**
     * Sorted and indexed table of rules.  This is created by freeze() from
     * the rules in ruleVector.
     */
    TransliterationRule** rules;

    /**
     * Index table.  For text having a first character c, compute x = c&0xFF.
     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
     * freeze().
     *
     * <p>x ranges over 0..255, and the upper bound of each range reads
     * index[x+1], so 257 entries (256 buckets plus one terminating entry)
     * are required.
     */
    int32_t index[257];

public:

    /**
     * Construct a new empty rule set.
     */
    TransliterationRuleSet();

    /**
     * Copy constructor.
     */
    TransliterationRuleSet(const TransliterationRuleSet&);

    /**
     * Destructor.
     */
    virtual ~TransliterationRuleSet();

    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context.
     */
    virtual int32_t getMaximumContextLength(void) const;

    /**
     * Add a rule to this set.  Rules are added in order, and order is
     * significant.  The last call to this method must be followed by
     * a call to <code>freeze()</code> before the rule set is used.
     *
     * @param adoptedRule the rule to add.  NOTE(review): the name suggests
     * that this set takes ownership of the rule -- confirm against the
     * implementation.
     * @param status error code, passed by reference.  NOTE(review):
     * presumably set to a failure code if the rule cannot be added --
     * confirm against the implementation.
     */
    virtual void addRule(TransliterationRule* adoptedRule,
                         UErrorCode& status);

    /**
     * Check this for masked rules and index it to optimize performance.
     * The sequence of operations is: (1) add rules to a set using
     * <code>addRule()</code>; (2) freeze the set using
     * <code>freeze()</code>; (3) use the rule set.  If
     * <code>addRule()</code> is called after calling this method, it
     * invalidates this object, and this method must be called again.
     * That is, <code>freeze()</code> may be called multiple times,
     * although for optimal performance it shouldn't be.
     *
     * @param data the rule data associated with the rules in this set.
     * NOTE(review): presumably the same variable-to-set dictionary that is
     * passed to <code>findMatch()</code> -- confirm.
     * @param status error code, passed by reference.  NOTE(review):
     * presumably set to a failure code if a masked rule is detected or
     * indexing fails -- confirm against the implementation.
     */
    virtual void freeze(const TransliterationRuleData& data,
                        UErrorCode& status);

    /**
     * Attempt to find a matching rule at the specified point in the text.
     * @param text the text, both translated and untranslated
     * @param pos the position at which to match, as a
     * <code>UTransPosition</code>.  Earlier revisions of this comment
     * described separate <code>start</code>, <code>limit</code> and
     * <code>cursor</code> arguments (<code>0 <= start <= limit <=
     * text.length()</code>, with the cursor between <code>start</code> and
     * <code>limit</code>).  NOTE(review): presumably those values are now
     * carried by the fields of <code>pos</code> -- confirm against
     * unicode/utrans.h.
     * @param data a dictionary mapping variables to the sets they
     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
     * @param filter the filter.  Any character for which
     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
     * altered by this transliterator.  If <tt>filter</tt> is
     * <tt>null</tt> then no filtering is applied.
     * @return the matching rule, or null if none found.
     */
    virtual TransliterationRule* findMatch(const Replaceable& text,
                                           const UTransPosition& pos,
                                           const TransliterationRuleData& data,
                                           const UnicodeFilter* filter) const;

    /**
     * Attempt to find a matching rule at the specified point in the text.
     * Unlike <code>findMatch()</code>, this method does an incremental match.
     * An incremental match requires that there be no partial matches that might
     * pre-empt the full match that is found.  If there are partial matches,
     * then null is returned.  A non-null result indicates that a full match has
     * been found, and that it cannot be pre-empted by a partial match
     * regardless of what additional text is added to the translation buffer.
     * @param text the text, both translated and untranslated
     * @param pos the position at which to match, as a
     * <code>UTransPosition</code>.  Earlier revisions of this comment
     * described separate <code>start</code>, <code>limit</code> and
     * <code>cursor</code> arguments (<code>0 <= start <= limit <=
     * text.length()</code>, with the cursor between <code>start</code> and
     * <code>limit</code>).  NOTE(review): presumably those values are now
     * carried by the fields of <code>pos</code> -- confirm against
     * unicode/utrans.h.
     * @param data a dictionary mapping variables to the sets they
     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
     * @param isPartial output parameter (documented as
     * <code>partial[0]</code> in earlier revisions of this comment).  Set to
     * true if a partial match is encountered.  NOTE(review): confirm
     * whether it is also reset to false when no partial match occurs.
     * @param filter the filter.  Any character for which
     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
     * altered by this transliterator.  If <tt>filter</tt> is
     * <tt>null</tt> then no filtering is applied.
     * @return the matching rule, or null if none found, or if the text buffer
     * does not have enough text yet to unambiguously match a rule.
     */
    virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
                                              const UTransPosition& pos,
                                              const TransliterationRuleData& data,
                                              UBool& isPartial,
                                              const UnicodeFilter* filter) const;

    /**
     * Create rule strings that represents this rule set.
     * @param result string to receive the rule strings.  Current
     * contents will be deleted.
     * @param data the rule data associated with the rules in this set.
     * NOTE(review): presumably used to resolve variable references while
     * generating the rule text -- confirm.
     * @param escapeUnprintable NOTE(review): presumably, if true,
     * unprintable characters are written as escape sequences in the
     * generated rules -- confirm against the implementation.
     * @return NOTE(review): presumably a reference to <code>result</code>
     * -- confirm against the implementation.
     */
    virtual UnicodeString& toRules(UnicodeString& result,
                                   const TransliterationRuleData& data,
                                   UBool escapeUnprintable) const;
};
#endif
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/*`
ICU-903 updated copyright notices. X-SVN-Rev: 4249 2001-03-22 00:09:10 +00:00			`* Copyright (C) {1999}, International Business Machines Corporation and others. All Rights Reserved.`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`**********************************************************************`
			`* Date Name Description`
			`* 11/17/99 aliu Creation.`
			`**********************************************************************`
			`*/`
			`#ifndef RBT_SET_H`
			`#define RBT_SET_H`

			`#include "uvector.h"`
ICU-474 fix UTransPosition handling X-SVN-Rev: 1688 2000-06-29 00:18:43 +00:00			`#include "unicode/utrans.h"`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`class Replaceable;`
			`class TransliterationRule;`
			`class TransliterationRuleData;`
			`class UnicodeFilter;`
			`class UnicodeString;`

			`/**`
			`* A set of rules for a <code>RuleBasedTransliterator</code>. This set encodes`
			`* the transliteration in one direction from one set of characters or short`
			`* strings to another. A <code>RuleBasedTransliterator</code> consists of up to`
			`* two such sets, one for the forward direction, and one for the reverse.`
			`*`
			`* <p>A <code>TransliterationRuleSet</code> has one important operation, that of`
			`* finding a matching rule at a given point in the text. This is accomplished`
			`* by the <code>findMatch()</code> method.`
			`*`
			`* @author Alan Liu`
			`*/`
			`class TransliterationRuleSet {`
			`/**`
ICU-199 new rule syntax; performance improvement; update rules X-SVN-Rev: 559 2000-01-13 07:28:08 +00:00			`* Vector of rules, in the order added. This is only used while the rule`
			`* set is getting built. After that, freeze() reorders and indexes the`
ICU-432 leak fix cleanup X-SVN-Rev: 1840 2000-07-13 00:40:31 +00:00			`* rules into rules[]. However, the vector is kept until destruction.`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`*/`
ICU-199 new rule syntax; performance improvement; update rules X-SVN-Rev: 559 2000-01-13 07:28:08 +00:00			`UVector* ruleVector;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* Length of the longest preceding context`
			`*/`
			`int32_t maxContextLength;`

ICU-199 new rule syntax; performance improvement; update rules X-SVN-Rev: 559 2000-01-13 07:28:08 +00:00			`/**`
			`* Sorted and indexed table of rules. This is created by freeze() from`
			`* the rules in ruleVector.`
			`*/`
			`TransliterationRule** rules;`

			`/**`
			`* Index table. For text having a first character c, compute x = c&0xFF.`
			`* Now use rules[index[x]..index[x+1]-1]. This index table is created by`
			`* freeze().`
			`*/`
			`int32_t index[257];`

ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`public:`

			`/**`
			`* Construct a new empty rule set.`
			`*/`
			`TransliterationRuleSet();`

ICU-476 fix RBT et al copy constructor X-SVN-Rev: 1727 2000-06-30 23:26:07 +00:00			`/**`
			`* Copy constructor.`
			`*/`
ICU-432 make copy ct canonical to fix mem leak X-SVN-Rev: 1787 2000-07-11 18:45:49 +00:00			`TransliterationRuleSet(const TransliterationRuleSet&);`
ICU-476 fix RBT et al copy constructor X-SVN-Rev: 1727 2000-06-30 23:26:07 +00:00
ICU-199 new rule syntax; performance improvement; update rules X-SVN-Rev: 559 2000-01-13 07:28:08 +00:00			`/**`
			`* Destructor.`
			`*/`
			`virtual ~TransliterationRuleSet();`

ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/**`
			`* Return the maximum context length.`
			`* @return the length of the longest preceding context.`
			`*/`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`virtual int32_t getMaximumContextLength(void) const;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* Add a rule to this set. Rules are added in order, and order is`
ICU-432 leak fix cleanup X-SVN-Rev: 1840 2000-07-13 00:40:31 +00:00			`* significant. The last call to this method must be followed by`
			`* a call to <code>freeze()</code> before the rule set is used.`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`*`
ICU-199 new rule syntax; performance improvement; update rules X-SVN-Rev: 559 2000-01-13 07:28:08 +00:00			`* @param adoptedRule the rule to add`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`*/`
			`virtual void addRule(TransliterationRule* adoptedRule,`
			`UErrorCode& status);`

			`/**`
ICU-432 leak fix cleanup X-SVN-Rev: 1840 2000-07-13 00:40:31 +00:00			`* Check this for masked rules and index it to optimize performance.`
			`* The sequence of operations is: (1) add rules to a set using`
			`* <code>addRule()</code>; (2) freeze the set using`
			`* <code>freeze()</code>; (3) use the rule set. If`
			`* <code>addRule()</code> is called after calling this method, it`
			`* invalidates this object, and this method must be called again.`
			`* That is, <code>freeze()</code> may be called multiple times,`
			`* although for optimal performance it shouldn't be.`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`*/`
ICU-199 new rule syntax; performance improvement; update rules X-SVN-Rev: 559 2000-01-13 07:28:08 +00:00			`virtual void freeze(const TransliterationRuleData& data,`
			`UErrorCode& status);`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* Attempt to find a matching rule at the specified point in the text.`
			`* @param text the text, both translated and untranslated`
			`* @param start the beginning index, inclusive; <code>0 <= start`
			`* <= limit</code>.`
			`* @param limit the ending index, exclusive; <code>start <= limit`
			`* <= text.length()</code>.`
			`* @param cursor position at which to translate next, representing offset`
			`* into text. This value must be between <code>start</code> and`
			`* <code>limit</code>.`
			`* @param data a dictionary mapping variables to the sets they`
			`* represent (maps <code>Character</code> to <code>UnicodeSet</code>)`
			`* @param filter the filter. Any character for which`
			`* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be`
			`* altered by this transliterator. If <tt>filter</tt> is`
			`* <tt>null</tt> then no filtering is applied.`
			`* @return the matching rule, or null if none found.`
			`*/`
			`virtual TransliterationRule* findMatch(const Replaceable& text,`
ICU-474 fix UTransPosition handling X-SVN-Rev: 1688 2000-06-29 00:18:43 +00:00			`const UTransPosition& pos,`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`const TransliterationRuleData& data,`
			`const UnicodeFilter* filter) const;`
ICU-903 updated copyright notices. X-SVN-Rev: 4249 2001-03-22 00:09:10 +00:00
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/**`
			`* Attempt to find a matching rule at the specified point in the text.`
			`* Unlike <code>findMatch()</code>, this method does an incremental match.`
			`* An incremental match requires that there be no partial matches that might`
			`* pre-empt the full match that is found. If there are partial matches,`
			`* then null is returned. A non-null result indicates that a full match has`
			`* been found, and that it cannot be pre-empted by a partial match`
			`* regardless of what additional text is added to the translation buffer.`
			`* @param text the text, both translated and untranslated`
			`* @param start the beginning index, inclusive; <code>0 <= start`
			`* <= limit</code>.`
			`* @param limit the ending index, exclusive; <code>start <= limit`
			`* <= text.length()</code>.`
			`* @param cursor position at which to translate next, representing offset`
			`* into text. This value must be between <code>start</code> and`
			`* <code>limit</code>.`
			`* @param data a dictionary mapping variables to the sets they`
			`* represent (maps <code>Character</code> to <code>UnicodeSet</code>)`
			`* @param partial output parameter. <code>partial[0]</code> is set to`
			`* true if a partial match is returned.`
			`* @param filter the filter. Any character for which`
			`* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be`
			`* altered by this transliterator. If <tt>filter</tt> is`
			`* <tt>null</tt> then no filtering is applied.`
			`* @return the matching rule, or null if none found, or if the text buffer`
			`* does not have enough text yet to unambiguously match a rule.`
			`*/`
			`virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,`
ICU-474 fix UTransPosition handling X-SVN-Rev: 1688 2000-06-29 00:18:43 +00:00			`const UTransPosition& pos,`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`const TransliterationRuleData& data,`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`UBool& isPartial,`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`const UnicodeFilter* filter) const;`
ICU-990 add toRules API to TransliterationRule and TransliterationRuleSet X-SVN-Rev: 4970 2001-06-12 18:02:16 +00:00
			`/**`
			`* Create rule strings that represents this rule set.`
			`* @param result string to receive the rule strings. Current`
			`* contents will be deleted.`
			`*/`
			`virtual UnicodeString& toRules(UnicodeString& result,`
			`const TransliterationRuleData& data,`
			`UBool escapeUnprintable) const;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`};`
			`#endif`