2017-01-20 00:20:31 +00:00
|
|
|
// © 2016 and later: Unicode, Inc. and others.
|
2016-06-15 18:58:17 +00:00
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
1999-11-20 00:40:50 +00:00
|
|
|
/*
|
2007-06-03 06:08:46 +00:00
|
|
|
**********************************************************************
|
2016-05-31 21:45:07 +00:00
|
|
|
* Copyright (C) 1999-2007, International Business Machines Corporation
|
|
|
|
* and others. All Rights Reserved.
|
1999-11-20 00:40:50 +00:00
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 11/17/99 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
#ifndef RBT_SET_H
|
|
|
|
#define RBT_SET_H
|
|
|
|
|
2002-06-27 01:19:20 +00:00
|
|
|
#include "unicode/utypes.h"
|
2002-09-20 01:54:48 +00:00
|
|
|
|
|
|
|
#if !UCONFIG_NO_TRANSLITERATION
|
|
|
|
|
2002-06-27 01:19:20 +00:00
|
|
|
#include "unicode/uobject.h"
|
2000-06-29 00:18:43 +00:00
|
|
|
#include "unicode/utrans.h"
|
2002-06-27 01:19:20 +00:00
|
|
|
#include "uvector.h"
|
1999-11-20 00:40:50 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
class Replaceable;
|
|
|
|
class TransliterationRule;
|
|
|
|
class TransliterationRuleData;
|
|
|
|
class UnicodeFilter;
|
|
|
|
class UnicodeString;
|
2002-06-28 21:13:54 +00:00
|
|
|
class UnicodeSet;
|
1999-11-20 00:40:50 +00:00
|
|
|
|
|
|
|
/**
|
2001-07-25 19:11:02 +00:00
|
|
|
* A set of rules for a <code>RuleBasedTransliterator</code>.
|
1999-11-20 00:40:50 +00:00
|
|
|
* @author Alan Liu
|
|
|
|
*/
|
2007-06-03 06:08:46 +00:00
|
|
|
class TransliterationRuleSet : public UMemory {
|
1999-11-20 00:40:50 +00:00
|
|
|
/**
|
2001-10-02 05:53:25 +00:00
|
|
|
* Vector of rules, in the order added. This is used while the
|
|
|
|
* rule set is getting built. After that, freeze() reorders and
|
|
|
|
* indexes the rules into rules[]. Any given rule is stored once
|
|
|
|
* in ruleVector, and one or more times in rules[]. ruleVector
|
|
|
|
* owns and deletes the rules.
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
2000-01-13 07:28:08 +00:00
|
|
|
UVector* ruleVector;
|
1999-11-20 00:40:50 +00:00
|
|
|
|
|
|
|
/**
|
2001-10-02 05:53:25 +00:00
|
|
|
* Sorted and indexed table of rules. This is created by freeze()
|
|
|
|
* from the rules in ruleVector. It contains alias pointers to
|
|
|
|
* the rules in ruleVector. It is zero before freeze() is called
|
|
|
|
* and non-zero thereafter.
|
2000-01-13 07:28:08 +00:00
|
|
|
*/
|
|
|
|
TransliterationRule** rules;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Index table. For text having a first character c, compute x = c&0xFF.
|
|
|
|
* Now use rules[index[x]..index[x+1]-1]. This index table is created by
|
2001-10-02 05:53:25 +00:00
|
|
|
* freeze(). Before freeze() is called it contains garbage.
|
2000-01-13 07:28:08 +00:00
|
|
|
*/
|
|
|
|
int32_t index[257];
|
|
|
|
|
2001-10-02 05:53:25 +00:00
|
|
|
/**
|
|
|
|
* Length of the longest preceding context
|
|
|
|
*/
|
|
|
|
int32_t maxContextLength;
|
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
public:
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Construct a new empty rule set.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param status Output parameter filled in with success or failure status.
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
2001-08-23 01:06:08 +00:00
|
|
|
TransliterationRuleSet(UErrorCode& status);
|
1999-11-20 00:40:50 +00:00
|
|
|
|
2000-06-30 23:26:07 +00:00
|
|
|
/**
|
|
|
|
* Copy constructor.
|
|
|
|
*/
|
2000-07-11 18:45:49 +00:00
|
|
|
TransliterationRuleSet(const TransliterationRuleSet&);
|
2000-06-30 23:26:07 +00:00
|
|
|
|
2000-01-13 07:28:08 +00:00
|
|
|
/**
|
|
|
|
* Destructor.
|
|
|
|
*/
|
|
|
|
virtual ~TransliterationRuleSet();
|
|
|
|
|
2001-09-18 00:24:14 +00:00
|
|
|
/**
|
|
|
|
* Change the data object that this rule belongs to. Used
|
|
|
|
* internally by the TransliterationRuleData copy constructor.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param data the new data value to be set.
|
2001-09-18 00:24:14 +00:00
|
|
|
*/
|
|
|
|
void setData(const TransliterationRuleData* data);
|
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
/**
|
|
|
|
* Return the maximum context length.
|
|
|
|
* @return the length of the longest preceding context.
|
|
|
|
*/
|
1999-12-22 22:57:04 +00:00
|
|
|
virtual int32_t getMaximumContextLength(void) const;
|
1999-11-20 00:40:50 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Add a rule to this set. Rules are added in order, and order is
|
2000-07-13 00:40:31 +00:00
|
|
|
* significant. The last call to this method must be followed by
|
|
|
|
* a call to <code>freeze()</code> before the rule set is used.
|
2001-10-02 05:53:25 +00:00
|
|
|
* This method must <em>not</em> be called after freeze() has been
|
|
|
|
* called.
|
1999-11-20 00:40:50 +00:00
|
|
|
*
|
2000-01-13 07:28:08 +00:00
|
|
|
* @param adoptedRule the rule to add
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
|
|
|
virtual void addRule(TransliterationRule* adoptedRule,
|
|
|
|
UErrorCode& status);
|
|
|
|
|
|
|
|
/**
|
2000-07-13 00:40:31 +00:00
|
|
|
* Check this for masked rules and index it to optimize performance.
|
|
|
|
* The sequence of operations is: (1) add rules to a set using
|
|
|
|
* <code>addRule()</code>; (2) freeze the set using
|
|
|
|
* <code>freeze()</code>; (3) use the rule set. If
|
|
|
|
* <code>addRule()</code> is called after calling this method, it
|
|
|
|
* invalidates this object, and this method must be called again.
|
|
|
|
* That is, <code>freeze()</code> may be called multiple times,
|
|
|
|
* although for optimal performance it shouldn't be.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param parseError A pointer to UParseError to receive information about errors
|
|
|
|
* occurred.
|
|
|
|
* @param status Output parameter filled in with success or failure status.
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
2001-08-31 03:23:39 +00:00
|
|
|
virtual void freeze(UParseError& parseError, UErrorCode& status);
|
2001-07-25 19:11:02 +00:00
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
/**
|
2001-07-25 19:11:02 +00:00
|
|
|
* Transliterate the given text with the given UTransPosition
|
|
|
|
* indices. Return TRUE if the transliteration should continue
|
|
|
|
* or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
|
|
|
|
* Note that FALSE is only ever returned if isIncremental is TRUE.
|
|
|
|
* @param text the text to be transliterated
|
|
|
|
* @param index the position indices, which will be updated
|
|
|
|
* @param isIncremental if TRUE, assume new text may be inserted
|
|
|
|
* at index.limit, and return FALSE if thre is a partial match.
|
|
|
|
* @return TRUE unless a U_PARTIAL_MATCH has been obtained,
|
|
|
|
* indicating that transliteration should stop until more text
|
|
|
|
* arrives.
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
2001-07-25 19:11:02 +00:00
|
|
|
UBool transliterate(Replaceable& text,
|
|
|
|
UTransPosition& index,
|
|
|
|
UBool isIncremental);
|
2001-06-12 18:02:16 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Create rule strings that represents this rule set.
|
|
|
|
* @param result string to receive the rule strings. Current
|
|
|
|
* contents will be deleted.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param escapeUnprintable True, will escape the unprintable characters
|
|
|
|
* @return A reference to 'result'.
|
2001-06-12 18:02:16 +00:00
|
|
|
*/
|
|
|
|
virtual UnicodeString& toRules(UnicodeString& result,
|
|
|
|
UBool escapeUnprintable) const;
|
2002-06-28 21:13:54 +00:00
|
|
|
|
2002-06-29 00:15:00 +00:00
|
|
|
/**
|
|
|
|
* Return the set of all characters that may be modified
|
|
|
|
* (getTarget=false) or emitted (getTarget=true) by this set.
|
|
|
|
*/
|
|
|
|
UnicodeSet& getSourceTargetSet(UnicodeSet& result,
|
2004-11-11 23:34:58 +00:00
|
|
|
UBool getTarget) const;
|
2002-06-29 00:15:00 +00:00
|
|
|
|
2002-06-29 00:04:16 +00:00
|
|
|
private:
|
|
|
|
|
2002-10-04 18:06:33 +00:00
|
|
|
TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class
|
1999-11-20 00:40:50 +00:00
|
|
|
};
|
2001-10-08 23:26:58 +00:00
|
|
|
|
|
|
|
U_NAMESPACE_END
|
2002-09-20 01:54:48 +00:00
|
|
|
|
|
|
|
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
#endif
|