ICU-1434 initial implementation of source/target set API
X-SVN-Rev: 8971
This commit is contained in:
parent
59164f02ca
commit
d1773b2571
@ -95,6 +95,13 @@ public:
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*) {}
|
||||
|
||||
/**
|
||||
* Stubbed out implementation of UnicodeMatcher API.
|
||||
* @param toUnionTo the set into which to union the source characters
|
||||
* The stubbed-out implementation adds nothing to toUnionTo and returns no value.
|
||||
*/
|
||||
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
protected:
|
||||
|
||||
UnicodeFilter();
|
||||
|
@ -14,6 +14,7 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
class Replaceable;
|
||||
class UnicodeString;
|
||||
class UnicodeSet;
|
||||
|
||||
/**
|
||||
* Constants returned by <code>UnicodeMatcher::matches()</code>
|
||||
@ -128,6 +129,13 @@ public:
|
||||
* indexing.
|
||||
*/
|
||||
virtual UBool matchesIndexValue(uint8_t v) const = 0;
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be matched by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the source characters
|
||||
*/
|
||||
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -536,6 +536,15 @@ public:
|
||||
const UnicodeString& s);
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Implementation of UnicodeMatcher API. Union the set of all
|
||||
* characters that may be matched by this object into the given
|
||||
* set.
|
||||
* @param toUnionTo the set into which to union the source characters
|
||||
*/
|
||||
void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
/**
|
||||
* Returns the index of the given character within this set, where
|
||||
* the set is ordered by ascending code point. If the character
|
||||
|
@ -62,6 +62,10 @@ UBool UnicodeFilter::matchesIndexValue(uint8_t v) const {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
// Stub this out for filters that do not implement this
|
||||
void UnicodeFilter::addMatchSetTo(UnicodeSet& toUnionTo) const {
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
@ -900,6 +900,13 @@ int32_t UnicodeSet::matchRest(const Replaceable& text,
|
||||
return maxLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of the UnicodeMatcher API: unions the contents of this set into toUnionTo.
|
||||
*/
|
||||
void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
|
||||
toUnionTo.addAll(*this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the given character within this set, where
|
||||
* the set is ordered by ascending code point. If the character
|
||||
|
@ -366,6 +366,41 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
|
||||
return rulesSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement Transliterator framework
|
||||
*/
|
||||
void CompoundTransliterator::handleGetSourceSet(UnicodeSet& result) const {
|
||||
UnicodeSet set;
|
||||
result.clear();
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
result.addAll(trans[i]->getSourceSet(set));
|
||||
// Take the example of Hiragana-Latin. This is really
|
||||
// Hiragana-Katakana; Katakana-Latin. The source set of
|
||||
// these two is roughly [:Hiragana:] and [:Katakana:].
|
||||
// But the source set for the entire transliterator is
|
||||
// actually [:Hiragana:] ONLY -- that is, the first
|
||||
// non-empty source set.
|
||||
|
||||
// This is a heuristic, and not 100% reliable.
|
||||
if (!result.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Override Transliterator framework
|
||||
*/
|
||||
UnicodeSet& CompoundTransliterator::getTargetSet(UnicodeSet& result) const {
|
||||
UnicodeSet set;
|
||||
result.clear();
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
// This is a heuristic, and not 100% reliable.
|
||||
result.addAll(trans[i]->getTargetSet(set));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -9,6 +9,7 @@
|
||||
*/
|
||||
#include "funcrepl.h"
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
static const UChar AMPERSAND = 38; // '&'
|
||||
static const UChar OPEN[] = {40,32,0}; // "( "
|
||||
@ -91,6 +92,14 @@ UnicodeString& FunctionReplacer::toReplacerPattern(UnicodeString& rule,
|
||||
return rule;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeReplacer
|
||||
*/
|
||||
void FunctionReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
|
||||
UnicodeSet set;
|
||||
toUnionTo.addAll(translit->getTargetSet(set));
|
||||
}
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
|
@ -81,6 +81,11 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer {
|
||||
virtual UnicodeString& toReplacerPattern(UnicodeString& rule,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
* Implement UnicodeReplacer
|
||||
*/
|
||||
virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
|
@ -114,6 +114,15 @@ UBool Quantifier::matchesIndexValue(uint8_t v) const {
|
||||
return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeMatcher
|
||||
*/
|
||||
void Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const {
|
||||
if (maxCount > 0) {
|
||||
matcher->toMatcher()->addMatchSetTo(toUnionTo);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeFunctor
|
||||
*/
|
||||
|
@ -56,6 +56,11 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher {
|
||||
*/
|
||||
virtual UBool matchesIndexValue(uint8_t v) const;
|
||||
|
||||
/**
|
||||
* Implement UnicodeMatcher
|
||||
*/
|
||||
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
|
@ -151,5 +151,19 @@ UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
|
||||
return data->ruleSet.toRules(rulesSource, escapeUnprintable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement Transliterator framework: fills result from the rule set via getSourceTargetSet(result, FALSE).
|
||||
*/
|
||||
void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
|
||||
data->ruleSet.getSourceTargetSet(result, FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Override Transliterator framework: returns the rule set's getSourceTargetSet(result, TRUE).
|
||||
*/
|
||||
UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
|
||||
return data->ruleSet.getSourceTargetSet(result, TRUE);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
@ -494,6 +494,32 @@ void TransliterationRule::setData(const TransliterationRuleData* d) {
|
||||
// Don't have to do segments since they are in the context or key
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be modified by this rule
|
||||
* into the given set.
|
||||
*/
|
||||
void TransliterationRule::addSourceSetTo(UnicodeSet& toUnionTo) const {
|
||||
int32_t limit = anteContextLength + keyLength;
|
||||
for (int32_t i=anteContextLength; i<limit; ) {
|
||||
UChar32 ch = pattern.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(ch);
|
||||
const UnicodeMatcher* matcher = data->lookupMatcher(ch);
|
||||
if (matcher == NULL) {
|
||||
toUnionTo.add(ch);
|
||||
} else {
|
||||
matcher->addMatchSetTo(toUnionTo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be emitted by this rule
|
||||
* into the given set. Delegates to the addReplacementSetTo() of this rule's output replacer.
|
||||
*/
|
||||
void TransliterationRule::addTargetSetTo(UnicodeSet& toUnionTo) const {
|
||||
output->toReplacer()->addReplacementSetTo(toUnionTo);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
@ -268,6 +268,19 @@ public:
|
||||
*/
|
||||
virtual UnicodeString& toRule(UnicodeString& pat,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be modified by this rule
|
||||
* into the given set.
|
||||
*/
|
||||
void addSourceSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be emitted by this rule
|
||||
* into the given set.
|
||||
*/
|
||||
void addTargetSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
private:
|
||||
|
||||
friend class StringMatcher;
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "rbt_set.h"
|
||||
#include "rbt_rule.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
@ -404,4 +405,24 @@ UnicodeString& TransliterationRuleSet::toRules(UnicodeString& ruleSource,
|
||||
return ruleSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the set of all characters that may be modified
|
||||
* (getTarget=false) or emitted (getTarget=true) by this set.
|
||||
*/
|
||||
UnicodeSet& TransliterationRuleSet::getSourceTargetSet(UnicodeSet& result,
|
||||
UBool getTarget) const {
|
||||
result.clear();
|
||||
int32_t count = ruleVector->size();
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
TransliterationRule* r =
|
||||
(TransliterationRule*) ruleVector->elementAt(i);
|
||||
if (getTarget) {
|
||||
r->addTargetSetTo(result);
|
||||
} else {
|
||||
r->addSourceSetTo(result);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -20,6 +20,7 @@ class TransliterationRule;
|
||||
class TransliterationRuleData;
|
||||
class UnicodeFilter;
|
||||
class UnicodeString;
|
||||
class UnicodeSet;
|
||||
|
||||
/**
|
||||
* A set of rules for a <code>RuleBasedTransliterator</code>.
|
||||
@ -132,6 +133,13 @@ public:
|
||||
*/
|
||||
virtual UnicodeString& toRules(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
* Return the set of all characters that may be modified
|
||||
* (getTarget=false) or emitted (getTarget=true) by this set.
|
||||
*/
|
||||
UnicodeSet& getSourceTargetSet(UnicodeSet& result,
|
||||
UBool getTarget) const;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "strmatch.h"
|
||||
#include "rbt_data.h"
|
||||
#include "util.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -180,6 +181,22 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const {
|
||||
return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeMatcher
|
||||
*/
|
||||
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
|
||||
UChar32 ch;
|
||||
for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {
|
||||
ch = pattern.char32At(i);
|
||||
const UnicodeMatcher* matcher = data->lookupMatcher(ch);
|
||||
if (matcher == NULL) {
|
||||
toUnionTo.add(ch);
|
||||
} else {
|
||||
matcher->addMatchSetTo(toUnionTo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UnicodeReplacer API
|
||||
*/
|
||||
@ -226,6 +243,19 @@ UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
|
||||
matchStart = matchLimit = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set. This implementation intentionally adds nothing (see the body comment).
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
*/
|
||||
void StringMatcher::addReplacementSetTo(UnicodeSet& toUnionTo) const {
|
||||
// The output of this replacer varies; it is the source text between
|
||||
// matchStart and matchLimit. Since this varies depending on the
|
||||
// input text, we can't compute it here. We can either do nothing
|
||||
// or we can add ALL characters to the set. It's probably more useful
|
||||
// to do nothing.
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeFunctor
|
||||
*/
|
||||
|
@ -96,6 +96,11 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
|
||||
*/
|
||||
virtual UBool matchesIndexValue(uint8_t v) const;
|
||||
|
||||
/**
|
||||
* Implement UnicodeMatcher
|
||||
*/
|
||||
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
/**
|
||||
* Implement UnicodeFunctor
|
||||
*/
|
||||
@ -145,6 +150,13 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
|
||||
*/
|
||||
void resetMatch();
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
*/
|
||||
virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "strrepl.h"
|
||||
#include "rbt_data.h"
|
||||
#include "util.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -257,6 +258,22 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
|
||||
return rule;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeReplacer
|
||||
*/
|
||||
void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
|
||||
UChar32 ch;
|
||||
for (int32_t i=0; i<output.length(); i+=UTF_CHAR_LENGTH(ch)) {
|
||||
ch = output.char32At(i);
|
||||
UnicodeReplacer* r = data->lookupReplacer(ch);
|
||||
if (r == NULL) {
|
||||
toUnionTo.add(ch);
|
||||
} else {
|
||||
r->addReplacementSetTo(toUnionTo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
|
@ -127,6 +127,11 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer {
|
||||
virtual UnicodeString& toReplacerPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
* Implement UnicodeReplacer
|
||||
*/
|
||||
virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
|
@ -1057,6 +1057,36 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
|
||||
return rulesSource;
|
||||
}
|
||||
|
||||
/**
 * Returns the set of characters that may be modified by this
 * transliterator, taking the current filter into account.
 *
 * The unfiltered set is obtained from handleGetSourceSet(); if a filter
 * is installed, the result is then intersected with the set of
 * characters the filter can match.
 *
 * @param result receives the result set; previous contents are lost
 * @return a reference to result
 */
UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const {
    handleGetSourceSet(result);
    if (filter == NULL) {
        return result;
    }

    // Most, but not all filters will be UnicodeSets.  Optimize for
    // the high-runner case by intersecting with the filter directly.
    if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) {
        result.retainAll(*(const UnicodeSet*) filter);
    } else {
        // Build the filter's match set in a stack temporary.  This avoids
        // a heap allocation whose result was never checked, and needs no
        // manual delete bookkeeping.
        UnicodeSet filterSet;
        filter->addMatchSetTo(filterSet);
        result.retainAll(filterSet);
    }
    return result;
}
|
||||
|
||||
// Default framework implementation: reports an empty source set by clearing result.
void Transliterator::handleGetSourceSet(UnicodeSet& result) const {
|
||||
result.clear();
|
||||
}
|
||||
|
||||
// Default implementation: empties result and returns whatever result.clear() returns.
UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const {
|
||||
return result.clear();
|
||||
}
|
||||
|
||||
// For public consumption
|
||||
void Transliterator::registerFactory(const UnicodeString& id,
|
||||
Transliterator::Factory factory,
|
||||
|
@ -159,6 +159,24 @@ public:
|
||||
virtual UnicodeString& toRules(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Implement Transliterator framework
|
||||
*/
|
||||
virtual void handleGetSourceSet(UnicodeSet& result) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Override Transliterator framework
|
||||
*/
|
||||
virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
|
||||
|
||||
// handleTransliterate should be protected, but was declared public before ICU 2.2.
|
||||
// We do not have a separate deprecation date for this method since the entire class
|
||||
// will become internal after 2002-sep-30.
|
||||
#ifndef U_USE_DEPRECATED_TRANSLITERATOR_API
|
||||
protected:
|
||||
#endif
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
* @deprecated To be removed after 2002-sep-30.
|
||||
|
@ -382,6 +382,7 @@ public:
|
||||
*/
|
||||
Transliterator* clone(void) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
* @deprecated To be removed after 2002-sep-30.
|
||||
@ -389,6 +390,7 @@ public:
|
||||
virtual void handleTransliterate(Replaceable& text, UTransPosition& offsets,
|
||||
UBool isIncremental) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Return a representation of this transliterator as source rules.
|
||||
* These rules will produce an equivalent transliterator if used
|
||||
@ -404,6 +406,18 @@ public:
|
||||
virtual UnicodeString& toRules(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Implement Transliterator framework
|
||||
*/
|
||||
virtual void handleGetSourceSet(UnicodeSet& result) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Override Transliterator framework
|
||||
*/
|
||||
virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
|
||||
|
||||
/**
|
||||
* Return the class ID for this class. This is useful only for
|
||||
* comparing to a return value from getDynamicClassID(). For example:
|
||||
|
@ -780,6 +780,51 @@ public:
|
||||
virtual UnicodeString& toRules(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
* Returns the set of all characters that may be modified in the
|
||||
* input text by this Transliterator. This incorporates this
|
||||
* object's current filter; if the filter is changed, the return
|
||||
* value of this function will change. The default implementation
|
||||
* returns an empty set. Some subclasses may override {@link
|
||||
* #handleGetSourceSet} to return a more precise result. The
|
||||
* return result is approximate in any case and is intended for
|
||||
* use by tests, tools, or utilities.
|
||||
* @param result receives result set; previous contents lost
|
||||
* @return a reference to result
|
||||
* @see #getTargetSet
|
||||
* @see #handleGetSourceSet
|
||||
*/
|
||||
UnicodeSet& getSourceSet(UnicodeSet& result) const;
|
||||
|
||||
/**
|
||||
* Framework method that returns the set of all characters that
|
||||
* may be modified in the input text by this Transliterator,
|
||||
* ignoring the effect of this object's filter. The base class
|
||||
* implementation returns the empty set. Subclasses that wish to
|
||||
* implement this should override this method.
|
||||
* Implementations write the set of characters that this
|
||||
* transliterator may modify into result; nothing is returned
|
||||
* by value.
|
||||
* @param result receives result set; previous contents lost
|
||||
* @see #getSourceSet
|
||||
* @see #getTargetSet
|
||||
*/
|
||||
virtual void handleGetSourceSet(UnicodeSet& result) const;
|
||||
|
||||
/**
|
||||
* Returns the set of all characters that may be generated as
|
||||
* replacement text by this transliterator. The default
|
||||
* implementation returns the empty set. Some subclasses may
|
||||
* override this method to return a more precise result. The
|
||||
* return result is approximate in any case and is intended for
|
||||
* use by tests, tools, or utilities requiring such
|
||||
* meta-information.
|
||||
* @param result receives result set; previous contents lost
|
||||
* @return a reference to result
|
||||
* @see #getSourceSet
|
||||
*/
|
||||
virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
|
@ -16,6 +16,7 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
class Replaceable;
|
||||
class UnicodeString;
|
||||
class UnicodeSet;
|
||||
|
||||
/**
|
||||
* <code>UnicodeReplacer</code> defines a protocol for objects that
|
||||
@ -67,6 +68,13 @@ class U_I18N_API UnicodeReplacer /* not : public UObject because this is an inte
|
||||
*/
|
||||
virtual UnicodeString& toReplacerPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const = 0;
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
*/
|
||||
virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -331,6 +331,9 @@ void CompoundTransliteratorTest::TestTransliterate(){
|
||||
if(U_FAILURE(status)){
|
||||
errln("CompoundTransliterator construction failed");
|
||||
}else {
|
||||
#if 0
|
||||
// handleTransliterate is a protected method that was erroneously made
|
||||
// public. It is not public API that needs to be tested.
|
||||
UnicodeString s("abcabc");
|
||||
expect(*ct1, s, s);
|
||||
UTransPosition index = { 0, 0, 0, 0 };
|
||||
@ -343,7 +346,7 @@ void CompoundTransliteratorTest::TestTransliterate(){
|
||||
UnicodeString rsource3(s);
|
||||
ct1->handleTransliterate(rsource3, index, TRUE);
|
||||
expectAux(ct1->getID() + ":String, index(1,2,3), incremental=TRUE", rsource3 + "->" + rsource3, rsource3==expectedResult, expectedResult);
|
||||
|
||||
#endif
|
||||
}
|
||||
delete ct1;
|
||||
UnicodeString Data[]={
|
||||
@ -391,7 +394,7 @@ void CompoundTransliteratorTest::expect(const CompoundTransliterator& t,
|
||||
t.transliterate(rsource);
|
||||
expectAux(t.getID() + ":Replaceable", source + "->" + rsource, rsource==expectedResult, expectedResult);
|
||||
|
||||
// Test handleTransliterate (incremental) transliteration --
|
||||
// Test transliterate (incremental) transliteration --
|
||||
rsource.remove();
|
||||
rsource.append(source);
|
||||
UTransPosition index;
|
||||
@ -399,7 +402,8 @@ void CompoundTransliteratorTest::expect(const CompoundTransliterator& t,
|
||||
index.contextLimit = source.length();
|
||||
index.start = 0;
|
||||
index.limit = source.length();
|
||||
t.handleTransliterate(rsource, index, TRUE);
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
t.transliterate(rsource, index, ec);
|
||||
t.finishTransliteration(rsource,index);
|
||||
expectAux(t.getID() + ":handleTransliterate ", source + "->" + rsource, rsource==expectedResult, expectedResult);
|
||||
|
||||
|
@ -159,6 +159,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
||||
TESTCASE(69,TestMulticharStringSet);
|
||||
TESTCASE(70,TestUserFunction);
|
||||
TESTCASE(71,TestAnyX);
|
||||
TESTCASE(72,TestSourceTargetSet);
|
||||
|
||||
default: name = ""; break;
|
||||
}
|
||||
@ -3570,6 +3571,53 @@ void TransliteratorTest::TestAnyX(void) {
|
||||
delete anyLatin;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the source and target set API. These are only implemented
|
||||
* for RBT and CompoundTransliterator at this time.
|
||||
*/
|
||||
void TransliteratorTest::TestSourceTargetSet() {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
|
||||
// Rules
|
||||
const char* r =
|
||||
"a > b; "
|
||||
"r [x{lu}] > q;";
|
||||
|
||||
// Expected source
|
||||
UnicodeSet expSrc("[arx{lu}]", ec);
|
||||
|
||||
// Expected target
|
||||
UnicodeSet expTrg("[bq]", ec);
|
||||
|
||||
UParseError pe;
|
||||
Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
|
||||
|
||||
if (U_FAILURE(ec)) {
|
||||
delete t;
|
||||
errln("FAIL: Couldn't set up test");
|
||||
return;
|
||||
}
|
||||
|
||||
UnicodeSet src; t->getSourceSet(src);
|
||||
UnicodeSet trg; t->getTargetSet(trg);
|
||||
|
||||
if (src == expSrc && trg == expTrg) {
|
||||
UnicodeString a, b;
|
||||
logln((UnicodeString)"Ok: " +
|
||||
r + " => source = " + src.toPattern(a, TRUE) +
|
||||
", target = " + trg.toPattern(b, TRUE));
|
||||
} else {
|
||||
UnicodeString a, b, c, d;
|
||||
errln((UnicodeString)"FAIL: " +
|
||||
r + " => source = " + src.toPattern(a, TRUE) +
|
||||
", expected " + expSrc.toPattern(b, TRUE) +
|
||||
"; target = " + trg.toPattern(c, TRUE) +
|
||||
", expected " + expTrg.toPattern(d, TRUE));
|
||||
}
|
||||
|
||||
delete t;
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
@ -328,6 +328,8 @@ private:
|
||||
|
||||
void TestAnyX(void);
|
||||
|
||||
void TestSourceTargetSet(void);
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
Loading…
Reference in New Issue
Block a user