ICU-1434 initial implementation of source/target set API

X-SVN-Rev: 8971
2002-06-28 21:13:54 +00:00 · 2002-06-28 21:13:54 +00:00 · d1773b2571
commit d1773b2571
parent 59164f02ca
27 changed files with 416 additions and 3 deletions
--- a/icu4c/source/common/unicode/unifilt.h
+++ b/icu4c/source/common/unicode/unifilt.h
@ -95,6 +95,13 @@ public:
     */
    virtual void setData(const TransliterationRuleData*) {}

+    /**
+     * Stubbed out implementation of UnicodeMatcher API.
+     * This stub adds nothing to the given set.
+     * @param toUnionTo the set into which to union the source characters
+     */
+    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
 protected:

    UnicodeFilter();
--- a/icu4c/source/common/unicode/unimatch.h
+++ b/icu4c/source/common/unicode/unimatch.h
@ -14,6 +14,7 @@ U_NAMESPACE_BEGIN

 class Replaceable;
 class UnicodeString;
+class UnicodeSet;

 /**
 * Constants returned by <code>UnicodeMatcher::matches()</code>
@ -128,6 +129,13 @@ public:
     * indexing.
     */
    virtual UBool matchesIndexValue(uint8_t v) const = 0;
+
+    /**
+     * Union the set of all characters that may be matched by this object
+     * into the given set.
+     * @param toUnionTo the set into which to union the source characters
+     */
+    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
 };

 U_NAMESPACE_END
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -536,6 +536,15 @@ public:
                             const UnicodeString& s);
    
 public:
+
+    /**
+     * Implementation of UnicodeMatcher API.  Union the set of all
+     * characters that may be matched by this object into the given
+     * set.
+     * @param toUnionTo the set into which to union the source characters
+     */
+    void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
    /**
     * Returns the index of the given character within this set, where
     * the set is ordered by ascending code point.  If the character
--- a/icu4c/source/common/unifilt.cpp
+++ b/icu4c/source/common/unifilt.cpp
@ -62,6 +62,10 @@ UBool UnicodeFilter::matchesIndexValue(uint8_t v) const {
    return FALSE;
 }

+// Default stub for filters that do not implement this: adds nothing.
+void UnicodeFilter::addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {
+}
+
 U_NAMESPACE_END

 //eof
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@ -900,6 +900,13 @@ int32_t UnicodeSet::matchRest(const Replaceable& text,
    return maxLen;
 }

+/**
+ * Implementation of UnicodeMatcher API: unions this set into toUnionTo.
+ */
+void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
+    toUnionTo.addAll(*this);
+}
+
 /**
 * Returns the index of the given character within this set, where
 * the set is ordered by ascending code point.  If the character
--- a/icu4c/source/i18n/cpdtrans.cpp
+++ b/icu4c/source/i18n/cpdtrans.cpp
@ -366,6 +366,41 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
    return rulesSource;
 }

+/**
+ * Implement Transliterator framework.  Sets result to the source set
+ * of the first component transliterator whose source set is non-empty,
+ * or leaves it empty if every component reports an empty set.
+ */
+void CompoundTransliterator::handleGetSourceSet(UnicodeSet& result) const {
+    // Take the example of Hiragana-Latin.  This is really
+    // Hiragana-Katakana; Katakana-Latin.  The source set of
+    // these two is roughly [:Hiragana:] and [:Katakana:].
+    // But the source set for the entire transliterator is
+    // actually [:Hiragana:] ONLY -- that is, the first
+    // non-empty source set.
+    //
+    // This is a heuristic, and not 100% reliable.
+    UnicodeSet componentSource;
+    result.clear();
+    // Keep going only while nothing has been collected yet.
+    for (int32_t idx = 0; idx < count && result.isEmpty(); ++idx) {
+        result.addAll(trans[idx]->getSourceSet(componentSource));
+    }
+}
+
+/**
+ * Override Transliterator framework: union of all component target sets.
+ */
+UnicodeSet& CompoundTransliterator::getTargetSet(UnicodeSet& result) const {
+    result.clear();
+    UnicodeSet componentTarget;
+    // This is a heuristic, and not 100% reliable.
+    for (int32_t idx = 0; idx < count; ++idx) {
+        result.addAll(trans[idx]->getTargetSet(componentTarget));
+    }
+    return result;
+}
+
 /**
 * Implements {@link Transliterator#handleTransliterate}.
 */
--- a/icu4c/source/i18n/funcrepl.cpp
+++ b/icu4c/source/i18n/funcrepl.cpp
@ -9,6 +9,7 @@
 */
 #include "funcrepl.h"
 #include "unicode/translit.h"
+#include "unicode/uniset.h"

 static const UChar AMPERSAND = 38; // '&'
 static const UChar OPEN[]    = {40,32,0}; // "( "
@ -91,6 +92,14 @@ UnicodeString& FunctionReplacer::toReplacerPattern(UnicodeString& rule,
    return rule;
 }

+/**
+ * Implement UnicodeReplacer: unions translit's target set into toUnionTo.
+ */
+void FunctionReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
+    UnicodeSet set;
+    toUnionTo.addAll(translit->getTargetSet(set));
+}
+
 /**
 * UnicodeFunctor API
 */
--- a/icu4c/source/i18n/funcrepl.h
+++ b/icu4c/source/i18n/funcrepl.h
@ -81,6 +81,11 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer {
    virtual UnicodeString& toReplacerPattern(UnicodeString& rule,
                                             UBool escapeUnprintable) const;

+    /**
+     * Implement UnicodeReplacer
+     */
+    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
+
    /**
     * UnicodeFunctor API
     */
--- a/icu4c/source/i18n/quant.cpp
+++ b/icu4c/source/i18n/quant.cpp
@ -114,6 +114,15 @@ UBool Quantifier::matchesIndexValue(uint8_t v) const {
    return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v);
 }

+/**
+ * Implement UnicodeMatcher.  Contributes the wrapped matcher's match
+ * set, unless maxCount is not positive, in which case nothing is added.
+ */
+void Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const {
+    if (maxCount <= 0) return;
+    matcher->toMatcher()->addMatchSetTo(toUnionTo);
+}
+
 /**
 * Implement UnicodeFunctor
 */
--- a/icu4c/source/i18n/quant.h
+++ b/icu4c/source/i18n/quant.h
@ -56,6 +56,11 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher {
     */
    virtual UBool matchesIndexValue(uint8_t v) const;

+    /**
+     * Implement UnicodeMatcher
+     */
+    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
    /**
     * UnicodeFunctor API
     */
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -151,5 +151,19 @@ UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
    return data->ruleSet.toRules(rulesSource, escapeUnprintable);
 }

+/**
+ * Implement Transliterator framework: fills result from the rule set.
+ */
+void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
+    data->ruleSet.getSourceTargetSet(result, FALSE);
+}
+
+/**
+ * Override Transliterator framework: fills result from the rule set.
+ */
+UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
+    return data->ruleSet.getSourceTargetSet(result, TRUE);
+}
+
 U_NAMESPACE_END

--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -494,6 +494,32 @@ void TransliterationRule::setData(const TransliterationRuleData* d) {
    // Don't have to do segments since they are in the context or key
 }

+/**
+ * Union the set of all characters that may be modified by this rule
+ * (those matched by the key portion of the pattern) into the given set.
+ */
+void TransliterationRule::addSourceSetTo(UnicodeSet& toUnionTo) const {
+    const int32_t keyLimit = anteContextLength + keyLength;
+    for (int32_t pos = anteContextLength; pos < keyLimit; ) {
+        const UChar32 c = pattern.char32At(pos);
+        pos += UTF_CHAR_LENGTH(c);
+        const UnicodeMatcher* m = data->lookupMatcher(c);
+        if (m != NULL) {
+            m->addMatchSetTo(toUnionTo);
+        } else {
+            toUnionTo.add(c);
+        }
+    }
+}
+
+/**
+ * Union the set of all characters that may be emitted by this rule
+ * into the given set.  Delegates to this rule's output replacer.
+ */
+void TransliterationRule::addTargetSetTo(UnicodeSet& toUnionTo) const {
+    output->toReplacer()->addReplacementSetTo(toUnionTo);
+}
+
 U_NAMESPACE_END

 //eof
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -268,6 +268,19 @@ public:
     */
    virtual UnicodeString& toRule(UnicodeString& pat,
                                  UBool escapeUnprintable) const;
+
+    /**
+     * Union the set of all characters that may be modified by this rule
+     * into the given set.
+     */
+    void addSourceSetTo(UnicodeSet& toUnionTo) const;
+
+    /**
+     * Union the set of all characters that may be emitted by this rule
+     * into the given set.
+     */
+    void addTargetSetTo(UnicodeSet& toUnionTo) const;
+
 private:

    friend class StringMatcher;
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@ -10,6 +10,7 @@
 #include "rbt_set.h"
 #include "rbt_rule.h"
 #include "unicode/unistr.h"
+#include "unicode/uniset.h"
 #include "cmemory.h"

 U_CDECL_BEGIN
@ -404,4 +405,24 @@ UnicodeString& TransliterationRuleSet::toRules(UnicodeString& ruleSource,
    return ruleSource;
 }

+/**
+ * Return the set of all characters that may be modified
+ * (getTarget=false) or emitted (getTarget=true) by this set.
+ */
+UnicodeSet& TransliterationRuleSet::getSourceTargetSet(UnicodeSet& result,
+                                                       UBool getTarget) const {
+    result.clear();
+    const int32_t ruleCount = ruleVector->size();
+    for (int32_t idx = 0; idx < ruleCount; ++idx) {
+        const TransliterationRule* rule =
+            (TransliterationRule*) ruleVector->elementAt(idx);
+        if (!getTarget) {
+            rule->addSourceSetTo(result);
+        } else {
+            rule->addTargetSetTo(result);
+        }
+    }
+    return result;
+}
+
 U_NAMESPACE_END
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@ -20,6 +20,7 @@ class TransliterationRule;
 class TransliterationRuleData;
 class UnicodeFilter;
 class UnicodeString;
+class UnicodeSet;

 /**
 * A set of rules for a <code>RuleBasedTransliterator</code>.
@ -132,6 +133,13 @@ public:
     */
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;
+
+    /**
+     * Return the set of all characters that may be modified
+     * (getTarget=false) or emitted (getTarget=true) by this set.
+     */
+    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
+				   UBool getTarget) const;
 };

 U_NAMESPACE_END
--- a/icu4c/source/i18n/strmatch.cpp
+++ b/icu4c/source/i18n/strmatch.cpp
@ -9,6 +9,7 @@
 #include "strmatch.h"
 #include "rbt_data.h"
 #include "util.h"
+#include "unicode/uniset.h"

 U_NAMESPACE_BEGIN

@ -180,6 +181,22 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const {
    return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);
 }

+/**
+ * Implement UnicodeMatcher: add each pattern element's match set.
+ */
+void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
+    for (int32_t pos = 0; pos < pattern.length(); ) {
+        const UChar32 c = pattern.char32At(pos);
+        pos += UTF_CHAR_LENGTH(c);
+        const UnicodeMatcher* m = data->lookupMatcher(c);
+        if (m != NULL) {
+            m->addMatchSetTo(toUnionTo);
+        } else {
+            toUnionTo.add(c);
+        }
+    }
+}
+
 /**
 * UnicodeReplacer API
 */
@ -226,6 +243,19 @@ UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
    matchStart = matchLimit = -1;
 }

+/**
+ * Union the set of all characters that may be output by this object
+ * into the given set.
+ * @param toUnionTo the set into which to union the output characters
+ */
+void StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const {
+    // The output of this replacer varies; it is the source text between
+    // matchStart and matchLimit.  Since this varies depending on the
+    // input text, we can't compute it here.  We can either do nothing
+    // or we can add ALL characters to the set.  It's probably more useful
+    // to do nothing.
+}
+
 /**
 * Implement UnicodeFunctor
 */
--- a/icu4c/source/i18n/strmatch.h
+++ b/icu4c/source/i18n/strmatch.h
@ -96,6 +96,11 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
     */
    virtual UBool matchesIndexValue(uint8_t v) const;

+    /**
+     * Implement UnicodeMatcher
+     */
+    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
    /**
     * Implement UnicodeFunctor
     */
@ -145,6 +150,13 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
     */
    void resetMatch();

+    /**
+     * Union the set of all characters that may be output by this object
+     * into the given set.
+     * @param toUnionTo the set into which to union the output characters
+     */
+    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
+
 private:

    /**
--- a/icu4c/source/i18n/strrepl.cpp
+++ b/icu4c/source/i18n/strrepl.cpp
@ -11,6 +11,7 @@
 #include "strrepl.h"
 #include "rbt_data.h"
 #include "util.h"
+#include "unicode/uniset.h"

 U_NAMESPACE_BEGIN

@ -257,6 +258,22 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
    return rule;
 }

+/**
+ * Implement UnicodeReplacer: add each output element's replacement set.
+ */
+void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
+    for (int32_t pos = 0; pos < output.length(); ) {
+        const UChar32 c = output.char32At(pos);
+        pos += UTF_CHAR_LENGTH(c);
+        UnicodeReplacer* replacer = data->lookupReplacer(c);
+        if (replacer != NULL) {
+            replacer->addReplacementSetTo(toUnionTo);
+        } else {
+            toUnionTo.add(c);
+        }
+    }
+}
+
 /**
 * UnicodeFunctor API
 */
--- a/icu4c/source/i18n/strrepl.h
+++ b/icu4c/source/i18n/strrepl.h
@ -127,6 +127,11 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer {
    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
                                             UBool escapeUnprintable) const;

+    /**
+     * Implement UnicodeReplacer
+     */
+    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
+
    /**
     * UnicodeFunctor API
     */
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -1057,6 +1057,36 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
    return rulesSource;
 }

+// Compute the source set: start from the set supplied by
+// handleGetSourceSet(), then keep only the characters that this
+// transliterator's filter (if there is one) can match.
+UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const {
+    handleGetSourceSet(result);
+    if (filter != NULL) {
+        // Most, but not all filters will be UnicodeSets.  Optimize for
+        // the high-runner case by intersecting with the filter itself.
+        if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) {
+            result.retainAll(*(const UnicodeSet*) filter);
+        } else {
+            // Other filters: collect their match set into a stack-local
+            // set.  This avoids a heap allocation that could fail and
+            // then be dereferenced as NULL, and needs no manual delete.
+            UnicodeSet filterSet;
+            filter->addMatchSetTo(filterSet);
+            result.retainAll(filterSet);
+        }
+    }
+    return result;
+}
+
+void Transliterator::handleGetSourceSet(UnicodeSet& result) const {
+    result.clear();  // base class reports an empty source set
+}
+
+UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const {
+    return result.clear();  // base class reports an empty target set
+}
+
 // For public consumption
 void Transliterator::registerFactory(const UnicodeString& id,
                                     Transliterator::Factory factory,
--- a/icu4c/source/i18n/unicode/cpdtrans.h
+++ b/icu4c/source/i18n/unicode/cpdtrans.h
@ -159,6 +159,24 @@ public:
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

+ protected:
+    /**
+     * Implement Transliterator framework
+     */
+    virtual void handleGetSourceSet(UnicodeSet& result) const;
+
+ public:
+    /**
+     * Override Transliterator framework
+     */
+    virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
+
+// handleTransliterate should be protected, but was declared public before ICU 2.2.
+// We do not have a separate deprecation date for this method since the entire class
+// will become internal after 2002-sep-30.
+#ifndef U_USE_DEPRECATED_TRANSLITERATOR_API
+ protected:
+#endif
    /**
     * Implements {@link Transliterator#handleTransliterate}.
     * @deprecated To be removed after 2002-sep-30.
--- a/icu4c/source/i18n/unicode/rbt.h
+++ b/icu4c/source/i18n/unicode/rbt.h
@ -382,6 +382,7 @@ public:
     */
    Transliterator* clone(void) const;

+ protected:
    /**
     * Implements {@link Transliterator#handleTransliterate}.
     * @deprecated To be removed after 2002-sep-30.
@ -389,6 +390,7 @@ public:
    virtual void handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                     UBool isIncremental) const;

+ public:
    /**
     * Return a representation of this transliterator as source rules.
     * These rules will produce an equivalent transliterator if used
@ -404,6 +406,18 @@ public:
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

+ protected:
+    /**
+     * Implement Transliterator framework
+     */
+    virtual void handleGetSourceSet(UnicodeSet& result) const;
+
+ public:
+    /**
+     * Override Transliterator framework
+     */
+    virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
+
    /**
     * Return the class ID for this class.  This is useful only for
     * comparing to a return value from getDynamicClassID().  For example:
--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@ -780,6 +780,51 @@ public:
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

+    /**
+     * Returns the set of all characters that may be modified in the
+     * input text by this Transliterator.  This incorporates this
+     * object's current filter; if the filter is changed, the return
+     * value of this function will change.  The default implementation
+     * returns an empty set.  Some subclasses may override {@link
+     * #handleGetSourceSet} to return a more precise result.  The
+     * return result is approximate in any case and is intended for
+     * use by tests, tools, or utilities.
+     * @param result receives result set; previous contents lost
+     * @return a reference to result
+     * @see #getTargetSet
+     * @see #handleGetSourceSet
+     */
+    UnicodeSet& getSourceSet(UnicodeSet& result) const;
+
+    /**
+     * Framework method that returns the set of all characters that
+     * may be modified in the input text by this Transliterator,
+     * ignoring the effect of this object's filter.  The base class
+     * implementation returns the empty set.  Subclasses that wish to
+     * implement this should override this method.
+     * Implementations store the set of characters that this
+     * transliterator may modify into result; the method itself
+     * returns nothing.
+     * @param result receives result set; previous contents lost
+     * @see #getSourceSet
+     * @see #getTargetSet
+     */
+    virtual void handleGetSourceSet(UnicodeSet& result) const;
+
+    /**
+     * Returns the set of all characters that may be generated as
+     * replacement text by this transliterator.  The default
+     * implementation returns the empty set.  Some subclasses may
+     * override this method to return a more precise result.  The
+     * return result is approximate in any case and is intended for
+     * use by tests, tools, or utilities requiring such
+     * meta-information.
+     * @param result receives result set; previous contents lost
+     * @return a reference to result
+     * @see #getSourceSet
+     */
+    virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
+
 public:

    /**
--- a/icu4c/source/i18n/unicode/unirepl.h
+++ b/icu4c/source/i18n/unicode/unirepl.h
@ -16,6 +16,7 @@ U_NAMESPACE_BEGIN

 class Replaceable;
 class UnicodeString;
+class UnicodeSet;

 /**
 * <code>UnicodeReplacer</code> defines a protocol for objects that
@ -67,6 +68,13 @@ class U_I18N_API UnicodeReplacer /* not : public UObject because this is an inte
     */
    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
                                             UBool escapeUnprintable) const = 0;
+
+    /**
+     * Union the set of all characters that may be output by this object
+     * into the given set.
+     * @param toUnionTo the set into which to union the output characters
+     */
+    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0;
 };

 U_NAMESPACE_END
--- a/icu4c/source/test/intltest/cpdtrtst.cpp
+++ b/icu4c/source/test/intltest/cpdtrtst.cpp
@ -331,6 +331,9 @@ void CompoundTransliteratorTest::TestTransliterate(){
    if(U_FAILURE(status)){
        errln("CompoundTransliterator construction failed");
    }else {
+#if 0
+	// handleTransliterate is a protected method that was erroneously made
+	// public.  It is not public API that needs to be tested.
        UnicodeString s("abcabc");
        expect(*ct1, s, s);
        UTransPosition index = { 0, 0, 0, 0 };
@ -343,7 +346,7 @@ void CompoundTransliteratorTest::TestTransliterate(){
        UnicodeString rsource3(s);
        ct1->handleTransliterate(rsource3, index, TRUE); 
        expectAux(ct1->getID() + ":String, index(1,2,3), incremental=TRUE", rsource3 + "->" + rsource3, rsource3==expectedResult, expectedResult);
-
+#endif
    }
    delete ct1;
    UnicodeString Data[]={
@ -391,7 +394,7 @@ void CompoundTransliteratorTest::expect(const CompoundTransliterator& t,
    t.transliterate(rsource);
    expectAux(t.getID() + ":Replaceable", source + "->" + rsource, rsource==expectedResult, expectedResult);

-    // Test handleTransliterate (incremental) transliteration -- 
+    // Test transliterate (incremental) transliteration -- 
    rsource.remove();
    rsource.append(source);
    UTransPosition index;
@ -399,7 +402,8 @@ void CompoundTransliteratorTest::expect(const CompoundTransliterator& t,
    index.contextLimit = source.length();
    index.start = 0;
    index.limit = source.length();
-    t.handleTransliterate(rsource, index, TRUE);
+    UErrorCode ec = U_ZERO_ERROR;
+    t.transliterate(rsource, index, ec);
    t.finishTransliteration(rsource,index);
    expectAux(t.getID() + ":handleTransliterate ", source + "->" + rsource, rsource==expectedResult, expectedResult);

--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -159,6 +159,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
+        TESTCASE(72,TestSourceTargetSet);

        default: name = ""; break;
    }
@ -3570,6 +3571,53 @@ void TransliteratorTest::TestAnyX(void) {
    delete anyLatin;
 }

+/**
+ * Test the source and target set API.  These are only implemented
+ * for RBT and CompoundTransliterator at this time.
+ */
+void TransliteratorTest::TestSourceTargetSet() {
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+    // Rules under test
+    const char* rules =
+        "a > b; "
+        "r [x{lu}] > q;";
+
+    // Sets the rules above are expected to report
+    UnicodeSet wantSource("[arx{lu}]", status);
+    UnicodeSet wantTarget("[bq]", status);
+
+    Transliterator* xlit = Transliterator::createFromRules("test", rules, UTRANS_FORWARD, parseError, status);
+    if (U_FAILURE(status)) {
+        delete xlit;
+        errln("FAIL: Couldn't set up test");
+        return;
+    }
+
+    UnicodeSet gotSource;
+    UnicodeSet gotTarget;
+    xlit->getSourceSet(gotSource);
+    xlit->getTargetSet(gotTarget);
+
+    // Render the actual sets once; both branches report them.
+    UnicodeString gotSourcePat, gotTargetPat;
+    gotSource.toPattern(gotSourcePat, TRUE);
+    gotTarget.toPattern(gotTargetPat, TRUE);
+    if (!(gotSource == wantSource && gotTarget == wantTarget)) {
+        UnicodeString wantSourcePat, wantTargetPat;
+        errln((UnicodeString)"FAIL: " +
+              rules + " => source = " + gotSourcePat +
+              ", expected " + wantSource.toPattern(wantSourcePat, TRUE) +
+              "; target = " + gotTargetPat +
+              ", expected " + wantTarget.toPattern(wantTargetPat, TRUE));
+    } else {
+        logln((UnicodeString)"Ok: " + rules + " => source = " +
+              gotSourcePat + ", target = " + gotTargetPat);
+    }
+
+    delete xlit;
+}
+
 //======================================================================
 // Support methods
 //======================================================================
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@ -328,6 +328,8 @@ private:

    void TestAnyX(void);

+    void TestSourceTargetSet(void);
+
    //======================================================================
    // Support methods
    //======================================================================