scuffed-code/icu4c/source/i18n/strmatch.cpp

/*
* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/23/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "strmatch.h"
#include "rbt_data.h"
#include "util.h"
#include "unicode/uniset.h"

U_NAMESPACE_BEGIN

// Zero-length UChar string (a lone terminating 0).  replace() passes it to
// handleReplaceBetween() as the replacement text in order to delete a range.
const UChar EMPTY[] = { 0 }; // empty string: ""

// Definition of the static fgClassID member declared by StringMatcher.
// NOTE(review): presumably it is the address of this member, not its value,
// that identifies the class for ICU's RTTI scheme -- confirm in strmatch.h.
const char StringMatcher::fgClassID=0;

/**
 * Construct a matcher whose pattern is the text of theString between
 * start and limit.
 *
 * @param theString  string that contains the pattern text
 * @param start      first index of the pattern text within theString
 * @param limit      end index handed to extractBetween() together with start
 * @param segmentNum segment number of this matcher; toPattern() wraps the
 *                   pattern in parentheses only when this is > 0
 * @param theData    rule data used to resolve pattern characters to
 *                   sub-matchers.  Only its address is stored; the object
 *                   itself is not copied here.
 */
StringMatcher::StringMatcher(const UnicodeString& theString,
                             int32_t start,
                             int32_t limit,
                             int32_t segmentNum,
                             const TransliterationRuleData& theData) :
    data(&theData),
    segmentNumber(segmentNum),
    // -1 / -1 means "no match recorded yet"; see resetMatch() and replace().
    matchStart(-1),
    matchLimit(-1)
{
    // Copy the pattern text out of the source string into our own member.
    theString.extractBetween(start, limit, pattern);
}

/**
 * Copy constructor.  Copies the pattern, the segment number and the
 * currently recorded match range from o.  The data pointer is copied as a
 * pointer, so the new object refers to the same TransliterationRuleData
 * object as o.
 *
 * @param o the matcher to copy
 */
StringMatcher::StringMatcher(const StringMatcher& o) :
    UnicodeMatcher(o),
    pattern(o.pattern),
    data(o.data),
    segmentNumber(o.segmentNumber),
    matchStart(o.matchStart),
    matchLimit(o.matchLimit)
{
}

/**
 * Destructor.  The body is empty: this function performs no explicit
 * cleanup of its own.
 */
StringMatcher::~StringMatcher() {
}

/**
 * Implement UnicodeFunctor.
 *
 * Create a copy of this matcher with the copy constructor and return it
 * through the UnicodeFunctor interface.
 *
 * @return the newly allocated copy
 */
UnicodeFunctor* StringMatcher::clone() const {
    StringMatcher* duplicate = new StringMatcher(*this);
    return duplicate;
}

/**
 * UnicodeFunctor API.  Return this object viewed through its
 * UnicodeMatcher interface.
 *
 * The method is const but hands back a non-const pointer, so constness is
 * removed explicitly with const_cast.  The conversion to UnicodeMatcher*
 * is then a static_cast, which the compiler verifies is a genuine
 * derived-to-base conversion (a C-style cast would compile even if it
 * were not).
 *
 * @return this object as a UnicodeMatcher*
 */
UnicodeMatcher* StringMatcher::toMatcher() const {
    StringMatcher* self = const_cast<StringMatcher*>(this);
    UnicodeMatcher* asMatcher = static_cast<UnicodeMatcher*>(self);
    return asMatcher;
}

/**
 * UnicodeFunctor API.  Return this object viewed through its
 * UnicodeReplacer interface.
 *
 * The method is const but hands back a non-const pointer, so constness is
 * removed explicitly with const_cast.  The conversion to UnicodeReplacer*
 * is then a static_cast, which the compiler verifies is a genuine
 * derived-to-base conversion (a C-style cast would compile even if it
 * were not).
 *
 * @return this object as a UnicodeReplacer*
 */
UnicodeReplacer* StringMatcher::toReplacer() const {
    StringMatcher* self = const_cast<StringMatcher*>(this);
    UnicodeReplacer* asReplacer = static_cast<UnicodeReplacer*>(self);
    return asReplacer;
}

/**
 * Implement UnicodeMatcher.
 *
 * Try to match the pattern against text, beginning at offset and moving
 * toward limit.  The direction follows from the arguments: when
 * limit < offset the pattern is walked from its last character to its
 * first while the text position decreases; otherwise both advance.
 *
 * Each pattern character is looked up in the rule data.  If a sub-matcher
 * is registered for it, that sub-matcher is asked to match at the current
 * text position; otherwise the character is a literal that has to equal
 * the text character at the current position, which must not yet have
 * reached limit.
 *
 * On U_MATCH the matched range is recorded in matchStart/matchLimit as a
 * forward [start, limit) pair.  A forward match always overwrites the
 * recorded range; a reverse match records it only when no range is
 * currently recorded, so that the rightmost match is kept.
 *
 * @param text        the text to match against
 * @param offset      in: position at which matching begins.  out: on
 *                    U_MATCH, the text position reached after the match.
 *                    It is not modified for any other return value.
 * @param limit       position that bounds the match
 * @param incremental if TRUE, a forward match that reaches limit before
 *                    the pattern is exhausted yields U_PARTIAL_MATCH.  The
 *                    flag is also forwarded to sub-matchers.
 * @return U_MATCH if the whole pattern matched, U_PARTIAL_MATCH as
 *         described above, U_MISMATCH if a literal failed to match, or
 *         the result of the first sub-matcher that did not return U_MATCH
 */
UMatchDegree StringMatcher::matches(const Replaceable& text,
                                    int32_t& offset,
                                    int32_t limit,
                                    UBool incremental) {
    int32_t idx;
    int32_t pos = offset;

    if (pos <= limit) {
        // Forward direction: pattern and text are both walked upward.
        for (idx = 0; idx < pattern.length(); ++idx) {
            if (incremental && pos == limit) {
                // Ran out of text before running out of pattern, with no
                // mismatch so far.
                return U_PARTIAL_MATCH;
            }
            const UChar key = pattern.charAt(idx);
            UnicodeMatcher* sub = data->lookupMatcher(key);
            if (sub != 0) {
                // Delegate; the sub-matcher moves pos itself.
                const UMatchDegree degree =
                    sub->matches(text, pos, limit, incremental);
                if (degree != U_MATCH) {
                    return degree;
                }
                continue;
            }
            // Literal.  The bounds test is required when incremental is
            // FALSE; when it is TRUE the check above already covered it.
            if (pos >= limit || key != text.charAt(pos)) {
                return U_MISMATCH;
            }
            ++pos;
        }
        // Record the match position.
        matchStart = offset;
        matchLimit = pos;
    } else {
        // Reverse direction: pattern and text are both walked downward.
        for (idx = pattern.length() - 1; idx >= 0; --idx) {
            const UChar key = pattern.charAt(idx);
            UnicodeMatcher* sub = data->lookupMatcher(key);
            if (sub != 0) {
                const UMatchDegree degree =
                    sub->matches(text, pos, limit, incremental);
                if (degree != U_MATCH) {
                    return degree;
                }
                continue;
            }
            if (pos <= limit || key != text.charAt(pos)) {
                return U_MISMATCH;
            }
            --pos;
        }
        // Convert to a normal forward start/limit pair, and keep an
        // already recorded match -- we want the rightmost one.
        if (matchStart < 0) {
            matchStart = pos + 1;
            matchLimit = offset + 1;
        }
    }

    offset = pos;
    return U_MATCH;
}

/**
 * Implement UnicodeMatcher.
 *
 * Rebuild a rule-syntax representation of this matcher in result.  Any
 * previous content of result is discarded.  When segmentNumber is greater
 * than zero the pattern is enclosed in '(' and ')'.  Pattern characters
 * that resolve to a sub-matcher contribute that sub-matcher's own pattern;
 * all others are appended as single characters.  A final appendToRule()
 * call flushes whatever is still pending in the quote buffer.
 *
 * @param result            receives the pattern text
 * @param escapeUnprintable passed through to appendToRule() and to the
 *                          sub-matchers' toPattern()
 * @return result
 */
UnicodeString& StringMatcher::toPattern(UnicodeString& result,
                                        UBool escapeUnprintable) const
{
    const UChar OPEN_PAREN  = 0x0028; /*(*/
    const UChar CLOSE_PAREN = 0x0029; /*)*/

    UnicodeString subPattern, quoteBuf;
    result.truncate(0);

    if (segmentNumber > 0) {
        result.append(OPEN_PAREN);
    }

    int32_t pos = 0;
    while (pos < pattern.length()) {
        const UChar key = pattern.charAt(pos);
        ++pos;
        const UnicodeMatcher* sub = data->lookupMatcher(key);
        if (sub != 0) {
            // Stand-in character: emit the sub-matcher's pattern instead.
            ICU_Utility::appendToRule(result,
                                      sub->toPattern(subPattern, escapeUnprintable),
                                      TRUE, escapeUnprintable, quoteBuf);
        } else {
            // Plain character of the pattern.
            ICU_Utility::appendToRule(result, key, FALSE,
                                      escapeUnprintable, quoteBuf);
        }
    }

    if (segmentNumber > 0) {
        result.append(CLOSE_PAREN);
    }

    // Flush quoteBuf out to result
    ICU_Utility::appendToRule(result, -1,
                              TRUE, escapeUnprintable, quoteBuf);
    return result;
}

/**
 * Implement UnicodeMatcher.
 *
 * Decide from the first code point of the pattern whether this matcher
 * is compatible with the index value v.  An empty pattern is compatible
 * with every value.  If the first code point resolves to a sub-matcher
 * the question is forwarded to it; otherwise the low eight bits of the
 * code point are compared with v.
 *
 * @param v the index value to test
 * @return TRUE if the pattern is empty, if the sub-matcher says so, or
 *         if the low byte of the first code point equals v
 */
UBool StringMatcher::matchesIndexValue(uint8_t v) const {
    if (pattern.length() == 0) {
        return TRUE;
    }

    const UChar32 first = pattern.char32At(0);
    const UnicodeMatcher* sub = data->lookupMatcher(first);
    if (sub != 0) {
        return sub->matchesIndexValue(v);
    }
    return (UBool)((first & 0xFF) == v);
}

/**
 * Implement UnicodeMatcher.
 *
 * Walk the pattern one code point at a time.  A code point that resolves
 * to a sub-matcher lets that sub-matcher add its own match set; any other
 * code point is added to the set directly.
 *
 * @param toUnionTo the set that receives the additions
 */
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < pattern.length()) {
        const UChar32 cp = pattern.char32At(pos);
        const UnicodeMatcher* sub = data->lookupMatcher(cp);
        if (sub != NULL) {
            sub->addMatchSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        // Step over one or two code units, depending on the code point.
        pos += UTF_CHAR_LENGTH(cp);
    }
}

/**
 * UnicodeReplacer API.
 *
 * Replace text[start, limit) with the text of the most recently recorded
 * match.  The recorded range is first duplicated at position limit with
 * Replaceable::copy(), after which the original range [start, limit) is
 * deleted.
 *
 * If no match is recorded (matchStart < 0) or the recorded match is
 * empty, nothing is copied and the range is simply deleted.  No recorded
 * match means that a quantifier matched zero-length, e.g. x (a)* y
 * matched "xy".
 *
 * @param text   the text to modify
 * @param start  start of the range to replace
 * @param limit  end of the range to replace; also the position at which
 *               the segment copy is inserted
 * @param cursor neither read nor written by this implementation
 * @return the length of the copied segment, or 0 if nothing was copied
 */
int32_t StringMatcher::replace(Replaceable& text,
                               int32_t start,
                               int32_t limit,
                               int32_t& cursor) {
    int32_t copiedLength = 0;

    // Copy segment with out-of-band data
    if (matchStart >= 0 && matchStart != matchLimit) {
        text.copy(matchStart, matchLimit, limit);
        copiedLength = matchLimit - matchStart;
    }

    // Delete the original text.
    text.handleReplaceBetween(start, limit, EMPTY);

    return copiedLength;
}

/**
 * UnicodeReplacer API.
 *
 * Write the replacer form of this segment into rule: a '$' followed by
 * the segment number, formatted with ICU_Utility::appendNumber() using
 * the arguments 10 and 1.  Any previous content of rule is discarded.
 *
 * @param rule              receives the pattern text
 * @param escapeUnprintable not used by this implementation
 * @return rule
 */
UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
                                                UBool escapeUnprintable) const {
    // assert(segmentNumber > 0);
    const UChar DOLLAR = 0x0024; /*$*/

    rule.truncate(0);
    rule.append(DOLLAR);
    ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);
    return rule;
}

/**
 * Forget any recorded match by setting both ends of the recorded range
 * to -1.  This must be called before performing a set of matches with
 * this segment.
 */
void StringMatcher::resetMatch() {
    matchLimit = -1;
    matchStart = -1;
}

/**
 * Union the set of all characters that may be output by this object
 * into the given set.  This implementation intentionally adds nothing;
 * see the comment in the body.
 * @param toUnionTo the set into which to union the output characters;
 * left unmodified by this implementation
 */
void StringMatcher::addReplacementSetTo(UnicodeSet& toUnionTo) const {
    // The output of this replacer varies; it is the source text between
    // matchStart and matchLimit.  Since this varies depending on the
    // input text, we can't compute it here.  We can either do nothing
    // or we can add ALL characters to the set.  It's probably more useful
    // to do nothing.
}

/**
 * Implement UnicodeFunctor
 */
void StringMatcher::setData(const TransliterationRuleData* d) {
    data = d;
    int32_t i = 0;
    while (i<pattern.length()) {
        UChar32 c = pattern.char32At(i);
        UnicodeFunctor* f = data->lookup(c);
        if (f != NULL) {
            f->setData(data);
        }
        i += UTF_CHAR_LENGTH(c);
    }    
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

//eof
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`/*`
			`* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 07/23/01 aliu Creation.`
			`**********************************************************************`
			`*/`

ICU-2248 modularize icu, allow parts to not be built X-SVN-Rev: 9900 2002-09-20 01:54:48 +00:00			`#include "unicode/utypes.h"`

			`#if !UCONFIG_NO_TRANSLITERATION`

ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`#include "strmatch.h"`
			`#include "rbt_data.h"`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`#include "util.h"`
ICU-1434 initial implementation of source/target set API X-SVN-Rev: 8971 2002-06-28 21:13:54 +00:00			`#include "unicode/uniset.h"`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00
ICU-1264 added namspace support where possible. X-SVN-Rev: 6124 2001-10-08 23:26:58 +00:00			`U_NAMESPACE_BEGIN`

ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`const UChar EMPTY[] = { 0 }; // empty string: ""`

ICU-1962 change UObject: RTTI pure virtual, remove other boilerplate for now X-SVN-Rev: 8977 2002-06-29 00:04:16 +00:00			`const char StringMatcher::fgClassID=0;`

ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`StringMatcher::StringMatcher(const UnicodeString& theString,`
			`int32_t start,`
			`int32_t limit,`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`int32_t segmentNum,`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`const TransliterationRuleData& theData) :`
ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`data(&theData),`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`segmentNumber(segmentNum),`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`matchStart(-1),`
			`matchLimit(-1)`
ICU-900 Fixed some compiler warnings. X-SVN-Rev: 6136 2001-10-09 22:21:01 +00:00			`{`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`theString.extractBetween(start, limit, pattern);`
			`}`

			`StringMatcher::StringMatcher(const StringMatcher& o) :`
ICU-900 Fixed some compiler warnings. X-SVN-Rev: 6136 2001-10-09 22:21:01 +00:00			`UnicodeMatcher(o),`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`pattern(o.pattern),`
ICU-900 Fixed some compiler warnings. X-SVN-Rev: 6136 2001-10-09 22:21:01 +00:00			`data(o.data),`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`segmentNumber(o.segmentNumber),`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`matchStart(o.matchStart),`
ICU-1533 fix typo X-SVN-Rev: 7004 2001-11-20 00:35:22 +00:00			`matchLimit(o.matchLimit)`
ICU-900 Fixed some compiler warnings. X-SVN-Rev: 6136 2001-10-09 22:21:01 +00:00			`{`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`}`

			`/**`
			`* Destructor`
			`*/`
			`StringMatcher::~StringMatcher() {`
			`}`

			`/**`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`* Implement UnicodeFunctor`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`*/`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`UnicodeFunctor* StringMatcher::clone() const {`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`return new StringMatcher(*this);`
			`}`

ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`/**`
			`* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer`
			`* and return the pointer.`
			`*/`
			`UnicodeMatcher* StringMatcher::toMatcher() const {`
			`return (UnicodeMatcher*) this;`
			`}`

			`/**`
			`* UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer`
			`* and return the pointer.`
			`*/`
			`UnicodeReplacer* StringMatcher::toReplacer() const {`
			`return (UnicodeReplacer*) this;`
			`}`

ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UMatchDegree StringMatcher::matches(const Replaceable& text,`
			`int32_t& offset,`
			`int32_t limit,`
ICU-1406 make UnicodeMatcher::matches non-const X-SVN-Rev: 6503 2001-10-30 23:55:09 +00:00			`UBool incremental) {`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`int32_t i;`
			`int32_t cursor = offset;`
			`if (limit < cursor) {`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`// Match in the reverse direction`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`for (i=pattern.length()-1; i>=0; --i) {`
			`UChar keyChar = pattern.charAt(i);`
ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`UnicodeMatcher* subm = data->lookupMatcher(keyChar);`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`if (subm == 0) {`
ICU-1591 clean up TransliterationRule X-SVN-Rev: 7342 2001-12-11 17:45:13 +00:00			`if (cursor > limit &&`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`keyChar == text.charAt(cursor)) {`
			`--cursor;`
			`} else {`
			`return U_MISMATCH;`
			`}`
			`} else {`
			`UMatchDegree m =`
			`subm->matches(text, cursor, limit, incremental);`
			`if (m != U_MATCH) {`
			`return m;`
			`}`
			`}`
			`}`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`// Record the match position, but adjust for a normal`
			`// forward start, limit, and only if a prior match does not`
			`// exist -- we want the rightmost match.`
			`if (matchStart < 0) {`
ICU-1406 make UnicodeMatcher::matches non-const X-SVN-Rev: 6503 2001-10-30 23:55:09 +00:00			`matchStart = cursor+1;`
			`matchLimit = offset+1;`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`}`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`} else {`
			`for (i=0; i<pattern.length(); ++i) {`
			`if (incremental && cursor == limit) {`
			`// We've reached the context limit without a mismatch and`
			`// without completing our match.`
			`return U_PARTIAL_MATCH;`
			`}`
			`UChar keyChar = pattern.charAt(i);`
ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`UnicodeMatcher* subm = data->lookupMatcher(keyChar);`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`if (subm == 0) {`
			`// Don't need the cursor < limit check if`
			`// incremental is TRUE (because it's done above); do need`
			`// it otherwise.`
			`if (cursor < limit &&`
			`keyChar == text.charAt(cursor)) {`
			`++cursor;`
			`} else {`
			`return U_MISMATCH;`
			`}`
			`} else {`
			`UMatchDegree m =`
			`subm->matches(text, cursor, limit, incremental);`
			`if (m != U_MATCH) {`
			`return m;`
			`}`
			`}`
			`}`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`// Record the match position`
ICU-1406 make UnicodeMatcher::matches non-const X-SVN-Rev: 6503 2001-10-30 23:55:09 +00:00			`matchStart = offset;`
			`matchLimit = cursor;`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`}`

			`offset = cursor;`
			`return U_MATCH;`
			`}`

			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UnicodeString& StringMatcher::toPattern(UnicodeString& result,`
ICU-1733 Integrate some of the easier (and non-redundent) changes from Lotus. X-SVN-Rev: 7802 2002-02-28 01:42:40 +00:00			`UBool escapeUnprintable) const`
			`{`
			`result.truncate(0);`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`UnicodeString str, quoteBuf;`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`if (segmentNumber > 0) {`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`result.append((UChar)40); /(/`
			`}`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`for (int32_t i=0; i<pattern.length(); ++i) {`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`UChar keyChar = pattern.charAt(i);`
ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`const UnicodeMatcher* m = data->lookupMatcher(keyChar);`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`if (m == 0) {`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`} else {`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`ICU_Utility::appendToRule(result, m->toPattern(str, escapeUnprintable),`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`TRUE, escapeUnprintable, quoteBuf);`
			`}`
			`}`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`if (segmentNumber > 0) {`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`result.append((UChar)41); /)/`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`}`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`// Flush quoteBuf out to result`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`ICU_Utility::appendToRule(result, -1,`
			`TRUE, escapeUnprintable, quoteBuf);`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`return result;`
			`}`

			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UBool StringMatcher::matchesIndexValue(uint8_t v) const {`
			`if (pattern.length() == 0) {`
			`return TRUE;`
			`}`
			`UChar32 c = pattern.char32At(0);`
ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`const UnicodeMatcher *m = data->lookupMatcher(c);`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);`
			`}`

ICU-1434 initial implementation of source/target set API X-SVN-Rev: 8971 2002-06-28 21:13:54 +00:00			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {`
			`UChar32 ch;`
			`for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {`
			`ch = pattern.char32At(i);`
			`const UnicodeMatcher* matcher = data->lookupMatcher(ch);`
			`if (matcher == NULL) {`
			`toUnionTo.add(ch);`
			`} else {`
			`matcher->addMatchSetTo(toUnionTo);`
			`}`
			`}`
			`}`

ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`/**`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`* UnicodeReplacer API`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`*/`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`int32_t StringMatcher::replace(Replaceable& text,`
			`int32_t start,`
			`int32_t limit,`
			`int32_t& cursor) {`

			`int32_t outLen = 0;`

			`// Copy segment with out-of-band data`
			`int32_t dest = limit;`
			`// If there was no match, that means that a quantifier`
			`// matched zero-length. E.g., x (a)* y matched "xy".`
			`if (matchStart >= 0) {`
			`if (matchStart != matchLimit) {`
			`text.copy(matchStart, matchLimit, dest);`
			`outLen = matchLimit - matchStart;`
			`}`
			`}`

			`text.handleReplaceBetween(start, limit, EMPTY); // delete original text`

			`return outLen;`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`}`

			`/**`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`* UnicodeReplacer API`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`*/`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,`
			`UBool escapeUnprintable) const {`
			`// assert(segmentNumber > 0);`
			`rule.truncate(0);`
			`rule.append((UChar)0x0024 /$/);`
			`ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);`
			`return rule;`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`}`

			`/**`
ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`* Remove any match info. This must be called before performing a`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`* set of matches with this segment.`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`*/`
ICU-1234 make output side of RBTs object-oriented; rewrite ID parsers and modularize them; implement &Any-Lower() support X-SVN-Rev: 7582 2002-02-07 01:07:55 +00:00			`void StringMatcher::resetMatch() {`
			`matchStart = matchLimit = -1;`
ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6493 2001-10-30 18:08:53 +00:00			`}`

ICU-1434 initial implementation of source/target set API X-SVN-Rev: 8971 2002-06-28 21:13:54 +00:00			`/**`
			`* Union the set of all characters that may output by this object`
			`* into the given set.`
			`* @param toUnionTo the set into which to union the output characters`
			`*/`
			`void StringMatcher::addReplacementSetTo(UnicodeSet& toUnionTo) const {`
			`// The output of this replacer varies; it is the source text between`
			`// matchStart and matchLimit. Since this varies depending on the`
			`// input text, we can't compute it here. We can either do nothing`
			`// or we can add ALL characters to the set. It's probably more useful`
			`// to do nothing.`
			`}`

ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 2002-03-20 00:42:02 +00:00			`/**`
			`* Implement UnicodeFunctor`
			`*/`
			`void StringMatcher::setData(const TransliterationRuleData* d) {`
			`data = d;`
			`int32_t i = 0;`
			`while (i<pattern.length()) {`
			`UChar32 c = pattern.char32At(i);`
			`UnicodeFunctor* f = data->lookup(c);`
			`if (f != NULL) {`
			`f->setData(data);`
			`}`
			`i += UTF_CHAR_LENGTH(c);`
			`}`
			`}`

ICU-1264 added namspace support where possible. X-SVN-Rev: 6124 2001-10-08 23:26:58 +00:00			`U_NAMESPACE_END`

ICU-2248 modularize icu, allow parts to not be built X-SVN-Rev: 9900 2002-09-20 01:54:48 +00:00			`#endif /* #if !UCONFIG_NO_TRANSLITERATION */`

ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`//eof`