scuffed-code/icu4c/source/i18n/strmatch.cpp

/*
* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/23/01    aliu        Creation.
**********************************************************************
*/

#include "strmatch.h"
#include "rbt_data.h"
#include "rbt_rule.h"

/**
 * Construct a matcher for a piece of a rule string.
 *
 * @param theString  string containing the pattern text
 * @param start      start index handed to UnicodeString::extractBetween
 * @param limit      limit index handed to UnicodeString::extractBetween
 * @param isSeg      stored in isSegment; when TRUE, toPattern() wraps its
 *                   output in parentheses
 * @param theData    rule data consulted (via lookup()) to resolve pattern
 *                   characters that stand in for nested matchers
 */
StringMatcher::StringMatcher(const UnicodeString& theString,
                             int32_t start,
                             int32_t limit,
                             UBool isSeg,
                             const TransliterationRuleData& theData)
    : data(theData),
      isSegment(isSeg)
{
    // 'pattern' is default-constructed above; fill it with the portion of
    // theString selected by start/limit.
    theString.extractBetween(start, limit, pattern);
}

/**
 * Copy constructor.  Duplicates the pattern text and the segment flag of
 * the source matcher and initializes 'data' from the source's 'data'.
 *
 * @param o  matcher to copy
 */
StringMatcher::StringMatcher(const StringMatcher& o)
    : pattern(o.pattern),
      isSegment(o.isSegment),
      data(o.data)
{
}

/**
 * Destructor.  The body is empty: nothing is released explicitly here.
 */
StringMatcher::~StringMatcher()
{
}

/**
 * Implement UnicodeMatcher.
 *
 * @return a newly allocated StringMatcher, copy-constructed from this
 *         object.
 */
UnicodeMatcher* StringMatcher::clone() const {
    StringMatcher* duplicate = new StringMatcher(*this);
    return duplicate;
}

/**
 * Implement UnicodeMatcher.
 *
 * Matches the stored pattern against 'text', beginning at 'offset'.  The
 * direction is chosen from the arguments: when limit < offset the pattern
 * is walked from its last character to its first while the text position
 * moves downward; otherwise the pattern is walked first to last while the
 * text position moves upward.
 *
 * Each pattern character is first passed to data.lookup().  A non-null
 * result is a nested matcher, which is asked to match at the current
 * position (and may move it); a null result means the character is a
 * literal and is compared with the text directly.
 *
 * @param text         text to match against
 * @param offset       in: position at which matching starts; out: position
 *                     after the match.  Written only when U_MATCH is
 *                     returned.
 * @param limit        boundary position for the match
 * @param incremental  in the forward direction, when TRUE and the position
 *                     reaches 'limit' before the whole pattern has been
 *                     consumed, U_PARTIAL_MATCH is returned; it is also
 *                     forwarded to nested matchers
 * @return U_MATCH when the whole pattern matched; U_MISMATCH when a literal
 *         failed to match; U_PARTIAL_MATCH as described above; otherwise
 *         whatever non-U_MATCH value a nested matcher returned.
 */
UMatchDegree StringMatcher::matches(const Replaceable& text,
                                    int32_t& offset,
                                    int32_t limit,
                                    UBool incremental) const {
    const int32_t patLen = pattern.length();
    int32_t pos = offset;   // working position; committed to offset on success
    int32_t idx;            // pattern index, shared by both loops

    if (limit < pos) {
        // Reverse direction: last pattern character first.
        for (idx = patLen - 1; idx >= 0; --idx) {
            const UChar ch = pattern.charAt(idx);
            const UnicodeMatcher* nested = data.lookup(ch);
            if (nested != 0) {
                const UMatchDegree degree =
                    nested->matches(text, pos, limit, incremental);
                if (degree != U_MATCH) {
                    return degree;
                }
                continue;
            }
            // Literal: fail once the position has dropped below limit or
            // the text character differs.
            if (pos < limit || ch != text.charAt(pos)) {
                return U_MISMATCH;
            }
            --pos;
        }
    } else {
        // Forward direction: first pattern character first.
        for (idx = 0; idx < patLen; ++idx) {
            if (incremental && pos == limit) {
                // Ran into the limit with pattern characters still
                // unmatched and no mismatch so far.
                return U_PARTIAL_MATCH;
            }
            const UChar ch = pattern.charAt(idx);
            const UnicodeMatcher* nested = data.lookup(ch);
            if (nested != 0) {
                const UMatchDegree degree =
                    nested->matches(text, pos, limit, incremental);
                if (degree != U_MATCH) {
                    return degree;
                }
                continue;
            }
            // Literal.  The bounds test is redundant when incremental is
            // TRUE (handled at the top of the loop) but is required
            // otherwise.
            if (pos >= limit || ch != text.charAt(pos)) {
                return U_MISMATCH;
            }
            ++pos;
        }
    }

    offset = pos;
    return U_MATCH;
}

/**
 * Implement UnicodeMatcher.
 *
 * Appends a rule-syntax rendering of this matcher to 'result'.  Literal
 * pattern characters are emitted through
 * TransliterationRule::appendToRule(); characters that data.lookup()
 * resolves to a nested matcher are replaced by that matcher's own pattern.
 * When isSegment is set the output is wrapped in '(' ... ')'.
 *
 * Note that 'result' is appended to, not cleared.
 *
 * @param result             string that receives the pattern text
 * @param escapeUnprintable  forwarded to appendToRule() and to nested
 *                           matchers' toPattern()
 * @return 'result'
 */
UnicodeString& StringMatcher::toPattern(UnicodeString& result,
                                        UBool escapeUnprintable) const {
    UnicodeString str, quoteBuf;
    if (isSegment) {
        result.append((UChar)40); /*(*/
    }
    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar keyChar = pattern.charAt(i);
        const UnicodeMatcher* m = data.lookup(keyChar);
        if (m == 0) {
            TransliterationRule::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
        } else {
            // Hand every nested matcher an empty scratch buffer so that an
            // implementation which appends (as this one does) cannot
            // re-emit the text of previously visited sub-matchers.
            str.truncate(0);
            TransliterationRule::appendToRule(result, m->toPattern(str, escapeUnprintable),
                         TRUE, escapeUnprintable, quoteBuf);
        }
    }
    // Flush quoteBuf out to result and, for a segment, emit the closing
    // parenthesis.  The ')' must come from this call only: appending it
    // separately beforehand would produce "))" and would place the first
    // ')' ahead of any text still pending in quoteBuf.
    TransliterationRule::appendToRule(result, (UChar32)(isSegment?41/*)*/:-1),
                                          TRUE, escapeUnprintable, quoteBuf);
    return result;
}

/**
 * Implement UnicodeMatcher.
 *
 * Decides from the first code point of the pattern whether this matcher
 * is compatible with the index value 'v'.
 *
 * @param v  index value to test
 * @return TRUE when the pattern is empty; otherwise, when the first code
 *         point resolves (via data.lookup()) to a nested matcher, that
 *         matcher's answer; otherwise whether the low 8 bits of the first
 *         code point equal v.
 */
UBool StringMatcher::matchesIndexValue(uint8_t v) const {
    if (pattern.length() != 0) {
        const UChar32 first = pattern.char32At(0);
        const UnicodeMatcher *nested = data.lookup(first);
        if (nested != 0) {
            return nested->matchesIndexValue(v);
        }
        return (first & 0xFF) == v;
    }
    // An empty pattern is compatible with every index value.
    return TRUE;
}

//eof
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`/*`
			`* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 07/23/01 aliu Creation.`
			`**********************************************************************`
			`*/`

			`#include "strmatch.h"`
			`#include "rbt_data.h"`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`#include "rbt_rule.h"`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00
			`StringMatcher::StringMatcher(const UnicodeString& theString,`
			`int32_t start,`
			`int32_t limit,`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`UBool isSeg,`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`const TransliterationRuleData& theData) :`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`data(theData),`
			`isSegment(isSeg) {`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`theString.extractBetween(start, limit, pattern);`
			`}`

			`StringMatcher::StringMatcher(const StringMatcher& o) :`
			`pattern(o.pattern),`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`isSegment(o.isSegment),`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`data(o.data) {`
			`}`

			`/**`
			`* Destructor`
			`*/`
			`StringMatcher::~StringMatcher() {`
			`}`

			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UnicodeMatcher* StringMatcher::clone() const {`
			`return new StringMatcher(*this);`
			`}`

			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UMatchDegree StringMatcher::matches(const Replaceable& text,`
			`int32_t& offset,`
			`int32_t limit,`
			`UBool incremental) const {`
			`int32_t i;`
			`int32_t cursor = offset;`
			`if (limit < cursor) {`
			`for (i=pattern.length()-1; i>=0; --i) {`
			`UChar keyChar = pattern.charAt(i);`
			`const UnicodeMatcher* subm = data.lookup(keyChar);`
			`if (subm == 0) {`
			`if (cursor >= limit &&`
			`keyChar == text.charAt(cursor)) {`
			`--cursor;`
			`} else {`
			`return U_MISMATCH;`
			`}`
			`} else {`
			`UMatchDegree m =`
			`subm->matches(text, cursor, limit, incremental);`
			`if (m != U_MATCH) {`
			`return m;`
			`}`
			`}`
			`}`
			`} else {`
			`for (i=0; i<pattern.length(); ++i) {`
			`if (incremental && cursor == limit) {`
			`// We've reached the context limit without a mismatch and`
			`// without completing our match.`
			`return U_PARTIAL_MATCH;`
			`}`
			`UChar keyChar = pattern.charAt(i);`
			`const UnicodeMatcher* subm = data.lookup(keyChar);`
			`if (subm == 0) {`
			`// Don't need the cursor < limit check if`
			`// incremental is TRUE (because it's done above); do need`
			`// it otherwise.`
			`if (cursor < limit &&`
			`keyChar == text.charAt(cursor)) {`
			`++cursor;`
			`} else {`
			`return U_MISMATCH;`
			`}`
			`} else {`
			`UMatchDegree m =`
			`subm->matches(text, cursor, limit, incremental);`
			`if (m != U_MATCH) {`
			`return m;`
			`}`
			`}`
			`}`
			`}`

			`offset = cursor;`
			`return U_MATCH;`
			`}`

			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UnicodeString& StringMatcher::toPattern(UnicodeString& result,`
			`UBool escapeUnprintable) const {`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`UnicodeString str, quoteBuf;`
			`if (isSegment) {`
			`result.append((UChar)40); /(/`
			`}`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`for (int32_t i=0; i<pattern.length(); ++i) {`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`UChar keyChar = pattern.charAt(i);`
			`const UnicodeMatcher* m = data.lookup(keyChar);`
			`if (m == 0) {`
			`TransliterationRule::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);`
			`} else {`
			`TransliterationRule::appendToRule(result, m->toPattern(str, escapeUnprintable),`
			`TRUE, escapeUnprintable, quoteBuf);`
			`}`
			`}`
			`if (isSegment) {`
			`result.append((UChar)41); /)/`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`}`
ICU-1076 implement toPattern X-SVN-Rev: 5379 2001-07-30 23:23:16 +00:00			`// Flush quoteBuf out to result`
			`TransliterationRule::appendToRule(result, (UChar32)(isSegment?41/)/:-1),`
			`TRUE, escapeUnprintable, quoteBuf);`
ICU-1076 initial limited support for Kleene star and plus operators X-SVN-Rev: 5359 2001-07-27 00:18:53 +00:00			`return result;`
			`}`

			`/**`
			`* Implement UnicodeMatcher`
			`*/`
			`UBool StringMatcher::matchesIndexValue(uint8_t v) const {`
			`if (pattern.length() == 0) {`
			`return TRUE;`
			`}`
			`UChar32 c = pattern.char32At(0);`
			`const UnicodeMatcher *m = data.lookup(c);`
			`return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);`
			`}`

			`//eof`