/*
* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/23/01    aliu        Creation.
**********************************************************************
*/

#include "strmatch.h"
#include "rbt_data.h"
#include "rbt_rule.h"

U_NAMESPACE_BEGIN

/**
 * Construct a matcher over a slice of a string.
 *
 * @param theString source text; the range [start, limit) is copied
 *                  into this matcher's pattern.
 * @param start     start offset of the slice within theString
 * @param limit     limit offset of the slice within theString
 * @param isSeg     if TRUE, toPattern() wraps the emitted pattern in
 *                  parentheses
 * @param theData   rule data used to look up a UnicodeMatcher for each
 *                  pattern character; held by reference, not copied
 *
 * The recorded match range starts out empty (matchStart and
 * matchLimit are both -1).
 */
StringMatcher::StringMatcher(const UnicodeString& theString,
                             int32_t start,
                             int32_t limit,
                             UBool isSeg,
                             const TransliterationRuleData& theData) :
    data(theData),
    isSegment(isSeg),
    matchStart(-1),
    matchLimit(-1)
{
    theString.extractBetween(start, limit, pattern);
}

/**
 * Copy constructor.  Copies the pattern, the segment flag and the
 * currently recorded match range; the copy refers to the same rule
 * data object as the original.
 */
StringMatcher::StringMatcher(const StringMatcher& o) :
    UnicodeMatcher(o),
    pattern(o.pattern),
    data(o.data),
    isSegment(o.isSegment),
    matchStart(o.matchStart),
    matchLimit(o.matchLimit)
{
}

/**
 * Destructor
 */
StringMatcher::~StringMatcher() {
}

/**
 * Implement UnicodeMatcher
 *
 * @return a heap-allocated copy of this matcher, created with new.
 */
UnicodeMatcher* StringMatcher::clone() const {
    return new StringMatcher(*this);
}

/**
 * Implement UnicodeMatcher
 *
 * Match the pattern against text, starting at offset and not going
 * beyond limit.  If limit < offset the pattern is matched backward
 * (last pattern character first, cursor moving down); otherwise it is
 * matched forward.  Each pattern character is looked up in the rule
 * data: if a matcher is registered for it, matching of that position
 * is delegated to that matcher; otherwise the character is compared
 * literally with the text.
 *
 * @param text        the text being matched
 * @param offset      in: position at which to start matching;
 *                    out: on U_MATCH, the position just past the
 *                    matched text in the direction of travel.
 *                    Unchanged on any other result.
 * @param limit       boundary the match may not cross
 * @param incremental forwarded to sub-matchers; in the forward
 *                    direction it also enables a U_PARTIAL_MATCH
 *                    result when limit is reached mid-pattern
 * @return U_MATCH on a complete match; otherwise U_MISMATCH, or
 *         whatever non-U_MATCH value a sub-matcher returned.
 */
UMatchDegree StringMatcher::matches(const Replaceable& text,
                                    int32_t& offset,
                                    int32_t limit,
                                    UBool incremental) {
    int32_t i;
    int32_t cursor = offset;
    if (limit < cursor) {
        // Match in the reverse direction
        for (i=pattern.length()-1; i>=0; --i) {
            UChar keyChar = pattern.charAt(i);
            UnicodeMatcher* subm = data.lookup(keyChar);
            if (subm == 0) {
                // No matcher registered: compare the literal character.
                if (cursor > limit &&
                    keyChar == text.charAt(cursor)) {
                    --cursor;
                } else {
                    return U_MISMATCH;
                }
            } else {
                UMatchDegree m =
                    subm->matches(text, cursor, limit, incremental);
                if (m != U_MATCH) {
                    return m;
                }
            }
        }
        // Record the match position, but adjust for a normal
        // forward start, limit, and only if a prior match does not
        // exist -- we want the rightmost match.
        if (matchStart < 0) {
            matchStart = cursor+1;
            matchLimit = offset+1;
        }
    } else {
        // Match in the forward direction.
        // NOTE(review): the header and first half of this loop body
        // were lost from the source text; they are reconstructed here
        // as the mirror image of the reverse branch plus the
        // incremental partial-match check.  Confirm against upstream.
        for (i=0; i<pattern.length(); ++i) {
            if (incremental && cursor == limit) {
                // We've reached the context limit without a mismatch and
                // without completing our match.
                return U_PARTIAL_MATCH;
            }
            UChar keyChar = pattern.charAt(i);
            UnicodeMatcher* subm = data.lookup(keyChar);
            if (subm == 0) {
                // Don't need the cursor < limit check if
                // incremental is TRUE (because it's done above); do need
                // it otherwise.
                if (cursor < limit &&
                    keyChar == text.charAt(cursor)) {
                    ++cursor;
                } else {
                    return U_MISMATCH;
                }
            } else {
                UMatchDegree m =
                    subm->matches(text, cursor, limit, incremental);
                if (m != U_MATCH) {
                    return m;
                }
            }
        }
        // Record the match position
        matchStart = offset;
        matchLimit = cursor;
    }

    offset = cursor;
    return U_MATCH;
}

/**
 * Implement UnicodeMatcher
 *
 * Rebuild a rule-syntax pattern for this matcher into result, which
 * is cleared first.  If this matcher is a segment the output is
 * wrapped in '(' ... ')'.
 *
 * @param result            receives the pattern; previous contents
 *                          are discarded
 * @param escapeUnprintable forwarded to sub-matchers and to
 *                          TransliterationRule::appendToRule()
 * @return result
 */
UnicodeString& StringMatcher::toPattern(UnicodeString& result,
                                        UBool escapeUnprintable) const {
    result.truncate(0);
    UnicodeString str, quoteBuf;
    if (isSegment) {
        result.append((UChar)40); /*(*/
    }
    // NOTE(review): the header and first half of this loop body were
    // lost from the source text and are reconstructed here; confirm
    // against upstream (in particular the FALSE argument used for
    // literal characters).
    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar keyChar = pattern.charAt(i);
        const UnicodeMatcher* m = data.lookup(keyChar);
        if (m == 0) {
            // Literal character: append it directly.
            TransliterationRule::appendToRule(result, keyChar, FALSE,
                                              escapeUnprintable, quoteBuf);
        } else {
            // Registered matcher: append its own pattern text.
            TransliterationRule::appendToRule(result,
                                              m->toPattern(str, escapeUnprintable),
                                              TRUE, escapeUnprintable, quoteBuf);
        }
    }
    if (isSegment) {
        result.append((UChar)41); /*)*/
    }
    // Flush quoteBuf out to result
    TransliterationRule::appendToRule(result, -1,
                                      TRUE, escapeUnprintable, quoteBuf);
    return result;
}

/**
 * Implement UnicodeMatcher
 *
 * An empty pattern returns TRUE for every v.  Otherwise only the
 * first code point of the pattern is examined: if a matcher is
 * registered for it the question is delegated to that matcher,
 * else its low byte is compared with v.
 */
UBool StringMatcher::matchesIndexValue(uint8_t v) const {
    if (pattern.length() == 0) {
        return TRUE;
    }
    UChar32 c = pattern.char32At(0);
    const UnicodeMatcher *m = data.lookup(c);
    return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);
}

/**
 * Remove any match data.  This must be called before performing a
 * set of matches with this segment.
 */
void StringMatcher::resetMatch() {
    matchStart = matchLimit = -1;
}

/**
 * Return the start offset, in the match text, of the rightmost
 * match.  This method may get moved up into the UnicodeMatcher if
 * it turns out to be useful to generalize this.
 */
int32_t StringMatcher::getMatchStart() const {
    return matchStart;
}

/**
 * Return the limit offset, in the match text, of the rightmost
 * match.  This method may get moved up into the UnicodeMatcher if
 * it turns out to be useful to generalize this.
 */
int32_t StringMatcher::getMatchLimit() const {
    return matchLimit;
}

U_NAMESPACE_END

//eof