scuffed-code/icu4c/source/i18n/rbt.cpp
Alan Liu aaab115b90 ICU-990 add toRules to RBT
X-SVN-Rev: 4966
2001-06-12 17:35:03 +00:00

150 lines
5.3 KiB
C++

/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
#include "unicode/rbt.h"
#include "rbt_pars.h"
#include "rbt_data.h"
#include "rbt_rule.h"
#include "unicode/rep.h"
// Definition (storage) of the static class-ID member.
// NOTE(review): the stored value is never meaningful; presumably only the
// member's address is used to identify the class -- confirm against the
// class declaration in unicode/rbt.h.
char RuleBasedTransliterator::fgClassID = 0; // Value is irrelevant
/**
 * Construction helper: parses rule text into a rule data object that
 * this instance then owns.
 *
 * @param rules      the rule source text handed to the parser
 * @param direction  the direction handed to the parser
 * @param status     in/out error code.  If it already indicates failure
 *                   on entry, nothing is parsed and data stays null.  If
 *                   the parser returns null, it is set to
 *                   U_ILLEGAL_ARGUMENT_ERROR.
 * @param parseError forwarded to the parser unchanged
 */
void RuleBasedTransliterator::_construct(const UnicodeString& rules,
                                         UTransDirection direction,
                                         UErrorCode& status,
                                         UParseError* parseError) {
    // Establish a well-defined state before anything can fail.
    isDataOwned = TRUE;
    data = 0;

    if (!U_SUCCESS(status)) {
        return;
    }

    data = TransliterationRuleParser::parse(rules, direction, parseError);
    if (data != 0) {
        // Parsing succeeded: take the context length from the rule set.
        setMaximumContextLength(data->ruleSet.getMaximumContextLength());
        return;
    }

    // The parser produced no data object; report it to the caller.
    status = U_ILLEGAL_ARGUMENT_ERROR;
}
/**
 * Constructs a transliterator around an existing rule data object.
 * The data is borrowed rather than adopted: isDataOwned is set to FALSE.
 *
 * @param id            ID passed to the Transliterator base constructor
 * @param theData       rule data to use; must not be null, because it is
 *                      dereferenced here to read the maximum context length
 * @param adoptedFilter filter passed to the Transliterator base constructor
 */
RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
                                                 const TransliterationRuleData* theData,
                                                 UnicodeFilter* adoptedFilter)
    : Transliterator(id, adoptedFilter),
      // The member is declared non-const, so constness is cast away here.
      data(const_cast<TransliterationRuleData*>(theData)),
      isDataOwned(FALSE) {
    setMaximumContextLength(data->ruleSet.getMaximumContextLength());
}
/**
 * Copy constructor.
 *
 * Data that the source does not own is shared with the copy.  Data that
 * the source owns is deep-copied: every owner deletes its data in the
 * destructor, so two owners sharing one pointer would delete it twice.
 *
 * @param other the transliterator to copy
 */
RuleBasedTransliterator::RuleBasedTransliterator(
        const RuleBasedTransliterator& other) :
    Transliterator(other), data(other.data),
    isDataOwned(other.isDataOwned) {
    // Deep-copy only owned data, i.e. data that will later be deleted.
    // _construct() leaves data null (but marked as owned) when parsing
    // fails; in that case keep the copy's pointer null as well instead
    // of dereferencing a null pointer.
    if (isDataOwned && other.data != 0) {
        data = new TransliterationRuleData(*other.data);
    }
}
/**
 * Destructor.  Deletes the rule data only when this object owns it
 * (isDataOwned is TRUE); data that is not owned is left untouched.
 */
RuleBasedTransliterator::~RuleBasedTransliterator() {
    if (!isDataOwned) {
        return;     // borrowed data: not ours to delete
    }
    delete data;
}
/**
 * Returns a copy of this object, allocated with new via the copy
 * constructor.  NOTE(review): presumably the caller is responsible for
 * deleting the result -- confirm against the Transliterator API.
 */
Transliterator* // Covariant return NOT ALLOWED (for portability)
RuleBasedTransliterator::clone(void) const {
    Transliterator* copy = new RuleBasedTransliterator(*this);
    return copy;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Moves index.start toward index.limit, asking the rule set for a match
 * at each position and letting the matched rule perform the replacement.
 *
 * @param text          the text being transliterated; modified in place
 * @param index         position record; start, limit and contextLimit
 *                      are updated as replacements change the text
 * @param isIncremental selects findIncrementalMatch() instead of
 *                      findMatch(); only then can a partial match end
 *                      the loop early
 */
void
RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
                                             UBool isIncremental) const {
    /* Start and limit stay fixed relative to the text for the whole
     * call -- limit may change numerically when text is inserted or
     * removed.  The cursor travels from start to limit and replacements
     * happen underneath it.
     *
     * Example: rules 1. ab>x|y
     *                2. yc>z
     *
     * |eabcd   start - no match, advance cursor
     * e|abcd   match rule 1 - change text & adjust cursor
     * ex|ycd   match rule 2 - change text & adjust cursor
     * exz|d    no match, advance cursor
     * exzd|    done
     */

    /* A rule such as
     *     a>b|a
     * would loop forever.  To guard against that, the number of rule
     * applications is capped at a value generous enough for any
     * reasonable rule set but small enough to keep a server from
     * hanging: 16 times the number of characters n, unless 16n would
     * not fit in a uint32_t, in which case the cap is 0xFFFFFFFF.
     */
    uint32_t iterations = 0;
    const uint32_t span = index.limit - index.start;
    const uint32_t maxIterations =
        (span >= 0x10000000) ? 0xFFFFFFFF : (span << 4);

    UBool partialMatch = FALSE;     // stays FALSE unless isIncremental

    while (index.start < index.limit && iterations <= maxIterations) {
        TransliterationRule* rule;
        if (isIncremental) {
            rule = data->ruleSet.findIncrementalMatch(text, index, *data,
                                                      partialMatch, getFilter());
        } else {
            rule = data->ruleSet.findMatch(text, index, *data, getFilter());
        }

        if (rule != 0) {
            // A rule matched: the rule object performs the replacement
            // and reports the change in length; then every affected
            // position is shifted and the cursor is repositioned.
            const int32_t delta = rule->replace(text, index.start, *data);
            index.limit += delta;
            index.contextLimit += delta;
            index.start += rule->getCursorPos();
            ++iterations;
            continue;
        }

        // No rule matched here.  A partial match means nothing can be
        // done without more text, so stop with the cursor where it is;
        // otherwise step over this position.
        if (partialMatch) {
            break;
        }
        ++index.start;
    }
}
/**
 * Produces rule source text by delegating to the rule set's toRules().
 *
 * @param rulesSource       string handed to the rule set to receive the
 *                          rules
 * @param escapeUnprintable forwarded to the rule set unchanged
 * @return the reference returned by the rule set's toRules()
 */
UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
                                                UBool escapeUnprintable) const {
    UnicodeString& result =
        data->ruleSet.toRules(rulesSource, *data, escapeUnprintable);
    return result;
}