scuffed-code/icu4c/source/i18n/rbt.cpp

/*
**********************************************************************
*   Copyright (C) 1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#include "unicode/rbt.h"
#include "rbt_pars.h"
#include "rbt_data.h"
#include "rbt_rule.h"
#include "unicode/rep.h"

void RuleBasedTransliterator::_construct(const UnicodeString& rules,
                                         Direction direction,
                                         UErrorCode& status) {
    data = 0;
    isDataOwned = TRUE;
    if (U_SUCCESS(status)) {
        data = TransliterationRuleParser::parse(rules, direction);
        if (data == 0) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
}

RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& ID,
                                 const TransliterationRuleData* theData,
                                 UnicodeFilter* adoptedFilter) :
    Transliterator(ID, adoptedFilter),
    data((TransliterationRuleData*)theData), // cast away const
    isDataOwned(FALSE) {}

/**
 * Copy constructor.  Since the data object is immutable, we can share
 * it with other objects -- no need to clone it.
 */
RuleBasedTransliterator::RuleBasedTransliterator(
        const RuleBasedTransliterator& other) :
    Transliterator(other), data(other.data) {
    // TODO: Finish this -- implement with correct data ownership handling
}

/**
 * Destructor.  We do NOT own the data object, so we do not delete it.
 */
RuleBasedTransliterator::~RuleBasedTransliterator() {
    if (isDataOwned) {
        delete data;
    }
}

Transliterator* // Covariant return NOT ALLOWED (for portability)
RuleBasedTransliterator::clone(void) const {
    return new RuleBasedTransliterator(*this);
}

/**
 * Transliterates a segment of a string.  <code>Transliterator</code> API.
 * @param text the string to be transliterated
 * @param start the beginning index, inclusive; <code>0 <= start
 * <= limit</code>.
 * @param limit the ending index, exclusive; <code>start <= limit
 * <= text.length()</code>.
 * @param result buffer to receive the transliterated text; previous
 * contents are discarded
 */
void RuleBasedTransliterator::transliterate(const UnicodeString& text,
                                            int32_t start, int32_t limit,
                                            UnicodeString& result) const {
    /* In the following loop there is a virtual buffer consisting of the
     * text transliterated so far followed by the untransliterated text.  There is
     * also a cursor, which may be in the already transliterated buffer or just
     * before the untransliterated text.
     *
     * Example: rules 1. ab>x|y
     *                2. yc>z
     *
     * []|eabcd  start - no match, copy e to tranlated buffer
     * [e]|abcd  match rule 1 - copy output & adjust cursor
     * [ex|y]cd  match rule 2 - copy output & adjust cursor
     * [exz]|d   no match, copy d to transliterated buffer
     * [exzd]|   done
     *
     * cursor: an index into the virtual buffer, 0..result.length()-1.
     * Matches take place at the cursor.  If there is no match, the cursor
     * is advanced, and one character is moved from the source text to the
     * result buffer.
     *         
     * start, limit: these designate the substring of the source text which
     * has not been processed yet.  The range of offsets is start..limit-1.
     * At any moment the virtual buffer consists of result +
     * text.substring(start, limit).
     */
    int32_t cursor = 0;
    result.remove();
    while (start < limit || cursor < result.length()) {
        TransliterationRule* r = data->ruleSet.findMatch(text, start, limit,
                                                         result,
                                                         cursor,
                                                         *data,
                                                         getFilter());
        if (r == 0) {
            if (cursor == result.length()) {
                result.append(text.charAt(start++));
            }
            ++cursor;
        } else {
            // At this point we have a match of one or more
            // characters.  The characters cover the range [cursor,
            // cursor + r->getKeyLength()) - a half-open interval.
            // The index values refer to a virtual buffer with result
            // holding [0, result.length()) and text holding
            // [result.length(),...).

            // First, figure out the range of result being replaced.
            int32_t rfirst = cursor;
            int32_t rlimit = uprv_min(result.length(),
                                     cursor + r->getKeyLength());

            // resultPad is length of result to right of cursor; >= 0
            int32_t resultPad = result.length() - cursor;

            if (r->getKeyLength() > resultPad) {
                start += r->getKeyLength() - resultPad;
            }
            
            result.replaceBetween(rfirst, rlimit,
                                  r->getOutput());

            cursor += r->getCursorPos();
        }
    }
}

/**
 * Transliterates a segment of a string.  <code>Transliterator</code> API.
 * @param text the string to be transliterated
 * @param start the beginning index, inclusive; <code>0 <= start
 * <= limit</code>.
 * @param limit the ending index, exclusive; <code>start <= limit
 * <= text.length()</code>.
 * @return The new limit index
 */
int32_t RuleBasedTransliterator::transliterate(Replaceable& text,
                                               int32_t start,
                                               int32_t limit) const {
    /* When using Replaceable, the algorithm is simpler, since we don't have
     * two separate buffers.  We keep start and limit fixed the entire time,
     * relative to the text -- limit may move numerically if text is
     * inserted or removed.  The cursor moves from start to limit, with
     * replacements happening under it.
     *
     * Example: rules 1. ab>x|y
     *                2. yc>z
     *
     * |eabcd   start - no match, advance cursor
     * e|abcd   match rule 1 - change text & adjust cursor
     * ex|ycd   match rule 2 - change text & adjust cursor
     * exz|d    no match, advance cursor
     * exzd|    done
     */
    int32_t cursor = start;
    while (cursor < limit) {
        TransliterationRule* r =
            data->ruleSet.findMatch(text, start, limit,
                                    cursor, *data,
                                    getFilter());
        if (r == 0) {
            ++cursor;
        } else {
            text.handleReplaceBetween(cursor, cursor + r->getKeyLength(),
                                      r->getOutput());
            limit += r->getOutput().length() - r->getKeyLength();
            cursor += r->getCursorPos();
        }
    }
    return limit;
}

/**
 * Implements {@link Transliterator#handleKeyboardTransliterate}.
 */
void
RuleBasedTransliterator::handleKeyboardTransliterate(Replaceable& text,
                                                     int32_t index[3]) const {
    int32_t start = index[START];
    int32_t limit = index[LIMIT];
    int32_t cursor = index[CURSOR];

    bool_t isPartial;

    while (cursor < limit) {
        TransliterationRule* r = data->ruleSet.findIncrementalMatch(
                text, start, limit, cursor,
                *data, isPartial,
                getFilter());
        /* If we match a rule then apply it by replacing the key
         * with the rule output and repositioning the cursor
         * appropriately.  If we get a partial match, then we
         * can't do anything without more text; return with the
         * cursor at the current position.  If we get null, then
         * there is no match at this position, and we can advance
         * the cursor.
         */
        if (r == 0) {
            if (isPartial) {
                break;
            } else {
                ++cursor;
            }
        } else {
            text.handleReplaceBetween(cursor, cursor + r->getKeyLength(),
                                      r->getOutput());
            limit += r->getOutput().length() - r->getKeyLength();
            cursor += r->getCursorPos();
        }
    }

    index[LIMIT] = limit;
    index[CURSOR] = cursor;
}

/**
 * Returns the length of the longest context required by this transliterator.
 * This is <em>preceding</em> context.
 * @return Maximum number of preceding context characters this
 * transliterator needs to examine
 */
int32_t RuleBasedTransliterator::getMaximumContextLength(void) const {
    return data->ruleSet.getMaximumContextLength();
}
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/*`
			`**********************************************************************`
			`* Copyright (C) 1999, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 11/17/99 aliu Creation.`
			`**********************************************************************`
			`*/`
ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_ X-SVN-Rev: 473 1999-12-28 23:57:50 +00:00			`#include "unicode/rbt.h"`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`#include "rbt_pars.h"`
			`#include "rbt_data.h"`
			`#include "rbt_rule.h"`
ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_ X-SVN-Rev: 473 1999-12-28 23:57:50 +00:00			`#include "unicode/rep.h"`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`void RuleBasedTransliterator::_construct(const UnicodeString& rules,`
			`Direction direction,`
			`UErrorCode& status) {`
			`data = 0;`
			`isDataOwned = TRUE;`
			`if (U_SUCCESS(status)) {`
			`data = TransliterationRuleParser::parse(rules, direction);`
			`if (data == 0) {`
			`status = U_ILLEGAL_ARGUMENT_ERROR;`
			`}`
			`}`
			`}`

			`RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& ID,`
			`const TransliterationRuleData* theData,`
			`UnicodeFilter* adoptedFilter) :`
			`Transliterator(ID, adoptedFilter),`
ICU-114 Fix compile errors X-SVN-Rev: 196 1999-11-20 01:04:34 +00:00			`data((TransliterationRuleData*)theData), // cast away const`
			`isDataOwned(FALSE) {}`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* Copy constructor. Since the data object is immutable, we can share`
			`* it with other objects -- no need to clone it.`
			`*/`
			`RuleBasedTransliterator::RuleBasedTransliterator(`
			`const RuleBasedTransliterator& other) :`
ICU-114 Fix compile errors X-SVN-Rev: 196 1999-11-20 01:04:34 +00:00			`Transliterator(other), data(other.data) {`
			`// TODO: Finish this -- implement with correct data ownership handling`
			`}`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* Destructor. We do NOT own the data object, so we do not delete it.`
			`*/`
ICU-114 Fix compile errors X-SVN-Rev: 196 1999-11-20 01:04:34 +00:00			`RuleBasedTransliterator::~RuleBasedTransliterator() {`
			`if (isDataOwned) {`
			`delete data;`
			`}`
			`}`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`Transliterator* // Covariant return NOT ALLOWED (for portability)`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`RuleBasedTransliterator::clone(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return new RuleBasedTransliterator(*this);`
			`}`

			`/**`
			`* Transliterates a segment of a string. <code>Transliterator</code> API.`
			`* @param text the string to be transliterated`
			`* @param start the beginning index, inclusive; <code>0 <= start`
			`* <= limit</code>.`
			`* @param limit the ending index, exclusive; <code>start <= limit`
			`* <= text.length()</code>.`
			`* @param result buffer to receive the transliterated text; previous`
			`* contents are discarded`
			`*/`
			`void RuleBasedTransliterator::transliterate(const UnicodeString& text,`
			`int32_t start, int32_t limit,`
			`UnicodeString& result) const {`
			`/* In the following loop there is a virtual buffer consisting of the`
			`* text transliterated so far followed by the untransliterated text. There is`
			`* also a cursor, which may be in the already transliterated buffer or just`
			`* before the untransliterated text.`
			`*`
			`* Example: rules 1. ab>x\|y`
			`* 2. yc>z`
			`*`
			`* []\|eabcd start - no match, copy e to tranlated buffer`
			`* [e]\|abcd match rule 1 - copy output & adjust cursor`
			`* [ex\|y]cd match rule 2 - copy output & adjust cursor`
			`* [exz]\|d no match, copy d to transliterated buffer`
			`* [exzd]\| done`
			`*`
			`* cursor: an index into the virtual buffer, 0..result.length()-1.`
			`* Matches take place at the cursor. If there is no match, the cursor`
			`* is advanced, and one character is moved from the source text to the`
			`* result buffer.`
			`*`
			`* start, limit: these designate the substring of the source text which`
			`* has not been processed yet. The range of offsets is start..limit-1.`
			`* At any moment the virtual buffer consists of result +`
			`* text.substring(start, limit).`
			`*/`
			`int32_t cursor = 0;`
			`result.remove();`
			`while (start < limit \|\| cursor < result.length()) {`
			`TransliterationRule* r = data->ruleSet.findMatch(text, start, limit,`
			`result,`
			`cursor,`
			`*data,`
			`getFilter());`
			`if (r == 0) {`
			`if (cursor == result.length()) {`
			`result.append(text.charAt(start++));`
			`}`
			`++cursor;`
			`} else {`
			`// At this point we have a match of one or more`
			`// characters. The characters cover the range [cursor,`
			`// cursor + r->getKeyLength()) - a half-open interval.`
			`// The index values refer to a virtual buffer with result`
			`// holding [0, result.length()) and text holding`
			`// [result.length(),...).`

			`// First, figure out the range of result being replaced.`
			`int32_t rfirst = cursor;`
ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_ X-SVN-Rev: 473 1999-12-28 23:57:50 +00:00			`int32_t rlimit = uprv_min(result.length(),`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`cursor + r->getKeyLength());`

			`// resultPad is length of result to right of cursor; >= 0`
			`int32_t resultPad = result.length() - cursor;`

			`if (r->getKeyLength() > resultPad) {`
			`start += r->getKeyLength() - resultPad;`
			`}`

			`result.replaceBetween(rfirst, rlimit,`
			`r->getOutput());`

			`cursor += r->getCursorPos();`
			`}`
			`}`
			`}`

			`/**`
			`* Transliterates a segment of a string. <code>Transliterator</code> API.`
			`* @param text the string to be transliterated`
			`* @param start the beginning index, inclusive; <code>0 <= start`
			`* <= limit</code>.`
			`* @param limit the ending index, exclusive; <code>start <= limit`
			`* <= text.length()</code>.`
			`* @return The new limit index`
			`*/`
			`int32_t RuleBasedTransliterator::transliterate(Replaceable& text,`
			`int32_t start,`
			`int32_t limit) const {`
			`/* When using Replaceable, the algorithm is simpler, since we don't have`
			`* two separate buffers. We keep start and limit fixed the entire time,`
			`* relative to the text -- limit may move numerically if text is`
			`* inserted or removed. The cursor moves from start to limit, with`
			`* replacements happening under it.`
			`*`
			`* Example: rules 1. ab>x\|y`
			`* 2. yc>z`
			`*`
			`* \|eabcd start - no match, advance cursor`
			`* e\|abcd match rule 1 - change text & adjust cursor`
			`* ex\|ycd match rule 2 - change text & adjust cursor`
			`* exz\|d no match, advance cursor`
			`* exzd\| done`
			`*/`
			`int32_t cursor = start;`
			`while (cursor < limit) {`
			`TransliterationRule* r =`
			`data->ruleSet.findMatch(text, start, limit,`
			`cursor, *data,`
			`getFilter());`
			`if (r == 0) {`
			`++cursor;`
			`} else {`
			`text.handleReplaceBetween(cursor, cursor + r->getKeyLength(),`
			`r->getOutput());`
			`limit += r->getOutput().length() - r->getKeyLength();`
			`cursor += r->getCursorPos();`
			`}`
			`}`
			`return limit;`
			`}`

			`/**`
			`* Implements {@link Transliterator#handleKeyboardTransliterate}.`
			`*/`
			`void`
			`RuleBasedTransliterator::handleKeyboardTransliterate(Replaceable& text,`
			`int32_t index[3]) const {`
			`int32_t start = index[START];`
			`int32_t limit = index[LIMIT];`
			`int32_t cursor = index[CURSOR];`

			`bool_t isPartial;`

			`while (cursor < limit) {`
			`TransliterationRule* r = data->ruleSet.findIncrementalMatch(`
			`text, start, limit, cursor,`
			`*data, isPartial,`
			`getFilter());`
			`/* If we match a rule then apply it by replacing the key`
			`* with the rule output and repositioning the cursor`
			`* appropriately. If we get a partial match, then we`
			`* can't do anything without more text; return with the`
			`* cursor at the current position. If we get null, then`
			`* there is no match at this position, and we can advance`
			`* the cursor.`
			`*/`
			`if (r == 0) {`
			`if (isPartial) {`
			`break;`
			`} else {`
			`++cursor;`
			`}`
			`} else {`
			`text.handleReplaceBetween(cursor, cursor + r->getKeyLength(),`
			`r->getOutput());`
			`limit += r->getOutput().length() - r->getKeyLength();`
			`cursor += r->getCursorPos();`
			`}`
			`}`

			`index[LIMIT] = limit;`
			`index[CURSOR] = cursor;`
			`}`

			`/**`
			`* Returns the length of the longest context required by this transliterator.`
			`* This is <em>preceding</em> context.`
			`* @return Maximum number of preceding context characters this`
			`* transliterator needs to examine`
			`*/`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`int32_t RuleBasedTransliterator::getMaximumContextLength(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return data->ruleSet.getMaximumContextLength();`
			`}`