scuffed-code/icu4c/source/i18n/nortrans.cpp

/*
**********************************************************************
*   Copyright (C) 2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/03/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/nortrans.h"
#include "unormimp.h"

U_NAMESPACE_BEGIN

U_CDECL_BEGIN

/*
 * This is an implementation of a code unit (UChar) iterator
 * based on a Replaceable object.
 * It is used with the internal API for incremental normalization.
 *
 * The UCharIterator.context field holds a pointer to the Replaceable.
 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
 * and the iteration index.
 */

static int32_t U_CALLCONV
replaceableIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    int32_t pos;

    switch(origin) {
    case UITERATOR_START:
        pos=iter->start+delta;
        break;
    case UITERATOR_CURRENT:
        pos=iter->index+delta;
        break;
    case UITERATOR_END:
        pos=iter->limit+delta;
        break;
    default:
        /* not a valid origin, no move */
        break;
    }

    if(pos<iter->start) {
        pos=iter->start;
    } else if(pos>iter->limit) {
        pos=iter->limit;
    }

    return iter->index=pos;
}

static UBool U_CALLCONV
replaceableIteratorHasNext(UCharIterator *iter) {
    return iter->index<iter->limit;
}

static UBool U_CALLCONV
replaceableIteratorHasPrevious(UCharIterator *iter) {
    return iter->index>iter->start;
}

static UChar U_CALLCONV
replaceableIteratorCurrent(UCharIterator *iter) {
    if(iter->index<iter->limit) {
        return ((Replaceable *)(iter->context))->charAt(iter->index);
    } else {
        return 0xffff;
    }
}

static UChar U_CALLCONV
replaceableIteratorNext(UCharIterator *iter) {
    if(iter->index<iter->limit) {
        return ((Replaceable *)(iter->context))->charAt(iter->index++);
    } else {
        return 0xffff;
    }
}

static UChar U_CALLCONV
replaceableIteratorPrevious(UCharIterator *iter) {
    if(iter->index>iter->start) {
        return ((Replaceable *)(iter->context))->charAt(--iter->index);
    } else {
        return 0xffff;
    }
}

static const UCharIterator replaceableIterator={
    0, 0, 0, 0, 0,
    replaceableIteratorMove,
    replaceableIteratorHasNext,
    replaceableIteratorHasPrevious,
    replaceableIteratorCurrent,
    replaceableIteratorNext,
    replaceableIteratorPrevious
};

U_CDECL_END

/**
 * System registration hook.
 */
void NormalizationTransliterator::registerIDs() {
    UErrorCode errorCode = U_ZERO_ERROR;
    if(!unorm_haveData(&errorCode)) {
        return;
    }

    Transliterator::_registerFactory(UnicodeString("Any-NFC", ""),
                                     _create, integerToken(UNORM_NFC));
    Transliterator::_registerFactory(UnicodeString("Any-NFKC", ""),
                                     _create, integerToken(UNORM_NFKC));
    Transliterator::_registerFactory(UnicodeString("Any-NFD", ""),
                                     _create, integerToken(UNORM_NFD));
    Transliterator::_registerFactory(UnicodeString("Any-NFKD", ""),
                                     _create, integerToken(UNORM_NFKD));
}

/**
 * Factory methods
 */
Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
                                                     Token context) {
    return new NormalizationTransliterator(ID, (UNormalizationMode) context.integer, 0);
}

/**
 * Constructs a transliterator.
 */
NormalizationTransliterator::NormalizationTransliterator(
                                 const UnicodeString& id,
                                 UNormalizationMode mode, int32_t opt) :
    Transliterator(id, 0) {
    fMode = mode;
    options = opt;
}

/**
 * Destructor.
 */
NormalizationTransliterator::~NormalizationTransliterator() {
}

/**
 * Copy constructor.
 */
NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
Transliterator(o) {
    fMode = o.fMode;
    options = o.options;
}

/**
 * Assignment operator.
 */
NormalizationTransliterator& NormalizationTransliterator::operator=(const NormalizationTransliterator& o) {
    Transliterator::operator=(o);
    fMode = o.fMode;
    options = o.options;
    return *this;
}

/**
 * Transliterator API.
 */
Transliterator* NormalizationTransliterator::clone(void) const {
    return new NormalizationTransliterator(*this);
}

// TODO
// TODO
// TODO
// Get rid of this function and use the official Replaceable
// extractBetween() method, when possible
// TODO
// TODO
// TODO
static void _Replaceable_extractBetween(const Replaceable& text,
                                        int32_t start,
                                        int32_t limit,
                                        UChar* buffer) {
    while (start < limit) {
        *buffer++ = text.charAt(start++);
    }
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                      UBool isIncremental) const {
    // start and limit of the input range
    int32_t start = offsets.start;
    int32_t limit = offsets.limit;
    int32_t length, delta;

    if(start >= limit) {
        return;
    }

    // a C code unit iterator, implemented around the Replaceable
    UCharIterator iter = replaceableIterator;
    iter.context = &text;
    // iter.length = text.length(); is not used

    // the output string and buffer pointer
    UnicodeString output;
    UChar *buffer;

    UErrorCode errorCode;

    /*
     * Normalize as short chunks at a time as possible even in
     * bulk mode, so that styled text is minimally disrupted.
     * In incremental mode, a chunk that ends with offsets.limit
     * must not be normalized.
     *
     * If it was known that the input text is not styled, then
     * a bulk mode normalization could look like this:
     *

    UChar staticChars[256];
    UnicodeString input;

    length = limit - start;
    input.setTo(staticChars, 0, sizeof(staticChars)/U_SIZEOF_UCHAR); // writable alias

    _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
    input.releaseBuffer(length);

    UErrorCode status = U_ZERO_ERROR;
    Normalizer::normalize(input, fMode, options, output, status);

    text.handleReplaceBetween(start, limit, output);

    int32_t delta = output.length() - length;
    offsets.contextLimit += delta;
    offsets.limit += delta;
    offsets.start = limit + delta;

     *
     */
    while(start < limit) {
        // set the iterator limits for the remaining input range
        // this is a moving target because of the replacements in the text object
        iter.start = iter.index = start;
        iter.limit = limit;

        // incrementally normalize a small chunk of the input
        buffer = output.getBuffer(-1);
        errorCode = U_ZERO_ERROR;
        length = unorm_nextNormalize(buffer, output.getCapacity(), &iter,
                                     fMode, FALSE, &errorCode);
        output.releaseBuffer(length);

        if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
            // use a larger output string buffer and do it again from the start
            iter.index = start;
            buffer = output.getBuffer(length);
            errorCode = U_ZERO_ERROR;
            length = unorm_nextNormalize(buffer, output.getCapacity(), &iter,
                                         fMode, FALSE, &errorCode);
            output.releaseBuffer(length);
        }

        if(U_FAILURE(errorCode)) {
            break;
        }

        limit = iter.index;
        if(isIncremental && limit == iter.limit) {
            // stop in incremental mode when we reach the input limit
            // in case there are additional characters that could change the
            // normalization result
            break;
        }

        // replace the input chunk with its normalized form
        text.handleReplaceBetween(start, limit, output);

        // update all necessary indexes accordingly
        delta = length - (limit - start);   // length change in the text object
        start = limit += delta;             // the next chunk starts where this one ends, with adjustment
        limit = offsets.limit += delta;     // set the iteration limit to the adjusted end of the input range
        offsets.contextLimit += delta;
    }

    offsets.start = start;
}

U_NAMESPACE_END