scuffed-code/icu4c/source/i18n/nortrans.cpp

/*
**********************************************************************
*   Copyright (C) 2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/03/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/nortrans.h"

U_NAMESPACE_BEGIN

/**
 * System registration hook.
 */
void NormalizationTransliterator::registerIDs() {
    Transliterator::_registerFactory(UnicodeString("Any-NFC", ""),
                                     _create, integerToken(UNORM_NFC));
    Transliterator::_registerFactory(UnicodeString("Any-NFKC", ""),
                                     _create, integerToken(UNORM_NFKC));
    Transliterator::_registerFactory(UnicodeString("Any-NFD", ""),
                                     _create, integerToken(UNORM_NFD));
    Transliterator::_registerFactory(UnicodeString("Any-NFKD", ""),
                                     _create, integerToken(UNORM_NFKD));
}

/**
 * Factory method handed to Transliterator::_registerFactory by
 * registerIDs().
 *
 * @param ID      the ID to give the new transliterator
 * @param context token whose integer field holds the
 *                UNormalizationMode value stored at registration
 * @return a newly allocated NormalizationTransliterator created with
 *         options set to 0
 */
Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
                                                     Token context) {
    // Recover the mode that registerIDs() packed into the token.
    const UNormalizationMode requestedMode = (UNormalizationMode) context.integer;
    return new NormalizationTransliterator(ID, requestedMode, 0);
}

/**
 * Constructs a transliterator.
 *
 * @param id   ID passed through to the Transliterator base class
 *             (the base is constructed with a second argument of 0)
 * @param mode normalization mode later handed to Normalizer::normalize
 * @param opt  options value later handed to Normalizer::normalize
 */
NormalizationTransliterator::NormalizationTransliterator(
                                 const UnicodeString& id,
                                 UNormalizationMode mode, int32_t opt) :
    Transliterator(id, 0),
    fMode(mode),
    options(opt) {
}

/**
 * Destructor.
 *
 * Intentionally empty: the only state this file sets on the object
 * (fMode and options) is copied by plain assignment and needs no
 * explicit release here.
 */
NormalizationTransliterator::~NormalizationTransliterator() {
}

/**
 * Copy constructor.
 *
 * Copy-constructs the Transliterator base from o, then duplicates
 * the normalization mode and options.
 */
NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
    Transliterator(o),
    fMode(o.fMode),
    options(o.options) {
}

/**
 * Assignment operator.
 *
 * Assigns the Transliterator base portion first, then copies the
 * normalization mode and options from o.
 *
 * @return a reference to this object
 */
NormalizationTransliterator& NormalizationTransliterator::operator=(const NormalizationTransliterator& o) {
    // Base-class state first.
    Transliterator::operator=(o);

    // Then this class's own fields.
    this->options = o.options;
    this->fMode = o.fMode;

    return *this;
}

/**
 * Transliterator API.
 *
 * @return a newly allocated copy of this object, made with the copy
 *         constructor
 */
Transliterator* NormalizationTransliterator::clone(void) const {
    NormalizationTransliterator* duplicate = new NormalizationTransliterator(*this);
    return duplicate;
}

// TODO
// TODO
// TODO
// Get rid of this function and use the official Replaceable
// extractBetween() method, when possible
// TODO
// TODO
// TODO
static void _Replaceable_extractBetween(const Replaceable& text,
                                        int32_t start,
                                        int32_t limit,
                                        UChar* buffer) {
    while (start < limit) {
        *buffer++ = text.charAt(start++);
    }
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Normalizes the text in [offsets.start, limit) using fMode and
 * options, where limit is offsets.limit in the non-incremental case
 * and a possibly smaller index in the incremental case (see below).
 * On success the range is replaced by the normalized text,
 * offsets.limit and offsets.contextLimit are shifted by the change in
 * length, and offsets.start is moved to the end of the replaced text.
 *
 * If there is nothing to normalize, if the scratch buffer cannot be
 * allocated, or if Normalizer::normalize reports a failure, the text
 * and the offsets are left untouched.
 *
 * @param text          the text to modify in place
 * @param offsets       the positions to process; updated on success
 * @param isIncremental if TRUE, trailing characters that might still
 *                      combine with later input are held back
 */
void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                      UBool isIncremental) const {
    int32_t start = offsets.start;
    int32_t limit = offsets.limit;

    // For the non-incremental case normalize right up to
    // offsets.limit.  In the incremental case, find the last base
    // character b, and pass everything from the start up to the
    // character before b to normalizer.
    if (isIncremental) {
        // Wrinkle: Jamo has a combining class of zero, but we
        // don't want to normalize individual Jamo one at a time
        // if we're composing incrementally.  If we are composing
        // in incremental mode then we collect up trailing jamo
        // and save them for next time.
        UBool doStandardBackup = TRUE;
        if (fMode == UNORM_NFC || fMode == UNORM_NFKC) {
            // As a minor optimization, if there are three or more
            // trailing jamo, we let the first three through --
            // these should be handled correctly.
            UChar c;
            while (limit > offsets.start &&
                   (c=text.charAt(limit-1)) >= 0x1100 &&
                   c < 0x1200) {
                --limit;
            }
            // Characters in [limit, offsets.limit) are jamo.
            // If we have at least 3 jamo, then allow them
            // to be transliterated.  If we have zero jamo,
            // then proceed as usual.
            if (limit < offsets.limit) {
                if ((offsets.limit - limit) >= 3) {
                    limit += 3;
                }
                doStandardBackup = FALSE;
            }
        }

        if (doStandardBackup) {
            // Back up over the last character and any characters
            // before it with a non-zero combining class, so that
            // [start, limit) stops just before the last base character.
            --limit;
            while (limit > start &&
                   u_getCombiningClass(text.charAt(limit)) != 0) {
                --limit;
            }
        }
    }

    if (limit > start) {

        // Remember the input length now; it is needed after the
        // scratch buffer has been released.
        const int32_t inputLength = limit - start;

        UChar staticChars[256];
        UChar* chars = staticChars;

        if (inputLength > 255) {
            // Allocate extra buffer space if needed
            chars = new UChar[inputLength+1];
            if (chars == NULL) {
                return;
            }
        }

        _Replaceable_extractBetween(text, start, limit, chars);

        UnicodeString output;
        UErrorCode status = U_ZERO_ERROR;
        {
            // The alias points into chars, so keep its lifetime
            // strictly inside the lifetime of that buffer.
            UnicodeString input(FALSE, chars, inputLength); // readonly alias
            Normalizer::normalize(input, fMode, options, output, status);
        }

        if (chars != staticChars) {
            delete[] chars;
        }

        // Do not overwrite the caller's text with the result of a
        // failed normalization; leave text and offsets as they were.
        if (U_FAILURE(status)) {
            return;
        }

        text.handleReplaceBetween(start, limit, output);

        int32_t delta = output.length() - inputLength;
        offsets.contextLimit += delta;
        offsets.limit += delta;
        offsets.start = limit + delta;
    }
}

U_NAMESPACE_END