scuffed-code/icu4c/source/i18n/titletrn.cpp

/*
**********************************************************************
*   Copyright (C) 2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/24/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/uchar.h"
#include "unicode/titletrn.h"

U_NAMESPACE_BEGIN

/**
 * ID for this transliterator.
 */
const char TitlecaseTransliterator::_ID[] = "Any-Title";

TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :
    Transliterator(_ID, adoptedFilter) {
    // Need to look back 2 characters in the case of "can't"
    setMaximumContextLength(2);
}

/**
 * Destructor.
 */
TitlecaseTransliterator::~TitlecaseTransliterator() {}

/**
 * Copy constructor.
 */
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
    Transliterator(o) {}

/**
 * Assignment operator.
 */
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
                             const TitlecaseTransliterator& o) {
    Transliterator::operator=(o);
    return *this;
}

/**
 * Transliterator API.
 */
Transliterator* TitlecaseTransliterator::clone(void) const {
    return new TitlecaseTransliterator(*this);
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void TitlecaseTransliterator::handleTransliterate(
                                  Replaceable& text, UTransPosition& offsets,
                                  UBool isIncremental) const {

    // NOTE: This method contains some special case code to handle
    // apostrophes between alpha characters.  We want to have
    // "can't" => "Can't" (not "Can'T").  This may be incorrect
    // for some locales, e.g., "l'arbre" => "L'Arbre" (?).
    // TODO: Revisit this.

    // Determine if there is a preceding letter character in the
    // left context (if there is any left context).
    UBool wasLastCharALetter = FALSE;
    if (offsets.start > offsets.contextStart) {
        UChar c = text.charAt(offsets.start - 1);
        // Handle the case "Can'|t", where the | marks the context
        // boundary.  We only handle a single apostrophe.
        if (c == 0x0027 /*'*/ && (offsets.start-2) >= offsets.contextStart) {
            c = text.charAt(offsets.start - 2);
        }
        wasLastCharALetter = u_isalpha(c);
    }

    // The buffer used to batch up changes to be made
    UnicodeString buffer;
    int32_t bufStart = 0;
    int32_t bufLimit = -1;

    int32_t start;
    for (start = offsets.start; start < offsets.limit; ++start) {
        // For each character, if the preceding character was a
        // non-letter, and this character is a letter, then apply
        // the titlecase transformation.  Otherwise apply the
        // lowercase transformation.
        UChar32 c = text.charAt(start);
        if (u_isalpha(c)) {
            UChar32 newChar;
            if (wasLastCharALetter) {
                newChar = u_tolower(c);
            } else {
                newChar = u_totitle(c);
            }
            if (c != newChar) {
                // This is the simple way of doing this:
                //text.replace(start, start+1,
                //             String.valueOf((char) newChar));

                // Instead, we do something more complicated that
                // minimizes the number of calls to
                // Replaceable.replace().  We batch up the changes
                // we want to make in a buffer, recording
                // our position and dumping the buffer out when a
                // non-contiguous change arrives.
                if (bufLimit == start) {
                    ++bufLimit;
                    // Fall through and append newChar below
                } else {
                    if (buffer.length() > 0) {
                        text.handleReplaceBetween(bufStart, bufLimit, buffer);
                        buffer.truncate(0);
                    }
                    bufStart = start;
                    bufLimit = start+1;
                    // Fall through and append newChar below
                }
                buffer.append(newChar);
            }
            wasLastCharALetter = TRUE;
        } else if (c == 0x0027 /*'*/ && wasLastCharALetter) {
            // Ignore a single embedded apostrophe, so that "can't" =>
            // "Can't", not "Can'T".
        } else {
            wasLastCharALetter = FALSE;
        }
    }
    // assert(start == offsets.limit);
    offsets.start = start;

    if (buffer.length() > 0) {
        text.handleReplaceBetween(bufStart, bufLimit, buffer);
    }
}

U_NAMESPACE_END