scuffed-code/icu4c/source/i18n/titletrn.cpp

/*
**********************************************************************
*   Copyright (C) 2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/24/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/uchar.h"
#include "unicode/titletrn.h"

/**
 * ID for this transliterator.  This is the string handed to the
 * Transliterator base-class constructor by
 * TitlecaseTransliterator(UnicodeFilter*).
 */
const char* TitlecaseTransliterator::_ID = "Any-Title";

/**
 * Constructs a titlecase transliterator registered under _ID.
 * @param adoptedFilter filter forwarded unchanged to the Transliterator
 * base constructor (presumably adopted by the base, as the name
 * suggests -- confirm against Transliterator).
 */
TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
{
    // handleTransliterate() inspects up to two characters to the left
    // of the start position (a letter followed by an apostrophe, as in
    // "can't"), so declare a context length of 2.
    setMaximumContextLength(2);
}

/**
 * Destructor.  Performs no cleanup of its own.
 */
TitlecaseTransliterator::~TitlecaseTransliterator()
{
}

/**
 * Copy constructor.  Copy-constructs the Transliterator base from
 * the given object; nothing else is copied here.
 * @param o the transliterator to copy
 */
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o)
    : Transliterator(o)
{
}

/**
 * Assignment operator.  Delegates to Transliterator::operator= and
 * returns a reference to this object.
 * @param o the transliterator to assign from
 * @return *this
 */
TitlecaseTransliterator&
TitlecaseTransliterator::operator=(const TitlecaseTransliterator& o)
{
    Transliterator::operator=(o);
    return *this;
}

/**
 * Transliterator API.  Returns a newly allocated copy of this object,
 * created with the copy constructor.
 * @return a heap-allocated TitlecaseTransliterator, returned as a
 * Transliterator*
 */
Transliterator* TitlecaseTransliterator::clone(void) const
{
    TitlecaseTransliterator* copy = new TitlecaseTransliterator(*this);
    return copy;
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Scans the code units in [offsets.start, offsets.limit).  A letter
 * that does not continue a word is mapped with u_totitle(); a letter
 * that continues a word is mapped with u_tolower().  A single
 * apostrophe directly after a letter does not end the word, so
 * "can't" becomes "Can't" rather than "Can'T".  A second consecutive
 * apostrophe does end the word; this keeps the result independent of
 * anything further back than the two characters of left context this
 * transliterator declares.
 *
 * @param text the text to modify in place
 * @param offsets the positions to work on; on return offsets.start
 * holds the index at which the scan stopped
 * @param isIncremental not consulted by this implementation
 */
void TitlecaseTransliterator::handleTransliterate(
                                  Replaceable& text, UTransPosition& offsets,
                                  UBool isIncremental) const {

    // NOTE: This method contains some special case code to handle
    // apostrophes between alpha characters.  We want to have
    // "can't" => "Can't" (not "Can'T").  This may be incorrect
    // for some locales, e.g., "l'arbre" => "L'Arbre" (?).
    // TODO: Revisit this.

    // TRUE while we are inside a word, i.e. the previous character
    // was a letter, or a letter followed by one apostrophe.
    UBool wasLastCharALetter = FALSE;

    // TRUE when the previous character was the one apostrophe that a
    // word is allowed to contain after a letter.  A further
    // apostrophe then terminates the word.
    UBool wasLastCharAnApostrophe = FALSE;

    // Determine if there is a preceding letter character in the
    // left context (if there is any left context).
    if (offsets.start > offsets.contextStart) {
        UChar c = text.charAt(offsets.start - 1);
        // Handle the case "Can'|t", where the | marks the context
        // boundary.  We only handle a single apostrophe.
        if (c == 0x0027 /*'*/ && (offsets.start-2) >= offsets.contextStart) {
            c = text.charAt(offsets.start - 2);
            // The word's single apostrophe has already been used up
            // if (and only if) a letter precedes it.
            wasLastCharAnApostrophe = u_isalpha(c);
        }
        wasLastCharALetter = u_isalpha(c);
    }

    // The buffer used to batch up changes to be made.  It holds the
    // replacement text for the pending range [bufStart, bufLimit).
    UnicodeString buffer;
    int32_t bufStart = 0;
    int32_t bufLimit = -1;

    int32_t start;
    for (start = offsets.start; start < offsets.limit; ++start) {
        // For each character, if the preceding character was a
        // non-letter, and this character is a letter, then apply
        // the titlecase transformation.  Otherwise apply the
        // lowercase transformation.
        UChar32 c = text.charAt(start);
        if (u_isalpha(c)) {
            UChar32 newChar;
            if (wasLastCharALetter) {
                newChar = u_tolower(c);
            } else {
                newChar = u_totitle(c);
            }
            if (c != newChar) {
                // This is the simple way of doing this:
                //text.replace(start, start+1,
                //             String.valueOf((char) newChar));

                // Instead, we do something more complicated that
                // minimizes the number of calls to
                // Replaceable.replace().  We batch up the changes
                // we want to make in a buffer, recording
                // our position and dumping the buffer out when a
                // non-contiguous change arrives.
                // NOTE(review): flushing mid-scan assumes each
                // replacement is as long as the text it replaces, so
                // later indices do not shift -- confirm.
                if (bufLimit == start) {
                    ++bufLimit;
                    // Fall through and append newChar below
                } else {
                    if (buffer.length() > 0) {
                        text.handleReplaceBetween(bufStart, bufLimit, buffer);
                        buffer.truncate(0);
                    }
                    bufStart = start;
                    bufLimit = start+1;
                    // Fall through and append newChar below
                }
                buffer.append(newChar);
            }
            wasLastCharALetter = TRUE;
            wasLastCharAnApostrophe = FALSE;
        } else if (c == 0x0027 /*'*/ && wasLastCharALetter
                   && !wasLastCharAnApostrophe) {
            // Ignore a single embedded apostrophe, so that "can't" =>
            // "Can't", not "Can'T".  Remember that it has been seen so
            // that a second consecutive apostrophe ends the word, as
            // it does when the apostrophes lie in the left context.
            wasLastCharAnApostrophe = TRUE;
        } else {
            wasLastCharALetter = FALSE;
            wasLastCharAnApostrophe = FALSE;
        }
    }
    // assert(start == offsets.limit);
    offsets.start = start;

    // Flush whatever changes are still pending.
    if (buffer.length() > 0) {
        text.handleReplaceBetween(bufStart, bufLimit, buffer);
    }
}
ICU-965 create Any-Lower, Any-Upper, and Any-Title transliterators X-SVN-Rev: 4941 2001-06-11 19:51:46 +00:00			`/*`
			`**********************************************************************`
			`* Copyright (C) 2001, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 05/24/01 aliu Creation.`
			`**********************************************************************`
			`*/`

			`#include "unicode/uchar.h"`
			`#include "unicode/titletrn.h"`

			`/**`
			`* ID for this transliterator.`
			`*/`
			`const char* TitlecaseTransliterator::_ID = "Any-Title";`

			`TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :`
			`Transliterator(_ID, adoptedFilter) {`
ICU-965 in Any-Title make can't -> Can't, not Can'T X-SVN-Rev: 5144 2001-06-29 21:19:49 +00:00			`// Need to look back 2 characters in the case of "can't"`
			`setMaximumContextLength(2);`
ICU-965 create Any-Lower, Any-Upper, and Any-Title transliterators X-SVN-Rev: 4941 2001-06-11 19:51:46 +00:00			`}`

			`/**`
			`* Destructor.`
			`*/`
			`TitlecaseTransliterator::~TitlecaseTransliterator() {}`

			`/**`
			`* Copy constructor.`
			`*/`
			`TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :`
			`Transliterator(o) {}`

			`/**`
			`* Assignment operator.`
			`*/`
			`TitlecaseTransliterator& TitlecaseTransliterator::operator=(`
			`const TitlecaseTransliterator& o) {`
			`Transliterator::operator=(o);`
			`return *this;`
			`}`

			`/**`
			`* Transliterator API.`
			`*/`
			`Transliterator* TitlecaseTransliterator::clone(void) const {`
			`return new TitlecaseTransliterator(*this);`
			`}`

			`/**`
			`* Implements {@link Transliterator#handleTransliterate}.`
			`*/`
			`void TitlecaseTransliterator::handleTransliterate(`
			`Replaceable& text, UTransPosition& offsets,`
			`UBool isIncremental) const {`

ICU-965 in Any-Title make can't -> Can't, not Can'T X-SVN-Rev: 5144 2001-06-29 21:19:49 +00:00			`// NOTE: This method contains some special case code to handle`
			`// apostrophes between alpha characters. We want to have`
			`// "can't" => "Can't" (not "Can'T"). This may be incorrect`
			`// for some locales, e.g., "l'arbre" => "L'Arbre" (?).`
			`// TODO: Revisit this.`

ICU-965 create Any-Lower, Any-Upper, and Any-Title transliterators X-SVN-Rev: 4941 2001-06-11 19:51:46 +00:00			`// Determine if there is a preceding letter character in the`
			`// left context (if there is any left context).`
			`UBool wasLastCharALetter = FALSE;`
			`if (offsets.start > offsets.contextStart) {`
ICU-965 in Any-Title make can't -> Can't, not Can'T X-SVN-Rev: 5144 2001-06-29 21:19:49 +00:00			`UChar c = text.charAt(offsets.start - 1);`
			`// Handle the case "Can'|t", where the | marks the context`
			`// boundary. We only handle a single apostrophe.`
			`if (c == 0x0027 /*'*/ && (offsets.start-2) >= offsets.contextStart) {`
			`c = text.charAt(offsets.start - 2);`
			`}`
			`wasLastCharALetter = u_isalpha(c);`
ICU-965 create Any-Lower, Any-Upper, and Any-Title transliterators X-SVN-Rev: 4941 2001-06-11 19:51:46 +00:00			`}`

			`// The buffer used to batch up changes to be made`
			`UnicodeString buffer;`
			`int32_t bufStart = 0;`
			`int32_t bufLimit = -1;`

			`int32_t start;`
			`for (start = offsets.start; start < offsets.limit; ++start) {`
			`// For each character, if the preceding character was a`
			`// non-letter, and this character is a letter, then apply`
			`// the titlecase transformation. Otherwise apply the`
			`// lowercase transformation.`
ICU-1053 move filter logic into Transliterator.filteredTransliterate X-SVN-Rev: 5258 2001-07-17 23:36:41 +00:00			`UChar32 c = text.charAt(start);`
ICU-965 create Any-Lower, Any-Upper, and Any-Title transliterators X-SVN-Rev: 4941 2001-06-11 19:51:46 +00:00			`if (u_isalpha(c)) {`
			`UChar32 newChar;`
			`if (wasLastCharALetter) {`
			`newChar = u_tolower(c);`
			`} else {`
			`newChar = u_totitle(c);`
			`}`
			`if (c != newChar) {`
			`// This is the simple way of doing this:`
			`//text.replace(start, start+1,`
			`// String.valueOf((char) newChar));`

			`// Instead, we do something more complicated that`
			`// minimizes the number of calls to`
			`// Replaceable.replace(). We batch up the changes`
			`// we want to make in a buffer, recording`
			`// our position and dumping the buffer out when a`
			`// non-contiguous change arrives.`
			`if (bufLimit == start) {`
			`++bufLimit;`
			`// Fall through and append newChar below`
			`} else {`
			`if (buffer.length() > 0) {`
			`text.handleReplaceBetween(bufStart, bufLimit, buffer);`
			`buffer.truncate(0);`
			`}`
			`bufStart = start;`
			`bufLimit = start+1;`
			`// Fall through and append newChar below`
			`}`
			`buffer.append(newChar);`
			`}`
			`wasLastCharALetter = TRUE;`
ICU-965 in Any-Title make can't -> Can't, not Can'T X-SVN-Rev: 5144 2001-06-29 21:19:49 +00:00			`} else if (c == 0x0027 /*'*/ && wasLastCharALetter) {`
			`// Ignore a single embedded apostrophe, so that "can't" =>`
			`// "Can't", not "Can'T".`
ICU-965 create Any-Lower, Any-Upper, and Any-Title transliterators X-SVN-Rev: 4941 2001-06-11 19:51:46 +00:00			`} else {`
			`wasLastCharALetter = FALSE;`
			`}`
			`}`
			`// assert(start == offsets.limit);`
			`offsets.start = start;`

			`if (buffer.length() > 0) {`
			`text.handleReplaceBetween(bufStart, bufLimit, buffer);`
			`}`
			`}`