2001-06-11 19:51:46 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
|
|
|
* Copyright (C) 2001, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 05/24/01 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/uchar.h"
|
|
|
|
#include "unicode/titletrn.h"
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ID for this transliterator.
|
|
|
|
*/
|
|
|
|
const char* TitlecaseTransliterator::_ID = "Any-Title";
|
|
|
|
|
|
|
|
/**
 * Constructs a titlecase transliterator registered under _ID.
 *
 * @param adoptedFilter filter forwarded unchanged to the Transliterator
 * base-class constructor (the name suggests ownership is transferred;
 * confirm against the base class).
 */
TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
{
    // handleTransliterate() may inspect up to two characters of left
    // context: for "can't" it has to see past the apostrophe to the
    // letter in front of it.
    setMaximumContextLength(2);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Destructor.
|
|
|
|
*/
|
|
|
|
TitlecaseTransliterator::~TitlecaseTransliterator() {}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Copy constructor.
|
|
|
|
*/
|
|
|
|
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
|
|
|
|
Transliterator(o) {}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Assignment operator.
|
|
|
|
*/
|
|
|
|
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
|
|
|
|
const TitlecaseTransliterator& o) {
|
|
|
|
Transliterator::operator=(o);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Transliterator API.
|
|
|
|
*/
|
|
|
|
Transliterator* TitlecaseTransliterator::clone(void) const {
|
|
|
|
return new TitlecaseTransliterator(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implements {@link Transliterator#handleTransliterate}.
|
|
|
|
*/
|
|
|
|
void TitlecaseTransliterator::handleTransliterate(
|
|
|
|
Replaceable& text, UTransPosition& offsets,
|
|
|
|
UBool isIncremental) const {
|
|
|
|
|
2001-06-29 21:19:49 +00:00
|
|
|
// NOTE: This method contains some special case code to handle
|
|
|
|
// apostrophes between alpha characters. We want to have
|
|
|
|
// "can't" => "Can't" (not "Can'T"). This may be incorrect
|
|
|
|
// for some locales, e.g., "l'arbre" => "L'Arbre" (?).
|
|
|
|
// TODO: Revisit this.
|
|
|
|
|
2001-06-11 19:51:46 +00:00
|
|
|
// Determine if there is a preceding letter character in the
|
|
|
|
// left context (if there is any left context).
|
|
|
|
UBool wasLastCharALetter = FALSE;
|
|
|
|
if (offsets.start > offsets.contextStart) {
|
2001-06-29 21:19:49 +00:00
|
|
|
UChar c = text.charAt(offsets.start - 1);
|
|
|
|
// Handle the case "Can'|t", where the | marks the context
|
|
|
|
// boundary. We only handle a single apostrophe.
|
|
|
|
if (c == 0x0027 /*'*/ && (offsets.start-2) >= offsets.contextStart) {
|
|
|
|
c = text.charAt(offsets.start - 2);
|
|
|
|
}
|
|
|
|
wasLastCharALetter = u_isalpha(c);
|
2001-06-11 19:51:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// The buffer used to batch up changes to be made
|
|
|
|
UnicodeString buffer;
|
|
|
|
int32_t bufStart = 0;
|
|
|
|
int32_t bufLimit = -1;
|
|
|
|
|
|
|
|
int32_t start;
|
|
|
|
for (start = offsets.start; start < offsets.limit; ++start) {
|
|
|
|
// For each character, if the preceding character was a
|
|
|
|
// non-letter, and this character is a letter, then apply
|
|
|
|
// the titlecase transformation. Otherwise apply the
|
|
|
|
// lowercase transformation.
|
2001-07-17 23:36:41 +00:00
|
|
|
UChar32 c = text.charAt(start);
|
2001-06-11 19:51:46 +00:00
|
|
|
if (u_isalpha(c)) {
|
|
|
|
UChar32 newChar;
|
|
|
|
if (wasLastCharALetter) {
|
|
|
|
newChar = u_tolower(c);
|
|
|
|
} else {
|
|
|
|
newChar = u_totitle(c);
|
|
|
|
}
|
|
|
|
if (c != newChar) {
|
|
|
|
// This is the simple way of doing this:
|
|
|
|
//text.replace(start, start+1,
|
|
|
|
// String.valueOf((char) newChar));
|
|
|
|
|
|
|
|
// Instead, we do something more complicated that
|
|
|
|
// minimizes the number of calls to
|
|
|
|
// Replaceable.replace(). We batch up the changes
|
|
|
|
// we want to make in a buffer, recording
|
|
|
|
// our position and dumping the buffer out when a
|
|
|
|
// non-contiguous change arrives.
|
|
|
|
if (bufLimit == start) {
|
|
|
|
++bufLimit;
|
|
|
|
// Fall through and append newChar below
|
|
|
|
} else {
|
|
|
|
if (buffer.length() > 0) {
|
|
|
|
text.handleReplaceBetween(bufStart, bufLimit, buffer);
|
|
|
|
buffer.truncate(0);
|
|
|
|
}
|
|
|
|
bufStart = start;
|
|
|
|
bufLimit = start+1;
|
|
|
|
// Fall through and append newChar below
|
|
|
|
}
|
|
|
|
buffer.append(newChar);
|
|
|
|
}
|
|
|
|
wasLastCharALetter = TRUE;
|
2001-06-29 21:19:49 +00:00
|
|
|
} else if (c == 0x0027 /*'*/ && wasLastCharALetter) {
|
|
|
|
// Ignore a single embedded apostrophe, so that "can't" =>
|
|
|
|
// "Can't", not "Can'T".
|
2001-06-11 19:51:46 +00:00
|
|
|
} else {
|
|
|
|
wasLastCharALetter = FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// assert(start == offsets.limit);
|
|
|
|
offsets.start = start;
|
|
|
|
|
|
|
|
if (buffer.length() > 0) {
|
|
|
|
text.handleReplaceBetween(bufStart, bufLimit, buffer);
|
|
|
|
}
|
|
|
|
}
|