scuffed-code/icu4c/source/i18n/titletrn.cpp

194 lines
5.8 KiB
C++
Raw Normal View History

/*
**********************************************************************
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 05/24/01 aliu Creation.
**********************************************************************
*/
#include "unicode/uchar.h"
#include "titletrn.h"
#include "unicode/uniset.h"
#include "mutex.h"
#include "ucln_in.h"
#include "unicode/ustring.h"
#include "ustr_imp.h"
#include "cpputils.h"
U_NAMESPACE_BEGIN
/**
 * ID for this transliterator. Passed to the Transliterator base
 * class by the locale constructor.
 */
const char TitlecaseTransliterator::_ID[] = "Any-Title";
/**
 * Mutex for statics IN THIS FILE. It is held by handleTransliterate()
 * while SKIP and CASED are created, and destroyed by cleanup().
 */
static UMTX MUTEX = 0;
/**
 * The set of characters we skip. These are neither cased nor
 * non-cased, to us; we copy them verbatim.
 * Created on first use in handleTransliterate() and released by
 * cleanup(); NULL until then.
 */
static UnicodeSet* SKIP = NULL;
/**
 * The set of characters that cause the next non-SKIP character
 * to be lowercased (built from the Lu, Ll and Lt categories).
 * Created together with SKIP in handleTransliterate() and released
 * by cleanup(); NULL until then.
 */
static UnicodeSet* CASED = NULL;
/**
 * Constructs a titlecase transliterator for the given locale.
 * A scratch buffer large enough for the longest case expansion
 * (u_getMaxCaseExpansion() code units) is allocated up front.
 *
 * @param theLoc locale whose name is handed to the case-mapping
 *               functions during transliteration
 */
TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc)
    : Transliterator(_ID, 0),
      loc(theLoc),
      buffer(new UChar[u_getMaxCaseExpansion()])
{
    // Two characters of preceding context are required to handle
    // words such as "can't".
    setMaximumContextLength(2);
}
/**
 * Destructor. Releases the scratch buffer allocated by the
 * constructors.
 */
TitlecaseTransliterator::~TitlecaseTransliterator()
{
    delete[] buffer;
}
/**
 * Copy constructor. The new object gets its own scratch buffer,
 * filled with a copy of the other object's buffer contents.
 *
 * @param o the transliterator to copy
 */
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o)
    : Transliterator(o),
      loc(o.loc),
      buffer(new UChar[u_getMaxCaseExpansion()])
{
    const int32_t capacity = u_getMaxCaseExpansion();
    uprv_arrayCopy(o.buffer, 0, buffer, 0, capacity);
}
/**
 * Assignment operator. Copies the base-class state, the locale and
 * the contents of the scratch buffer; the buffer itself is reused,
 * not reallocated.
 *
 * Self-assignment is a no-op: copying the buffer onto itself would
 * hand identical source and destination ranges to the array copy
 * helper, which must be avoided for memcpy-style copies.
 *
 * @param o the transliterator to copy from
 * @return  a reference to this object
 */
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
        const TitlecaseTransliterator& o) {
    if (this != &o) {
        Transliterator::operator=(o);
        loc = o.loc;
        uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
    }
    return *this;
}
/**
 * Transliterator API. Produces a heap-allocated copy of this
 * object via the copy constructor.
 *
 * @return a newly allocated copy of this transliterator
 */
Transliterator* TitlecaseTransliterator::clone(void) const
{
    Transliterator* duplicate = new TitlecaseTransliterator(*this);
    return duplicate;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Walks the text in [offsets.start, offsets.limit). Each non-SKIP
 * character is titlecased when the previous non-SKIP character was
 * not in CASED (or there was none), and lowercased when it was.
 * SKIP characters are left untouched and do not change the mode.
 *
 * On return offsets.start equals offsets.limit; offsets.limit and
 * offsets.contextLimit are adjusted by any change in text length.
 *
 * @param text          the text to modify in place
 * @param offsets       the context and start/limit positions; updated
 *                      as described above
 * @param isIncremental not consulted by this implementation
 */
void TitlecaseTransliterator::handleTransliterate(
        Replaceable& text, UTransPosition& offsets,
        UBool isIncremental) const {
    // Lazily create the shared sets. The unlocked fast path tests only
    // SKIP, so SKIP must be the LAST pointer stored: once it is seen as
    // non-NULL, CASED is already in place.
    // NOTE(review): the unlocked read is still not a formal memory
    // barrier; confirm against the platform's UMTX guarantees.
    if (SKIP == NULL) {
        Mutex lock(&MUTEX);
        if (SKIP == NULL) {
            UErrorCode ec = U_ZERO_ERROR;
            UnicodeSet* skipSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]"), ec);
            UnicodeSet* casedSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[:Lu:] [:Ll:] [:Lt:]]"), ec);
            if (skipSet == NULL || casedSet == NULL) {
                // Allocation failed: leave the text as it is rather
                // than dereference a NULL set below.
                delete skipSet;
                delete casedSet;
                offsets.start = offsets.limit;
                return;
            }
            CASED = casedSet;
            SKIP = skipSet;
            ucln_i18n_registerCleanup();
        }
    }

    // Our mode; we are either converting letter toTitle or
    // toLower.
    UBool doTitle = TRUE;

    // Determine if there is a preceding context of CASED SKIP*,
    // in which case we want to start in toLower mode.  If the
    // prior context is anything else (including empty) then start
    // in toTitle mode.
    UChar32 c;
    int32_t start;
    for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) {
        c = text.char32At(start);
        if (SKIP->contains(c)) {
            continue;
        }
        doTitle = !CASED->contains(c);
        break;
    }

    // Convert things after a CASED character toLower; things
    // after a non-CASED, non-SKIP character toTitle.  SKIP
    // characters are copied directly and do not change the mode.
    int32_t textPos = offsets.start;
    if (textPos >= offsets.limit) return;

    // get string for context
    // TODO: add convenience method to do this, since we do it all over
    int32_t loop = 0;
    UnicodeString original;
    /* Extract the characters from Replaceable */
    for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
        original.append(text.charAt(loop));
    }

    // Walk through original string
    // If there is a case change, modify corresponding position in replaceable.
    // i indexes the unmodified snapshot; textPos tracks the same
    // position in the (possibly resized) Replaceable.
    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    const int32_t bufferCapacity = u_getMaxCaseExpansion();
    UChar32 cp, bufferCH;
    int32_t oldLen;
    int32_t newLen;

    for (; i < limit; ) {
        UErrorCode status = U_ZERO_ERROR;
        int32_t s = i;
        UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;

        if (!SKIP->contains(cp)) {
            if (doTitle) {
                newLen = u_internalTitleCase(cp, buffer, bufferCapacity, loc.getName());
            } else {
                int32_t len = u_strToLower(buffer, bufferCapacity, original.getBuffer()+s, i-s, loc.getName(), &status);
                UTF_GET_CHAR(buffer, 0, 0, len, bufferCH);
                // -1 marks "unchanged" so no replacement is made below.
                newLen = (bufferCH == original.char32At(s) ? -1 : len);
            }
            doTitle = !CASED->contains(cp);
            // A negative newLen means there is nothing to replace.
            if (newLen >= 0) {
                UnicodeString temp(buffer, newLen);
                text.handleReplaceBetween(textPos, textPos + oldLen, temp);
                if (newLen != oldLen) {
                    // Length changed: advance past the new text and
                    // shift the limits by the difference.
                    textPos += newLen;
                    offsets.limit += newLen - oldLen;
                    offsets.contextLimit += newLen - oldLen;
                    continue;
                }
            }
        }
        textPos += oldLen;
    }
    offsets.start = offsets.limit;
}
/**
 * Static memory cleanup function. If the shared sets were created,
 * deletes SKIP and CASED, resets both pointers to NULL and destroys
 * the file-level mutex; otherwise does nothing.
 */
void TitlecaseTransliterator::cleanup()
{
    if (SKIP == NULL) {
        return;
    }
    delete SKIP;
    SKIP = NULL;
    delete CASED;
    CASED = NULL;
    umtx_destroy(&MUTEX);
}
U_NAMESPACE_END