2001-06-11 19:51:46 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
|
|
|
* Copyright (C) 2001, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 05/24/01 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/uchar.h"
|
2001-11-16 23:51:15 +00:00
|
|
|
#include "titletrn.h"
|
2001-11-01 04:37:27 +00:00
|
|
|
#include "unicode/uniset.h"
|
|
|
|
#include "mutex.h"
|
2001-11-13 23:47:11 +00:00
|
|
|
#include "ucln_in.h"
|
2001-06-11 19:51:46 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2001-06-11 19:51:46 +00:00
|
|
|
/**
|
|
|
|
* ID for this transliterator.
|
|
|
|
*/
|
2001-10-11 23:54:55 +00:00
|
|
|
const char TitlecaseTransliterator::_ID[] = "Any-Title";
|
2001-06-11 19:51:46 +00:00
|
|
|
|
2001-11-01 04:37:27 +00:00
|
|
|
/**
|
|
|
|
* The set of characters we skip. These are neither cased nor
|
|
|
|
* non-cased, to us; we copy them verbatim.
|
|
|
|
*/
|
2001-11-14 17:23:01 +00:00
|
|
|
static UnicodeSet* SKIP = NULL;
|
2001-11-01 04:37:27 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* The set of characters that cause the next non-SKIP character
|
|
|
|
* to be lowercased.
|
|
|
|
*/
|
2001-11-14 17:23:01 +00:00
|
|
|
static UnicodeSet* CASED = NULL;
|
2001-11-01 04:37:27 +00:00
|
|
|
|
2001-06-11 19:51:46 +00:00
|
|
|
/**
 * Constructs a titlecase transliterator.
 *
 * @param adoptedFilter filter handed straight to the Transliterator
 *                      base-class constructor together with _ID.
 */
TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
{
    // Two characters of preceding context are required: in a word such
    // as "can't" we must see past the apostrophe to the letter before it.
    setMaximumContextLength(2);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Destructor.
|
|
|
|
*/
|
|
|
|
TitlecaseTransliterator::~TitlecaseTransliterator() {}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Copy constructor.
|
|
|
|
*/
|
|
|
|
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
|
|
|
|
Transliterator(o) {}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Assignment operator.
|
|
|
|
*/
|
|
|
|
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
|
|
|
|
const TitlecaseTransliterator& o) {
|
|
|
|
Transliterator::operator=(o);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Transliterator API.
|
|
|
|
*/
|
|
|
|
Transliterator* TitlecaseTransliterator::clone(void) const {
|
|
|
|
return new TitlecaseTransliterator(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implements {@link Transliterator#handleTransliterate}.
|
|
|
|
*/
|
|
|
|
void TitlecaseTransliterator::handleTransliterate(
|
|
|
|
Replaceable& text, UTransPosition& offsets,
|
|
|
|
UBool isIncremental) const {
|
2001-11-01 04:37:27 +00:00
|
|
|
if (SKIP == NULL) {
|
|
|
|
Mutex lock;
|
|
|
|
if (SKIP == NULL) {
|
|
|
|
UErrorCode ec = U_ZERO_ERROR;
|
2001-11-21 21:23:48 +00:00
|
|
|
SKIP = new UnicodeSet(UnicodeString("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:]]", ""), ec);
|
2001-11-01 04:37:27 +00:00
|
|
|
CASED = new UnicodeSet(UnicodeString("[[:Lu:] [:Ll:] [:Lt:]]", ""), ec);
|
2001-11-13 23:47:11 +00:00
|
|
|
ucln_i18n_registerCleanup();
|
2001-06-29 21:19:49 +00:00
|
|
|
}
|
2001-06-11 19:51:46 +00:00
|
|
|
}
|
|
|
|
|
2001-11-01 04:37:27 +00:00
|
|
|
// Our mode; we are either converting letter toTitle or
|
|
|
|
// toLower.
|
|
|
|
UBool doTitle = TRUE;
|
|
|
|
|
|
|
|
// Determine if there is a preceding context of CASED SKIP*,
|
|
|
|
// in which case we want to start in toLower mode. If the
|
|
|
|
// prior context is anything else (including empty) then start
|
|
|
|
// in toTitle mode.
|
|
|
|
int32_t start = offsets.start;
|
|
|
|
while (start > offsets.contextStart) {
|
|
|
|
UChar c = text.charAt(--start);
|
|
|
|
if (SKIP->contains(c)) {
|
|
|
|
continue;
|
2001-06-11 19:51:46 +00:00
|
|
|
}
|
2001-11-01 04:37:27 +00:00
|
|
|
doTitle = !CASED->contains(c);
|
|
|
|
break;
|
2001-06-11 19:51:46 +00:00
|
|
|
}
|
2001-11-01 04:37:27 +00:00
|
|
|
|
|
|
|
// Convert things after a CASED character toLower; things
|
|
|
|
// after a non-CASED, non-SKIP character toTitle. SKIP
|
|
|
|
// characters are copied directly and do not change the mode.
|
|
|
|
UnicodeString str("A", "");
|
|
|
|
for (start=offsets.start; start<offsets.limit; ++start) {
|
|
|
|
UChar c = text.charAt(start);
|
|
|
|
if (SKIP->contains(c)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
UChar d = (UChar) (doTitle ? u_totitle(c)
|
|
|
|
: u_tolower(c));
|
|
|
|
if (c != d) {
|
|
|
|
str.setCharAt(0, d);
|
|
|
|
text.handleReplaceBetween(start, start+1, str);
|
|
|
|
}
|
|
|
|
doTitle = !CASED->contains(c);
|
2001-06-11 19:51:46 +00:00
|
|
|
}
|
2001-11-01 04:37:27 +00:00
|
|
|
|
|
|
|
offsets.start = start;
|
2001-06-11 19:51:46 +00:00
|
|
|
}
|
2001-10-08 23:26:58 +00:00
|
|
|
|
2001-11-13 23:47:11 +00:00
|
|
|
/**
|
|
|
|
* Static memory cleanup function.
|
|
|
|
*/
|
|
|
|
void TitlecaseTransliterator::cleanup() {
|
|
|
|
if (SKIP != NULL) {
|
|
|
|
delete SKIP; SKIP = NULL;
|
|
|
|
delete CASED; CASED = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|