2016-06-15 18:58:17 +00:00
|
|
|
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
2001-11-21 07:02:15 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
2016-05-31 21:45:07 +00:00
|
|
|
* Copyright (c) 2001-2011, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
2001-11-21 07:02:15 +00:00
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 11/19/2001 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#if !UCONFIG_NO_TRANSLITERATION
|
|
|
|
|
2011-07-27 05:53:56 +00:00
|
|
|
#include "unicode/utf16.h"
|
2001-11-21 07:02:15 +00:00
|
|
|
#include "esctrn.h"
|
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
// NUL-terminated UTF-16 code unit tables for the escape prefixes and
// suffixes used by the factories in this file.  The values are spelled as
// numeric code units rather than character literals; the trailing comment
// on each line shows the text as it would appear in a C string literal.
static const UChar UNIPRE[]   = {0x55, 0x2B, 0};        // "U+"
static const UChar BS_u[]     = {0x5C, 0x75, 0};        // "\\u"
static const UChar BS_U[]     = {0x5C, 0x55, 0};        // "\\U"
static const UChar XMLPRE[]   = {0x26, 0x23, 0x78, 0};  // "&#x"
static const UChar XML10PRE[] = {0x26, 0x23, 0};        // "&#"
static const UChar PERLPRE[]  = {0x5C, 0x78, 0x7B, 0};  // "\\x{"
static const UChar SEMI[]     = {0x3B, 0};              // ";"
static const UChar RBRACE[]   = {0x7D, 0};              // "}"
|
|
|
|
|
2003-08-31 20:53:46 +00:00
|
|
|
// Emits the out-of-line RTTI support code for EscapeTransliterator.
// NOTE(review): presumably this defines the class-ID accessors declared in
// the class header -- confirm against the macro's definition in uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
|
2002-06-29 00:04:16 +00:00
|
|
|
|
2001-11-21 07:02:15 +00:00
|
|
|
/**
|
|
|
|
* Factory methods
|
|
|
|
*/
|
2006-02-06 06:41:52 +00:00
|
|
|
// Factory for the Unicode notation: "U+10FFFF" -- hex, min=4, max=6.
// A supplementary code point is written as one escape; there is no
// suffix and no separate supplemental handler.  The context token is unused.
static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
    const UnicodeString escPrefix(TRUE, UNIPRE, 2);
    const UnicodeString noSuffix;
    return new EscapeTransliterator(ID, escPrefix, noSuffix, 16, 4, TRUE, NULL);
}
|
2006-02-06 06:41:52 +00:00
|
|
|
// Factory for the Java notation: "\\uFFFF" -- hex, min=4, max=4.
// Supplementals are NOT grokked, so the text is escaped one UTF-16 code
// unit at a time.  The context token is unused.
static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
    const UnicodeString escPrefix(TRUE, BS_u, 2);
    const UnicodeString noSuffix;
    return new EscapeTransliterator(ID, escPrefix, noSuffix, 16, 4, FALSE, NULL);
}
|
2006-02-06 06:41:52 +00:00
|
|
|
// Factory for the C notation:
//   BMP code points:           "\\uFFFF"      hex, min=4, max=4
//   supplementary code points: "\\U0010FFFF"  hex, min=8, max=8
//
// The supplementary form is produced by a second, anonymous (empty-ID)
// EscapeTransliterator that the returned object adopts as its supplemental
// handler.  The context token is unused.
//
// Returns NULL if either object cannot be allocated.  The handler is
// created first and checked explicitly: handing a NULL handler to the outer
// object would not fail visibly, it would make handleTransliterate() fall
// back to the "\\u" form for supplementary code points and emit invalid
// escapes such as "\\u10FFFF".
static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
    EscapeTransliterator* supplemental =
        new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(),
                                 16, 8, TRUE, NULL);
    if (supplemental == NULL) {
        return NULL;
    }

    EscapeTransliterator* escaper =
        new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(),
                                 16, 4, TRUE, supplemental);
    if (escaper == NULL) {
        // Nobody adopted the handler, so release it here instead of leaking it.
        delete supplemental;
    }
    return escaper;
}
|
2006-02-06 06:41:52 +00:00
|
|
|
// Factory for the XML hexadecimal notation: "&#x10FFFF;" -- hex, min=1, max=6.
// Each escape is terminated by a ';' suffix.  A supplementary code point is
// written as one escape.  The context token is unused.
static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
    const UnicodeString escPrefix(TRUE, XMLPRE, 3);
    const UnicodeString terminator(SEMI[0]);
    return new EscapeTransliterator(ID, escPrefix, terminator, 16, 1, TRUE, NULL);
}
|
2006-02-06 06:41:52 +00:00
|
|
|
// Factory for the XML decimal notation: "&#1114111;" -- dec, min=1, max=7
// (not really "Any-Hex", since the digits are base 10).
// Each escape is terminated by a ';' suffix.  A supplementary code point is
// written as one escape.  The context token is unused.
static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
    const UnicodeString escPrefix(TRUE, XML10PRE, 2);
    const UnicodeString terminator(SEMI[0]);
    return new EscapeTransliterator(ID, escPrefix, terminator, 10, 1, TRUE, NULL);
}
|
2006-02-06 06:41:52 +00:00
|
|
|
// Factory for the Perl notation: "\\x{263A}" -- hex, min=1, max=6.
// Each escape is closed by a '}' suffix.  A supplementary code point is
// written as one escape.  The context token is unused.
static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
    const UnicodeString escPrefix(TRUE, PERLPRE, 3);
    const UnicodeString closer(RBRACE[0]);
    return new EscapeTransliterator(ID, escPrefix, closer, 16, 1, TRUE, NULL);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Registers standard variants with the system. Called by
|
|
|
|
* Transliterator during initialization.
|
|
|
|
*/
|
|
|
|
void EscapeTransliterator::registerIDs() {
|
|
|
|
Token t = integerToken(0);
|
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
|
2006-02-06 06:41:52 +00:00
|
|
|
Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
|
2001-11-21 07:02:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Constructs an escape transliterator with the given ID and
|
|
|
|
* parameters. See the class member documentation for details.
|
|
|
|
*/
|
2001-12-03 23:56:30 +00:00
|
|
|
EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
|
2001-11-30 23:01:40 +00:00
|
|
|
const UnicodeString& _prefix, const UnicodeString& _suffix,
|
|
|
|
int32_t _radix, int32_t _minDigits,
|
|
|
|
UBool _grokSupplementals,
|
2001-11-21 07:02:15 +00:00
|
|
|
EscapeTransliterator* adoptedSupplementalHandler) :
|
2001-12-03 23:56:30 +00:00
|
|
|
Transliterator(newID, NULL)
|
2001-11-30 23:01:40 +00:00
|
|
|
{
|
|
|
|
this->prefix = _prefix;
|
|
|
|
this->suffix = _suffix;
|
|
|
|
this->radix = _radix;
|
|
|
|
this->minDigits = _minDigits;
|
|
|
|
this->grokSupplementals = _grokSupplementals;
|
2001-11-21 07:02:15 +00:00
|
|
|
this->supplementalHandler = adoptedSupplementalHandler;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Copy constructor.
|
|
|
|
*/
|
|
|
|
EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
|
|
|
|
Transliterator(o),
|
|
|
|
prefix(o.prefix),
|
|
|
|
suffix(o.suffix),
|
|
|
|
radix(o.radix),
|
|
|
|
minDigits(o.minDigits),
|
|
|
|
grokSupplementals(o.grokSupplementals) {
|
|
|
|
supplementalHandler = (o.supplementalHandler != 0) ?
|
|
|
|
new EscapeTransliterator(*o.supplementalHandler) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Destructor.  Releases the owned supplemental handler; deleting a NULL
 * handler is a no-op.
 */
EscapeTransliterator::~EscapeTransliterator()
{
    delete supplementalHandler;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Transliterator API.
|
|
|
|
*/
|
|
|
|
Transliterator* EscapeTransliterator::clone() const {
|
|
|
|
return new EscapeTransliterator(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implements {@link Transliterator#handleTransliterate}.
|
|
|
|
*/
|
2003-12-06 00:52:51 +00:00
|
|
|
void EscapeTransliterator::handleTransliterate(Replaceable& text,
|
|
|
|
UTransPosition& pos,
|
|
|
|
UBool /*isIncremental*/) const
|
|
|
|
{
|
|
|
|
/* TODO: Verify that isIncremental can be ignored */
|
2001-11-21 07:02:15 +00:00
|
|
|
int32_t start = pos.start;
|
|
|
|
int32_t limit = pos.limit;
|
|
|
|
|
|
|
|
UnicodeString buf(prefix);
|
|
|
|
int32_t prefixLen = prefix.length();
|
|
|
|
UBool redoPrefix = FALSE;
|
|
|
|
|
|
|
|
while (start < limit) {
|
|
|
|
int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
|
2011-07-27 05:53:56 +00:00
|
|
|
int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
|
2001-11-21 07:02:15 +00:00
|
|
|
|
|
|
|
if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
|
|
|
|
buf.truncate(0);
|
|
|
|
buf.append(supplementalHandler->prefix);
|
2001-12-03 20:19:08 +00:00
|
|
|
ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
|
2001-11-21 07:02:15 +00:00
|
|
|
supplementalHandler->minDigits);
|
|
|
|
buf.append(supplementalHandler->suffix);
|
|
|
|
redoPrefix = TRUE;
|
|
|
|
} else {
|
|
|
|
if (redoPrefix) {
|
|
|
|
buf.truncate(0);
|
|
|
|
buf.append(prefix);
|
|
|
|
redoPrefix = FALSE;
|
|
|
|
} else {
|
|
|
|
buf.truncate(prefixLen);
|
|
|
|
}
|
2001-12-03 20:19:08 +00:00
|
|
|
ICU_Utility::appendNumber(buf, c, radix, minDigits);
|
2001-11-21 07:02:15 +00:00
|
|
|
buf.append(suffix);
|
|
|
|
}
|
|
|
|
|
|
|
|
text.handleReplaceBetween(start, start + charLen, buf);
|
|
|
|
start += buf.length();
|
|
|
|
limit += buf.length() - charLen;
|
|
|
|
}
|
|
|
|
|
|
|
|
pos.contextLimit += limit - pos.limit;
|
|
|
|
pos.limit = limit;
|
|
|
|
pos.start = start;
|
|
|
|
}
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
|
|
|
|
2001-11-21 07:02:15 +00:00
|
|
|
//eof
|