1999-11-20 00:40:50 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
|
|
|
* Copyright (C) 1999, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 11/17/99 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
#include "hextouni.h"
|
|
|
|
#include "rep.h"
|
|
|
|
#include "unifilt.h"
|
1999-11-23 01:31:13 +00:00
|
|
|
#include "unicode.h"
|
1999-11-20 00:40:50 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* ID for this transliterator.
|
|
|
|
*/
|
|
|
|
const char* HexToUnicodeTransliterator::_ID = "Hex-Unicode";
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Constructs a transliterator.
|
|
|
|
*/
|
|
|
|
HexToUnicodeTransliterator::HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter) :
|
|
|
|
Transliterator(_ID, adoptedFilter) {
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Copy constructor.
|
|
|
|
*/
|
|
|
|
HexToUnicodeTransliterator::HexToUnicodeTransliterator(const HexToUnicodeTransliterator& o) :
|
|
|
|
Transliterator(o) {
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Assignment operator.
|
|
|
|
*/
|
|
|
|
HexToUnicodeTransliterator& HexToUnicodeTransliterator::operator=(
|
|
|
|
const HexToUnicodeTransliterator& o) {
|
|
|
|
Transliterator::operator=(o);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Transliterator API.
|
|
|
|
*/
|
|
|
|
Transliterator* HexToUnicodeTransliterator::clone() const {
|
|
|
|
return new HexToUnicodeTransliterator(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
|
|
|
* @param text the string to be transliterated
|
|
|
|
* @param start the beginning index, inclusive; <code>0 <= start
|
|
|
|
* <= limit</code>.
|
|
|
|
* @param limit the ending index, exclusive; <code>start <= limit
|
|
|
|
* <= text.length()</code>.
|
|
|
|
* @return the new limit index
|
|
|
|
*/
|
|
|
|
int32_t HexToUnicodeTransliterator::transliterate(Replaceable& text,
|
|
|
|
int32_t start, int32_t limit) const {
|
|
|
|
int32_t offsets[3] = { start, limit, start };
|
|
|
|
handleKeyboardTransliterate(text, offsets);
|
|
|
|
return offsets[LIMIT];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
|
|
|
*/
|
|
|
|
void HexToUnicodeTransliterator::handleKeyboardTransliterate(Replaceable& text,
|
|
|
|
int32_t offsets[3]) const {
|
|
|
|
/**
|
|
|
|
* Performs transliteration changing Unicode hexadecimal
|
|
|
|
* escapes to characters. For example, "U+0040" -> '@'. A fixed
|
|
|
|
* set of prefixes is recognized: "\u", "\U", "u+", "U+".
|
|
|
|
*/
|
|
|
|
int32_t cursor = offsets[CURSOR];
|
|
|
|
int32_t limit = offsets[LIMIT];
|
|
|
|
|
|
|
|
int32_t maxCursor = limit - 6;
|
|
|
|
|
|
|
|
while (cursor <= maxCursor) {
|
|
|
|
UChar c = filteredCharAt(text, cursor + 5);
|
1999-11-23 01:31:13 +00:00
|
|
|
int32_t digit0 = Unicode::digit(c, 16);
|
1999-11-20 00:40:50 +00:00
|
|
|
if (digit0 < 0) {
|
|
|
|
if (c == '\\') {
|
|
|
|
cursor += 5;
|
|
|
|
} else if (c == 'U' || c == 'u' || c == '+') {
|
|
|
|
cursor += 4;
|
|
|
|
} else {
|
|
|
|
cursor += 6;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t u = digit0;
|
|
|
|
bool_t toTop = FALSE;
|
|
|
|
|
|
|
|
for (int32_t i=4; i>=2; --i) {
|
|
|
|
c = filteredCharAt(text, cursor + i);
|
1999-11-23 01:31:13 +00:00
|
|
|
int32_t digit = Unicode::digit(c, 16);
|
1999-11-20 00:40:50 +00:00
|
|
|
if (digit < 0) {
|
|
|
|
if (c == 'U' || c == 'u' || c == '+') {
|
|
|
|
cursor += i-1;
|
|
|
|
} else {
|
|
|
|
cursor += 6;
|
|
|
|
}
|
|
|
|
toTop = TRUE; // This is a little awkward -- it was a "continue loop:"
|
|
|
|
break; // statement in Java, where loop marked the while().
|
|
|
|
} else {
|
|
|
|
u |= digit << (4 * (5-i));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (toTop) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
c = filteredCharAt(text, cursor);
|
|
|
|
UChar d = filteredCharAt(text, cursor + 1);
|
|
|
|
if (((c == 'U' || c == 'u') && d == '+')
|
|
|
|
|| (c == '\\' && (d == 'U' || d == 'u'))) {
|
|
|
|
|
|
|
|
// At this point, we have a match; replace cursor..cursor+5
|
|
|
|
// with u.
|
|
|
|
text.handleReplaceBetween(cursor, cursor+6, UnicodeString((UChar)u));
|
|
|
|
limit -= 5;
|
|
|
|
maxCursor -= 5;
|
|
|
|
|
|
|
|
++cursor;
|
|
|
|
} else {
|
|
|
|
cursor += 6;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
offsets[LIMIT] = limit;
|
|
|
|
offsets[CURSOR] = cursor;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Reads one character of the text, taking this transliterator's filter
 * into account.
 * @param text the text to read from
 * @param i index of the character to read
 * @return the character at index i when there is no filter or when the
 * filter's isIn() accepts it; otherwise 0xFFFF
 */
UChar HexToUnicodeTransliterator::filteredCharAt(Replaceable& text, int32_t i) const
{
    const UnicodeFilter* activeFilter = getFilter();
    if (activeFilter == 0) {
        return text.charAt(i);
    }

    UChar ch = text.charAt(i);
    if (activeFilter->isIn(ch)) {
        return ch;
    }
    return (UChar)0xFFFF;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the length of the longest context required by this transliterator.
|
|
|
|
* This is <em>preceding</em> context.
|
|
|
|
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
|
|
|
* @return maximum number of preceding context characters this
|
|
|
|
* transliterator needs to examine
|
|
|
|
*/
|
|
|
|
int32_t HexToUnicodeTransliterator::getMaximumContextLength() const {
|
|
|
|
return 0;
|
|
|
|
}
|