1999-11-20 00:40:50 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
2001-03-21 20:31:13 +00:00
|
|
|
* Copyright (C) 1999-2001, International Business Machines
|
1999-11-20 00:40:50 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 11/17/99 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/unitohex.h"
|
|
|
|
#include "unicode/rep.h"
|
|
|
|
#include "unicode/unifilt.h"
|
1999-11-20 00:40:50 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
/**
|
|
|
|
* ID for this transliterator.
|
|
|
|
*/
|
2001-10-11 23:54:55 +00:00
|
|
|
// Passed to the Transliterator base-class constructor by the non-copy constructors in this file.
const char UnicodeToHexTransliterator::_ID[] = "Any-Hex";
|
1999-11-20 00:40:50 +00:00
|
|
|
|
2000-03-08 19:26:17 +00:00
|
|
|
/**
 * Hex digit lookup table.  Entries 0..15 use lowercase letters for
 * a-f; entries 16..31 hold the same digits with uppercase A-F, so a
 * nibble value d maps to its uppercase digit at index (d | 16).
 */
const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
    // Digits are given as Unicode code point values rather than
    // character literals for EBCDIC compatibility.

    // Lowercase set: 0 1 2 3 / 4 5 6 7 / 8 9 a b / c d e f
    0x30, 0x31, 0x32, 0x33,
    0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x61, 0x62,
    0x63, 0x64, 0x65, 0x66,

    // Uppercase set: 0 1 2 3 / 4 5 6 7 / 8 9 A B / C D E F
    0x30, 0x31, 0x32, 0x33,
    0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x41, 0x42,
    0x43, 0x44, 0x45, 0x46
};
|
1999-11-20 00:40:50 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Constructs a transliterator.
|
|
|
|
*/
|
|
|
|
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
|
2000-03-08 19:26:17 +00:00
|
|
|
const UnicodeString& thePattern,
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool isUppercase,
|
2000-03-08 19:26:17 +00:00
|
|
|
UnicodeFilter* adoptedFilter,
|
|
|
|
UErrorCode& status) :
|
1999-11-20 00:40:50 +00:00
|
|
|
Transliterator(_ID, adoptedFilter),
|
|
|
|
uppercase(isUppercase) {
|
2000-03-08 19:26:17 +00:00
|
|
|
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
applyPattern(thePattern, status);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Constructs a transliterator.
|
|
|
|
*/
|
|
|
|
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
|
|
|
|
const UnicodeString& thePattern,
|
|
|
|
UErrorCode& status) :
|
|
|
|
Transliterator(_ID, 0),
|
|
|
|
uppercase(TRUE) {
|
|
|
|
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
applyPattern(thePattern, status);
|
1999-11-20 00:40:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Constructs a transliterator with the default prefix "\u"
|
2000-03-08 19:26:17 +00:00
|
|
|
* that outputs four uppercase hex digits.
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
|
|
|
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
|
|
|
|
UnicodeFilter* adoptedFilter) :
|
|
|
|
Transliterator(_ID, adoptedFilter),
|
2000-08-02 19:05:12 +00:00
|
|
|
pattern("\\\\u0000", ""),
|
|
|
|
prefix("\\u", 2, ""),
|
2000-03-08 19:26:17 +00:00
|
|
|
suffix(),
|
|
|
|
minDigits(4),
|
1999-11-20 00:40:50 +00:00
|
|
|
uppercase(TRUE) {
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Copy constructor.
|
|
|
|
*/
|
|
|
|
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
|
|
|
|
const UnicodeToHexTransliterator& other) :
|
2000-03-08 19:26:17 +00:00
|
|
|
Transliterator(other),
|
|
|
|
pattern(other.pattern),
|
|
|
|
prefix(other.prefix),
|
|
|
|
suffix(other.suffix),
|
|
|
|
minDigits(other.minDigits),
|
1999-11-20 00:40:50 +00:00
|
|
|
uppercase(other.uppercase) {
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Assignment operator.
|
|
|
|
*/
|
|
|
|
UnicodeToHexTransliterator&
|
|
|
|
UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
|
|
|
|
Transliterator::operator=(other);
|
2000-03-08 19:26:17 +00:00
|
|
|
pattern = other.pattern;
|
1999-11-20 00:40:50 +00:00
|
|
|
prefix = other.prefix;
|
2000-03-08 19:26:17 +00:00
|
|
|
suffix = other.suffix;
|
|
|
|
minDigits = other.minDigits;
|
1999-11-20 00:40:50 +00:00
|
|
|
uppercase = other.uppercase;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Returns a newly allocated copy of this transliterator, created with
 * the copy constructor.
 */
Transliterator*
UnicodeToHexTransliterator::clone(void) const {
    Transliterator* duplicate = new UnicodeToHexTransliterator(*this);
    return duplicate;
}
|
|
|
|
|
2000-03-08 19:26:17 +00:00
|
|
|
void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,
|
|
|
|
UErrorCode& status) {
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// POSSIBILE FUTURE MODIFICATION
|
|
|
|
// Parse thePattern, and if this succeeds, set pattern to thePattern.
|
|
|
|
// If it fails, call applyPattern(pattern) to restore the original
|
|
|
|
// conditions.
|
|
|
|
|
|
|
|
pattern = thePattern;
|
|
|
|
prefix.truncate(0);
|
|
|
|
suffix.truncate(0);
|
|
|
|
minDigits = 0;
|
|
|
|
int32_t maxDigits = 0;
|
|
|
|
|
|
|
|
/* The mode specifies where we are in each spec.
|
|
|
|
* mode 0 = in prefix
|
|
|
|
* mode 1 = in optional digits (#)
|
|
|
|
* mode 2 = in required digits (0)
|
|
|
|
* mode 3 = in suffix
|
|
|
|
*/
|
|
|
|
int32_t mode = 0;
|
|
|
|
|
|
|
|
for (int32_t i=0; i<pattern.length(); ++i) {
|
|
|
|
UChar c = pattern.charAt(i);
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool isLiteral = FALSE;
|
2000-03-08 19:26:17 +00:00
|
|
|
if (c == BACKSLASH) {
|
|
|
|
if ((i+1)<pattern.length()) {
|
|
|
|
isLiteral = TRUE;
|
|
|
|
c = pattern.charAt(++i);
|
|
|
|
} else {
|
|
|
|
// Trailing '\\'
|
|
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!isLiteral) {
|
|
|
|
switch (c) {
|
|
|
|
case POUND:
|
|
|
|
// Seeing a '#' moves us from mode 0 (prefix) to mode 1
|
|
|
|
// (optional digits).
|
|
|
|
if (mode == 0) {
|
|
|
|
++mode;
|
|
|
|
} else if (mode != 1) {
|
|
|
|
// Unquoted '#'
|
|
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
++maxDigits;
|
|
|
|
break;
|
|
|
|
case ZERO:
|
|
|
|
// Seeing a '0' moves us to mode 2 (required digits)
|
|
|
|
if (mode < 2) {
|
|
|
|
mode = 2;
|
|
|
|
} else if (mode != 2) {
|
|
|
|
// Unquoted '0'
|
|
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
++minDigits;
|
|
|
|
++maxDigits;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
isLiteral = TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isLiteral) {
|
|
|
|
if (mode == 0) {
|
|
|
|
prefix.append(c);
|
|
|
|
} else {
|
|
|
|
// Any literal outside the prefix moves us into mode 3
|
|
|
|
// (suffix)
|
|
|
|
mode = 3;
|
|
|
|
suffix.append(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (minDigits < 1 || maxDigits > 4) {
|
|
|
|
// Invalid min/max digit count
|
|
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return;
|
|
|
|
}
|
1999-11-20 00:40:50 +00:00
|
|
|
}
|
|
|
|
|
2000-03-08 19:26:17 +00:00
|
|
|
/**
 * Returns the pattern text stored by the most recent applyPattern()
 * call or by the constructor.
 */
const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {
    return this->pattern;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true if this transliterator outputs uppercase hex digits.
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool UnicodeToHexTransliterator::isUppercase(void) const {
|
1999-11-20 00:40:50 +00:00
|
|
|
return uppercase;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sets if this transliterator outputs uppercase hex digits.
|
|
|
|
*
|
|
|
|
* <p>Callers must take care if a transliterator is in use by
|
|
|
|
* multiple threads. The uppercase mode should not be changed by
|
|
|
|
* one thread while another thread may be transliterating.
|
|
|
|
* @param outputUppercase if true, then this transliterator
|
|
|
|
* outputs uppercase hex digits.
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {
|
1999-11-20 00:40:50 +00:00
|
|
|
uppercase = outputUppercase;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2000-01-18 18:27:27 +00:00
|
|
|
* Implements {@link Transliterator#handleTransliterate}.
|
1999-11-20 00:40:50 +00:00
|
|
|
*/
|
2000-06-27 19:00:38 +00:00
|
|
|
void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
|
2000-08-15 18:25:20 +00:00
|
|
|
UBool /*isIncremental*/) const {
|
1999-11-20 00:40:50 +00:00
|
|
|
/**
|
|
|
|
* Performs transliteration changing all characters to
|
|
|
|
* Unicode hexadecimal escapes. For example, '@' -> "U+0040",
|
|
|
|
* assuming the prefix is "U+".
|
|
|
|
*/
|
2000-06-27 20:06:52 +00:00
|
|
|
int32_t cursor = offsets.start;
|
2000-01-19 19:02:10 +00:00
|
|
|
int32_t limit = offsets.limit;
|
1999-11-20 00:40:50 +00:00
|
|
|
|
|
|
|
UnicodeString hex;
|
|
|
|
|
|
|
|
while (cursor < limit) {
|
|
|
|
UChar c = text.charAt(cursor);
|
2000-03-08 19:26:17 +00:00
|
|
|
|
|
|
|
hex = prefix;
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool showRest = FALSE;
|
2000-03-08 19:26:17 +00:00
|
|
|
for (int32_t i=3; i>=0; --i) {
|
2001-01-23 23:01:33 +00:00
|
|
|
/* Get each nibble from left to right */
|
|
|
|
int32_t d = (c >> (i<<2)) & 0xF;
|
2000-03-08 19:26:17 +00:00
|
|
|
if (showRest || (d != 0) || minDigits > i) {
|
|
|
|
hex.append(HEX_DIGITS[uppercase ? (d|16) : d]);
|
|
|
|
showRest = TRUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
hex.append(suffix);
|
|
|
|
|
1999-11-20 00:40:50 +00:00
|
|
|
text.handleReplaceBetween(cursor, cursor+1, hex);
|
|
|
|
int32_t len = hex.length();
|
|
|
|
cursor += len; // Advance cursor by 1 and adjust for new text
|
|
|
|
--len;
|
|
|
|
limit += len;
|
|
|
|
}
|
|
|
|
|
2000-06-27 20:06:52 +00:00
|
|
|
offsets.contextLimit += limit - offsets.limit;
|
2000-01-19 19:02:10 +00:00
|
|
|
offsets.limit = limit;
|
2000-06-27 20:06:52 +00:00
|
|
|
offsets.start = cursor;
|
1999-11-20 00:40:50 +00:00
|
|
|
}
|
2001-10-08 23:26:58 +00:00
|
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
|