/*
**********************************************************************
*   Copyright (C) 1999-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/

#include "unicode/unitohex.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"

/**
 * ID for this transliterator.
 */
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";

/**
 * Hex digit lookup table.  Entries 0..15 are the lowercase digits and
 * entries 16..31 are the uppercase digits, so a nibble d maps to
 * HEX_DIGITS[d] (lowercase) or HEX_DIGITS[d|16] (uppercase).
 */
const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
    // Use Unicode hex values for EBCDIC compatibility
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
    0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // 89abcdef
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
    0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, // 89ABCDEF
};

/**
 * Constructs a transliterator.
 * @param thePattern the output pattern; see applyPattern()
 * @param isUppercase if true, the hex digits A-F are output in uppercase
 * @param adoptedFilter the filter handed to the Transliterator base
 * class; may be 0
 * @param status input-output error code.  If it already indicates a
 * failure the pattern is not applied; otherwise it receives any error
 * reported by applyPattern().
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeString& thePattern,
                                UBool isUppercase,
                                UnicodeFilter* adoptedFilter,
                                UErrorCode& status) :
    Transliterator(_ID, adoptedFilter),
    minDigits(0), // defined value even if we return before applyPattern()
    uppercase(isUppercase) {

    if (U_FAILURE(status)) {
        return;
    }
    applyPattern(thePattern, status);
}

/**
 * Constructs a transliterator with no filter that outputs uppercase
 * hex digits.
 * @param thePattern the output pattern; see applyPattern()
 * @param status input-output error code.  If it already indicates a
 * failure the pattern is not applied; otherwise it receives any error
 * reported by applyPattern().
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeString& thePattern,
                                UErrorCode& status) :
    Transliterator(_ID, 0),
    minDigits(0), // defined value even if we return before applyPattern()
    uppercase(TRUE) {

    if (U_FAILURE(status)) {
        return;
    }
    applyPattern(thePattern, status);
}

/**
 * Constructs a transliterator with the default prefix "\u"
 * that outputs four uppercase hex digits.
 * @param adoptedFilter the filter handed to the Transliterator base
 * class; may be 0
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                UnicodeFilter* adoptedFilter) :
    Transliterator(_ID, adoptedFilter),
    pattern("\\\\u0000", ""),
    prefix("\\u", 2, ""),
    suffix(),
    minDigits(4),
    uppercase(TRUE) {
}

/**
 * Copy constructor.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeToHexTransliterator& other) :
    Transliterator(other),
    pattern(other.pattern),
    prefix(other.prefix),
    suffix(other.suffix),
    minDigits(other.minDigits),
    uppercase(other.uppercase) {
}

/**
 * Assignment operator.
 */
UnicodeToHexTransliterator&
UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
    Transliterator::operator=(other);
    pattern = other.pattern;
    prefix = other.prefix;
    suffix = other.suffix;
    minDigits = other.minDigits;
    uppercase = other.uppercase;
    return *this;
}

/**
 * Returns a heap-allocated copy of this transliterator, made with the
 * copy constructor.
 */
Transliterator*
UnicodeToHexTransliterator::clone(void) const {
    return new UnicodeToHexTransliterator(*this);
}

/**
 * Parses thePattern and sets the prefix, suffix, and minimum digit
 * count used when generating escapes.
 *
 * The pattern is read left to right as: literal prefix characters,
 * then zero or more '#' (optional digits), then one or more '0'
 * (required digits), then literal suffix characters.  A backslash
 * makes the following character a literal.  There must be at least
 * one required digit and at most four digits in total.
 *
 * Note that this object is modified as the pattern is parsed, so if
 * an error is reported the stored pattern, prefix, suffix and digit
 * count reflect the partially parsed input.
 *
 * @param thePattern the pattern to apply
 * @param status input-output error code.  Nothing is done if it
 * already indicates a failure.  Set to U_ILLEGAL_ARGUMENT_ERROR for a
 * trailing backslash, an unescaped '#' or '0' in the wrong position,
 * or an invalid digit count.
 */
void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,
                                              UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // POSSIBLE FUTURE MODIFICATION
    // Parse thePattern, and if this succeeds, set pattern to thePattern.
    // If it fails, call applyPattern(pattern) to restore the original
    // conditions.

    pattern = thePattern;
    prefix.truncate(0);
    suffix.truncate(0);
    minDigits = 0;
    int32_t maxDigits = 0;

    /* The mode specifies where we are in each spec.
     * mode 0 = in prefix
     * mode 1 = in optional digits (#)
     * mode 2 = in required digits (0)
     * mode 3 = in suffix
     */
    int32_t mode = 0;

    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar c = pattern.charAt(i);
        UBool isLiteral = FALSE;

        // Syntax characters are compared by Unicode code point rather
        // than as char literals, for EBCDIC compatibility.
        if (c == 0x005C /*'\\'*/) {
            if ((i+1) < pattern.length()) {
                // Escaped character: always a literal
                isLiteral = TRUE;
                c = pattern.charAt(++i);
            } else {
                // Trailing '\\'
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
        }

        if (!isLiteral) {
            switch (c) {
            case 0x0023 /*'#'*/:
                // Seeing a '#' moves us from mode 0 (prefix) to mode 1
                // (optional digits).
                if (mode == 0) {
                    ++mode;
                } else if (mode != 1) {
                    // Unquoted '#' after the required digits or suffix
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                ++maxDigits;
                break;
            case 0x0030 /*'0'*/:
                // Seeing a '0' moves us to mode 2 (required digits)
                if (mode < 2) {
                    mode = 2;
                } else if (mode != 2) {
                    // Unquoted '0' in the suffix
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                ++minDigits;
                ++maxDigits;
                break;
            default:
                isLiteral = TRUE;
                break;
            }
        }

        if (isLiteral) {
            if (mode == 0) {
                prefix.append(c);
            } else {
                // Any literal outside the prefix moves us into mode 3
                // (suffix)
                mode = 3;
                suffix.append(c);
            }
        }
    }

    if (minDigits < 1 || maxDigits > 4) {
        // Invalid min/max digit count
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
}

/**
 * Returns the pattern most recently stored by a constructor,
 * assignment, or applyPattern().
 */
const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {
    return pattern;
}

/**
 * Returns true if this transliterator outputs uppercase hex digits.
 */
UBool UnicodeToHexTransliterator::isUppercase(void) const {
    return uppercase;
}

/**
 * Sets if this transliterator outputs uppercase hex digits.
 *
 * <p>Callers must take care if a transliterator is in use by
 * multiple threads.  The uppercase mode should not be changed by
 * one thread while another thread may be transliterating.
 * @param outputUppercase if true, then this transliterator
 * outputs uppercase hex digits.
 */
void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {
    uppercase = outputUppercase;
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Replaces each character in [offsets.start, offsets.limit) that the
 * filter accepts (or every character, if there is no filter) with
 * prefix + hex digits + suffix, and updates offsets to account for
 * the change in text length.
 */
void
UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                UBool /*isIncremental*/) const {
    /**
     * Performs transliteration changing all characters to
     * Unicode hexadecimal escapes.  For example, '@' -> "U+0040",
     * assuming the prefix is "U+".
     */
    int32_t cursor = offsets.start;
    int32_t limit = offsets.limit;

    const UnicodeFilter* localFilter = getFilter();
    UnicodeString hex;

    while (cursor < limit) {
        UChar c = text.charAt(cursor);
        if (localFilter != 0 && !localFilter->contains(c)) {
            // Filtered out: leave this character untouched
            ++cursor;
            continue;
        }

        hex = prefix;
        UBool showRest = FALSE;
        for (int32_t i=3; i>=0; --i) {
            /* Get each nibble from left to right */
            int32_t d = (c >> (i<<2)) & 0xF;
            // Leading zero nibbles are dropped unless they fall within
            // the minDigits rightmost positions; once one digit has
            // been emitted, all following digits are emitted too.
            if (showRest || (d != 0) || minDigits > i) {
                hex.append(HEX_DIGITS[uppercase ? (d|16) : d]);
                showRest = TRUE;
            }
        }
        hex.append(suffix);

        text.handleReplaceBetween(cursor, cursor+1, hex);
        int32_t len = hex.length();
        cursor += len; // Skip over the escape that was just inserted
        --len;         // One character was replaced by len characters...
        limit += len;  // ...so the text grew by len - 1
    }

    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;
    offsets.start = cursor;
}