scuffed-code/icu4c/source/i18n/unitohex.cpp

/*
**********************************************************************
*   Copyright (C) 1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#include "unicode/unitohex.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"

/**
 * ID for this transliterator.  Both non-copy constructors in this file
 * pass it to the Transliterator base-class constructor.
 */
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";

/**
 * Prefix used by the constructor that takes only a filter: a backslash
 * followed by the letter u.
 */
const char* UnicodeToHexTransliterator::DEFAULT_PREFIX = "\\u";

/**
 * Constructs a transliterator with an explicit prefix and digit case.
 * @param hexPrefix the string written in front of the four hex digits
 * of every escape
 * @param isUppercase if true, the hex digits A-F are emitted in
 * uppercase; otherwise they are emitted in lowercase
 * @param adoptedFilter filter handed on unchanged to the Transliterator
 * base class (the name suggests ownership is transferred; confirm
 * against the base-class documentation)
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
        const UnicodeString& hexPrefix,
        bool_t isUppercase,
        UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
    , prefix(hexPrefix)
    , uppercase(isUppercase)
{
}

/**
 * Constructs a transliterator that uses DEFAULT_PREFIX (a backslash
 * followed by the letter u) and emits uppercase hex digits.
 * @param adoptedFilter filter handed on unchanged to the Transliterator
 * base class
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
        UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
    , prefix(DEFAULT_PREFIX)
    , uppercase(TRUE)
{
}

/**
 * Copy constructor.  Copies the base-class state, the prefix and the
 * uppercase flag from <code>other</code>.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
        const UnicodeToHexTransliterator& other)
    : Transliterator(other)
    , prefix(other.prefix)
    , uppercase(other.uppercase)
{
}

/**
 * Assignment operator.  Assigns the base-class part first, then copies
 * the two members owned by this class.
 * @param other the transliterator to copy from
 * @return a reference to this object
 */
UnicodeToHexTransliterator&
UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other)
{
    Transliterator::operator=(other);

    // The two members are independent of each other.
    this->uppercase = other.uppercase;
    this->prefix = other.prefix;

    return *this;
}

/**
 * Returns a newly allocated copy of this transliterator, created with
 * the copy constructor.
 * @return pointer to the heap-allocated copy
 */
Transliterator*
UnicodeToHexTransliterator::clone(void) const
{
    Transliterator* duplicate = new UnicodeToHexTransliterator(*this);
    return duplicate;
}

/**
 * Returns the string that is written in front of the four hex digits.
 * @return reference to the stored prefix string
 */
const UnicodeString&
UnicodeToHexTransliterator::getPrefix(void) const
{
    return this->prefix;
}

/**
 * Sets the string that precedes the four hex digits.
 *
 * <p>Callers must take care if a transliterator is in use by
 * multiple threads.  The prefix should not be changed by one
 * thread while another thread may be transliterating.
 * @param prefix prefix string
 */
void UnicodeToHexTransliterator::setPrefix(const UnicodeString& hexPrefix) {
    prefix = hexPrefix;
}

/**
 * Reports the digit case used for output.
 * @return true if this transliterator outputs uppercase hex digits
 */
bool_t
UnicodeToHexTransliterator::isUppercase(void) const
{
    return this->uppercase;
}

/**
 * Selects the digit case used for output.
 *
 * <p>Callers must take care if a transliterator is in use by
 * multiple threads.  The uppercase mode should not be changed by
 * one thread while another thread may be transliterating.
 * @param outputUppercase if true, then this transliterator
 * outputs uppercase hex digits; otherwise lowercase
 */
void
UnicodeToHexTransliterator::setUppercase(bool_t outputUppercase)
{
    this->uppercase = outputUppercase;
}

/**
 * Transliterates a segment of a string.  <code>Transliterator</code> API.
 * The work is delegated to handleKeyboardTransliterate().
 * @param text the string to be transliterated; modified in place
 * @param start the beginning index, inclusive; <code>0 <= start
 * <= limit</code>.
 * @param limit the ending index, exclusive; <code>start <= limit
 * <= text.length()</code>.
 * @return the new limit index
 */
int32_t
UnicodeToHexTransliterator::transliterate(Replaceable& text,
                                          int32_t start,
                                          int32_t limit) const
{
    // Three-slot position array expected by handleKeyboardTransliterate();
    // the first and third slots both begin at 'start'.
    int32_t positions[3] = { start, limit, start };

    handleKeyboardTransliterate(text, positions);

    const int32_t updatedLimit = positions[LIMIT];
    return updatedLimit;
}

/**
 * Implements {@link Transliterator#handleKeyboardTransliterate}.
 *
 * Replaces every code unit between offsets[CURSOR] and offsets[LIMIT]
 * with its hexadecimal escape, for example '@' -> "U+0040" when the
 * prefix is "U+".  Code units rejected by the filter (when a filter is
 * set) are left unchanged.
 * @param text the text to modify in place
 * @param offsets position array; on return offsets[LIMIT] and
 * offsets[CURSOR] hold the adjusted limit and the final cursor
 */
void UnicodeToHexTransliterator::handleKeyboardTransliterate(Replaceable& text,
                                     int32_t offsets[3]) const
{
    int32_t pos = offsets[CURSOR];
    int32_t end = offsets[LIMIT];

    const UnicodeFilter* const activeFilter = getFilter();
    UnicodeString escape;   // reused for every replaced code unit

    while (pos < end) {
        const UChar unit = text.charAt(pos);

        if (activeFilter == 0 || activeFilter->isIn(unit)) {
            toHex(escape, unit);
            text.handleReplaceBetween(pos, pos + 1, escape);

            // One code unit became escapeLength code units: step over
            // the whole escape and grow the limit by the net increase.
            const int32_t escapeLength = escape.length();
            pos += escapeLength;
            end += escapeLength - 1;
        } else {
            // Filtered out: leave the code unit as it is.
            ++pos;
        }
    }

    offsets[LIMIT] = end;
    offsets[CURSOR] = pos;
}

/**
 * Return the length of the longest context required by this transliterator.
 * This is <em>preceding</em> context.
 * @return maximum number of preceding context characters this
 * transliterator needs to examine; always 0
 */
int32_t
UnicodeToHexTransliterator::getMaximumContextLength(void)
{
    // Each code unit is escaped on its own, so no context is examined.
    const int32_t noContext = 0;
    return noContext;
}

/**
 * Hex digit lookup table used by itoh().  Entries 0-15 hold the digits
 * with lowercase a-f; entries 16-31 hold the same digits with
 * uppercase A-F, so OR-ing an index with 16 selects the uppercase form.
 *
 * The entries are written as explicit Unicode code points rather than
 * as char literals.  A char literal takes its value from the compiler's
 * execution character set, so on a non-ASCII platform (for example
 * EBCDIC) it would not be the Unicode code point of the digit.
 */
UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 0-7
    0x0038, 0x0039, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, // 8 9 a-f
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 0-7
    0x0038, 0x0039, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046  // 8 9 A-F
};

/**
 * Given an integer, return its least significant hex digit.
 * @param i the value; only its low four bits are used
 * @return the digit as a UChar, in uppercase or lowercase according to
 * the current uppercase setting
 */
UChar
UnicodeToHexTransliterator::itoh(int32_t i) const
{
    const int32_t nibble = i & 0xF;

    // The uppercase digits occupy the second half of HEX_DIGITS.
    const int32_t caseOffset = uppercase ? 16 : 0;

    return HEX_DIGITS[nibble + caseOffset];
}

/**
 * Form escape sequence: the prefix followed by exactly four hex digits,
 * most significant digit first.
 * @param result receives the escape; any previous content is overwritten
 * @param c the code unit to escape
 * @return <code>result</code>
 */
UnicodeString&
UnicodeToHexTransliterator::toHex(UnicodeString& result, UChar c) const
{
    result = prefix;

    // Walk the four nibbles from the highest (bits 12-15) to the lowest.
    for (int32_t shift = 12; shift >= 0; shift -= 4) {
        result.append(itoh(c >> shift));
    }

    return result;
}
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/*`
			`**********************************************************************`
			`* Copyright (C) 1999, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 11/17/99 aliu Creation.`
			`**********************************************************************`
			`*/`
ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_ X-SVN-Rev: 473 1999-12-28 23:57:50 +00:00			`#include "unicode/unitohex.h"`
			`#include "unicode/rep.h"`
			`#include "unicode/unifilt.h"`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* ID for this transliterator.`
			`*/`
			`const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";`

			`const char* UnicodeToHexTransliterator::DEFAULT_PREFIX = "\\u";`

			`/**`
			`* Constructs a transliterator.`
			`* @param prefix the string that will precede the four hex`
			`* digits for UNICODE_HEX transliterators. Ignored`
			`* if direction is HEX_UNICODE.`
			`* @param uppercase if true, the four hex digits will be`
			`* converted to uppercase; otherwise they will be lowercase.`
			`* Ignored if direction is HEX_UNICODE.`
			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
			`const UnicodeString& hexPrefix,`
			`bool_t isUppercase,`
			`UnicodeFilter* adoptedFilter) :`
			`Transliterator(_ID, adoptedFilter),`
			`prefix(hexPrefix),`
			`uppercase(isUppercase) {`
			`}`

			`/**`
			`* Constructs a transliterator with the default prefix "\u"`
			`* that outputs uppercase hex digits.`
			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
			`UnicodeFilter* adoptedFilter) :`
			`Transliterator(_ID, adoptedFilter),`
			`prefix(DEFAULT_PREFIX),`
			`uppercase(TRUE) {`
			`}`

			`/**`
			`* Copy constructor.`
			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
			`const UnicodeToHexTransliterator& other) :`
			`Transliterator(other), prefix(other.prefix),`
			`uppercase(other.uppercase) {`
			`}`

			`/**`
			`* Assignment operator.`
			`*/`
			`UnicodeToHexTransliterator&`
			`UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {`
			`Transliterator::operator=(other);`
			`prefix = other.prefix;`
			`uppercase = other.uppercase;`
			`return *this;`
			`}`

			`Transliterator*`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`UnicodeToHexTransliterator::clone(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return new UnicodeToHexTransliterator(*this);`
			`}`

			`/**`
			`* Returns the string that precedes the four hex digits.`
			`* @return prefix string`
			`*/`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`const UnicodeString& UnicodeToHexTransliterator::getPrefix(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return prefix;`
			`}`

			`/**`
			`* Sets the string that precedes the four hex digits.`
			`*`
			`* <p>Callers must take care if a transliterator is in use by`
			`* multiple threads. The prefix should not be changed by one`
			`* thread while another thread may be transliterating.`
			`* @param prefix prefix string`
			`*/`
			`void UnicodeToHexTransliterator::setPrefix(const UnicodeString& hexPrefix) {`
			`prefix = hexPrefix;`
			`}`

			`/**`
			`* Returns true if this transliterator outputs uppercase hex digits.`
			`*/`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`bool_t UnicodeToHexTransliterator::isUppercase(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return uppercase;`
			`}`

			`/**`
			`* Sets if this transliterator outputs uppercase hex digits.`
			`*`
			`* <p>Callers must take care if a transliterator is in use by`
			`* multiple threads. The uppercase mode should not be changed by`
			`* one thread while another thread may be transliterating.`
			`* @param outputUppercase if true, then this transliterator`
			`* outputs uppercase hex digits.`
			`*/`
			`void UnicodeToHexTransliterator::setUppercase(bool_t outputUppercase) {`
			`uppercase = outputUppercase;`
			`}`

			`/**`
			`* Transliterates a segment of a string. <code>Transliterator</code> API.`
			`* @param text the string to be transliterated`
			`* @param start the beginning index, inclusive; <code>0 <= start`
			`* <= limit</code>.`
			`* @param limit the ending index, exclusive; <code>start <= limit`
			`* <= text.length()</code>.`
			`* @return the new limit index`
			`*/`
			`int32_t UnicodeToHexTransliterator::transliterate(Replaceable& text,`
			`int32_t start,`
			`int32_t limit) const {`
			`int32_t offsets[3] = { start, limit, start };`
			`handleKeyboardTransliterate(text, offsets);`
			`return offsets[LIMIT];`
			`}`

			`/**`
			`* Implements {@link Transliterator#handleKeyboardTransliterate}.`
			`*/`
			`void UnicodeToHexTransliterator::handleKeyboardTransliterate(Replaceable& text,`
			`int32_t offsets[3]) const {`
			`/**`
			`* Performs transliteration changing all characters to`
			`* Unicode hexadecimal escapes. For example, '@' -> "U+0040",`
			`* assuming the prefix is "U+".`
			`*/`
			`int32_t cursor = offsets[CURSOR];`
			`int32_t limit = offsets[LIMIT];`

			`const UnicodeFilter* filter = getFilter();`
			`UnicodeString hex;`

			`while (cursor < limit) {`
			`UChar c = text.charAt(cursor);`
			`if (filter != 0 && !filter->isIn(c)) {`
			`++cursor;`
			`continue;`
			`}`
			`toHex(hex, c);`
			`text.handleReplaceBetween(cursor, cursor+1, hex);`
			`int32_t len = hex.length();`
			`cursor += len; // Advance cursor by 1 and adjust for new text`
			`--len;`
			`limit += len;`
			`}`

			`offsets[LIMIT] = limit;`
			`offsets[CURSOR] = cursor;`
			`}`

			`/**`
			`* Return the length of the longest context required by this transliterator.`
			`* This is <em>preceding</em> context.`
			`* @param direction either <code>FORWARD</code> or <code>REVERSE</code>`
			`* @return maximum number of preceding context characters this`
			`* transliterator needs to examine`
			`*/`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`int32_t UnicodeToHexTransliterator::getMaximumContextLength(void) {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return 0;`
			`}`

			`UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {`
			`// If necessary, replace these character constants with their hex values`
			`'0', '1', '2', '3', '4', '5', '6', '7',`
			`'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',`
			`'0', '1', '2', '3', '4', '5', '6', '7',`
			`'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'`
			`};`

			`/**`
			`* Given an integer, return its least significant hex digit.`
			`*/`
			`UChar UnicodeToHexTransliterator::itoh(int32_t i) const {`
			`i &= 0xF;`
			`return HEX_DIGITS[uppercase ? (i\|16) : i];`
			`}`

			`/**`
			`* Form escape sequence.`
			`*/`
			`UnicodeString& UnicodeToHexTransliterator::toHex(UnicodeString& result,`
			`UChar c) const {`
			`result = prefix;`
			`result.append(itoh(c >> 12));`
			`result.append(itoh(c >> 8));`
			`result.append(itoh(c >> 4));`
			`result.append(itoh(c));`
			`return result;`
			`}`