scuffed-code/icu4c/source/i18n/unitohex.cpp

/*
**********************************************************************
*   Copyright (C) 1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#include "unicode/unitohex.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"

/**
 * ID for this transliterator.
 *
 * Every non-copy constructor in this file passes this string to the
 * Transliterator base-class constructor.
 *
 * NOTE(review): an identifier that starts with an underscore followed by
 * an uppercase letter is reserved to the implementation in C++; the name
 * is declared in the class header, so it cannot be changed from here.
 */
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";

/**
 * Hex digit lookup table with 32 entries.
 *
 * Entries 0..15 hold the digits with lowercase letters (0-9, a-f) and
 * entries 16..31 hold the same digits with uppercase letters (0-9, A-F).
 * handleTransliterate() indexes the table with the nibble value d for
 * lowercase output and with (d|16) for uppercase output.
 */
const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
    // Use Unicode hex values for EBCDIC compatibility
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
    0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // 89abcdef
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
    0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, // 89ABCDEF
};

/**
 * Constructs a transliterator from a pattern, a case selection, and a
 * filter.
 *
 * @param thePattern    pattern handed to applyPattern() to derive the
 *                      prefix, suffix and minimum digit count.
 * @param isUppercase   if true, hex digits are output in uppercase.
 * @param adoptedFilter filter forwarded to the Transliterator base class
 *                      (presumably adopted by it, as the name suggests --
 *                      confirm against the base class).
 * @param status        in/out error code.  If it already indicates a
 *                      failure on entry, the pattern is not applied.
 *                      applyPattern() sets it to U_ILLEGAL_ARGUMENT_ERROR
 *                      when thePattern is malformed.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeString& thePattern,
                                UBool isUppercase,
                                UnicodeFilter* adoptedFilter,
                                UErrorCode& status) :
    Transliterator(_ID, adoptedFilter),
    // Give minDigits a defined value up front: the early return below
    // skips applyPattern(), which would otherwise be the only place that
    // assigns it, leaving an indeterminate value to be copied or read.
    minDigits(0),
    uppercase(isUppercase) {

    if (U_FAILURE(status)) {
        return;
    }
    applyPattern(thePattern, status);
}

/**
 * Constructs a transliterator from a pattern, with no filter and with
 * uppercase hex digit output.
 *
 * @param thePattern pattern handed to applyPattern() to derive the
 *                   prefix, suffix and minimum digit count.
 * @param status     in/out error code.  If it already indicates a
 *                   failure on entry, the pattern is not applied.
 *                   applyPattern() sets it to U_ILLEGAL_ARGUMENT_ERROR
 *                   when thePattern is malformed.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeString& thePattern,
                                UErrorCode& status) :
    Transliterator(_ID, 0),
    // Give minDigits a defined value up front: the early return below
    // skips applyPattern(), which would otherwise be the only place that
    // assigns it, leaving an indeterminate value to be copied or read.
    minDigits(0),
    uppercase(TRUE) {

    if (U_FAILURE(status)) {
        return;
    }
    applyPattern(thePattern, status);
}

/**
 * Default-pattern constructor.
 *
 * Sets the prefix to "&#092;u" (a backslash followed by the letter u),
 * leaves the suffix empty, requires four digits, and selects uppercase
 * output.  The stored pattern is the escaped pattern text that
 * corresponds to those settings.
 *
 * @param adoptedFilter filter forwarded to the Transliterator base class.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
    , pattern("\\\\u0000", "")
    , prefix("\\u", 2, "")
    , suffix()
    , minDigits(4)
    , uppercase(TRUE)
{
}

/**
 * Copy constructor: copies the base-class state, the pattern, the
 * prefix and suffix, the minimum digit count and the case flag from
 * the given transliterator.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(const UnicodeToHexTransliterator& other)
    : Transliterator(other)
    , pattern(other.pattern)
    , prefix(other.prefix)
    , suffix(other.suffix)
    , minDigits(other.minDigits)
    , uppercase(other.uppercase)
{
}

/**
 * Assignment operator: assigns the base-class state and then every
 * member of this class from the right-hand side.
 *
 * @return a reference to this object.
 */
UnicodeToHexTransliterator&
UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
    // Base-class part first, then this class's own fields.
    Transliterator::operator=(other);

    // Scalar settings.
    uppercase = other.uppercase;
    minDigits = other.minDigits;

    // String settings.
    suffix  = other.suffix;
    prefix  = other.prefix;
    pattern = other.pattern;

    return *this;
}

/**
 * Returns a heap-allocated copy of this transliterator, created with
 * the copy constructor.
 */
Transliterator*
UnicodeToHexTransliterator::clone(void) const {
    Transliterator* duplicate = new UnicodeToHexTransliterator(*this);
    return duplicate;
}

void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,
                                              UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // POSSIBILE FUTURE MODIFICATION
    // Parse thePattern, and if this succeeds, set pattern to thePattern.
    // If it fails, call applyPattern(pattern) to restore the original
    // conditions.

    pattern = thePattern;
    prefix.truncate(0);
    suffix.truncate(0);
    minDigits = 0;
    int32_t maxDigits = 0;

    /* The mode specifies where we are in each spec.
     * mode 0 = in prefix
     * mode 1 = in optional digits (#)
     * mode 2 = in required digits (0)
     * mode 3 = in suffix
     */
    int32_t mode = 0;

    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar c = pattern.charAt(i);
        UBool isLiteral = FALSE;
        if (c == BACKSLASH) {
            if ((i+1)<pattern.length()) {
                isLiteral = TRUE;
                c = pattern.charAt(++i);
            } else {
                // Trailing '\\'
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
        }

        if (!isLiteral) {
            switch (c) {
            case POUND:
                // Seeing a '#' moves us from mode 0 (prefix) to mode 1
                // (optional digits).
                if (mode == 0) {
                    ++mode;
                } else if (mode != 1) {
                    // Unquoted '#'
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                ++maxDigits;
                break;
            case ZERO:
                // Seeing a '0' moves us to mode 2 (required digits)
                if (mode < 2) {
                    mode = 2;
                } else if (mode != 2) {
                    // Unquoted '0'
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                ++minDigits;
                ++maxDigits;
                break;
            default:
                isLiteral = TRUE;
                break;
            }
        }

        if (isLiteral) {
            if (mode == 0) {
                prefix.append(c);
            } else {
                // Any literal outside the prefix moves us into mode 3
                // (suffix)
                mode = 3;
                suffix.append(c);
            }
        }
    }

    if (minDigits < 1 || maxDigits > 4) {
        // Invalid min/max digit count
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
}

/**
 * Returns a reference to the stored pattern string.  applyPattern()
 * stores the pattern it is given before validating it.
 */
const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {
    return this->pattern;
}

/**
 * Reports the current case setting.
 * @return true if hex digits are output in uppercase.
 */
UBool UnicodeToHexTransliterator::isUppercase(void) const {
    return this->uppercase;
}

/**
 * Selects uppercase or lowercase hex digit output.
 *
 * <p>This is a plain, unsynchronized store.  If a transliterator is
 * shared between threads, callers must make sure no thread changes
 * the setting while another thread may be transliterating.
 * @param outputUppercase if true, hex digits are output in uppercase;
 * otherwise in lowercase.
 */
void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {
    this->uppercase = outputUppercase;
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Replaces each 16-bit code unit in [offsets.start, offsets.limit)
 * that the filter accepts (or every unit when there is no filter) with
 * prefix + hex digits + suffix.  For example, '@' -> "U+0040" when the
 * prefix is "U+" and four digits are required.  On return,
 * offsets.start and offsets.limit both sit at the end of the processed
 * text and offsets.contextLimit has grown by the same amount as
 * offsets.limit.
 */
void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                     UBool /*isIncremental*/) const {
    const UnicodeFilter* const charFilter = getFilter();

    // Offset into HEX_DIGITS: 16 selects the uppercase half of the table.
    const int32_t caseOffset = uppercase ? 16 : 0;

    UnicodeString escaped;
    int32_t pos = offsets.start;
    int32_t end = offsets.limit;

    while (pos < end) {
        const UChar unit = text.charAt(pos);

        // Code units rejected by the filter are left in place.
        if (charFilter != 0 && !charFilter->contains(unit)) {
            ++pos;
            continue;
        }

        escaped = prefix;

        // Walk the four nibbles from most to least significant.  Leading
        // zero nibbles are dropped unless they fall within the last
        // minDigits positions; once one digit is out, all the rest follow.
        UBool emitting = FALSE;
        for (int32_t digitsLeft = 4; digitsLeft > 0; --digitsLeft) {
            const int32_t nibble = (unit >> ((digitsLeft - 1) * 4)) & 0xF;
            if (emitting || nibble != 0 || minDigits >= digitsLeft) {
                escaped.append(HEX_DIGITS[nibble | caseOffset]);
                emitting = TRUE;
            }
        }

        escaped.append(suffix);

        text.handleReplaceBetween(pos, pos + 1, escaped);

        // One code unit was replaced by escaped.length() units: skip the
        // inserted text and extend the end by the net growth.
        const int32_t growth = escaped.length() - 1;
        pos += growth + 1;
        end += growth;
    }

    offsets.contextLimit += end - offsets.limit;
    offsets.limit = end;
    offsets.start = pos;
}
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/*`
			`**********************************************************************`
			`* Copyright (C) 1999, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 11/17/99 aliu Creation.`
			`**********************************************************************`
			`*/`
ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_ X-SVN-Rev: 473 1999-12-28 23:57:50 +00:00			`#include "unicode/unitohex.h"`
			`#include "unicode/rep.h"`
			`#include "unicode/unifilt.h"`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* ID for this transliterator.`
			`*/`
			`const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";`

ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {`
			`// Use Unicode hex values for EBCDIC compatibility`
			`0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567`
			`0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // 89abcdef`
			`0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567`
			`0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, // 89ABCDEF`
			`};`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* Constructs a transliterator.`
			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`const UnicodeString& thePattern,`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`UBool isUppercase,`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`UnicodeFilter* adoptedFilter,`
			`UErrorCode& status) :`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`Transliterator(_ID, adoptedFilter),`
			`uppercase(isUppercase) {`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00
			`if (U_FAILURE(status)) {`
			`return;`
			`}`
			`applyPattern(thePattern, status);`
			`}`

			`/**`
			`* Constructs a transliterator.`
			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
			`const UnicodeString& thePattern,`
			`UErrorCode& status) :`
			`Transliterator(_ID, 0),`
			`uppercase(TRUE) {`

			`if (U_FAILURE(status)) {`
			`return;`
			`}`
			`applyPattern(thePattern, status);`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`}`

			`/**`
			`* Constructs a transliterator with the default prefix "\u"`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`* that outputs four uppercase hex digits.`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
			`UnicodeFilter* adoptedFilter) :`
			`Transliterator(_ID, adoptedFilter),`
ICU-432 ctor didn't use invariant conversion X-SVN-Rev: 2095 2000-08-02 19:05:12 +00:00			`pattern("\\\\u0000", ""),`
			`prefix("\\u", 2, ""),`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`suffix(),`
			`minDigits(4),`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`uppercase(TRUE) {`
			`}`

			`/**`
			`* Copy constructor.`
			`*/`
			`UnicodeToHexTransliterator::UnicodeToHexTransliterator(`
			`const UnicodeToHexTransliterator& other) :`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`Transliterator(other),`
			`pattern(other.pattern),`
			`prefix(other.prefix),`
			`suffix(other.suffix),`
			`minDigits(other.minDigits),`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`uppercase(other.uppercase) {`
			`}`

			`/**`
			`* Assignment operator.`
			`*/`
			`UnicodeToHexTransliterator&`
			`UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {`
			`Transliterator::operator=(other);`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`pattern = other.pattern;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`prefix = other.prefix;`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`suffix = other.suffix;`
			`minDigits = other.minDigits;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`uppercase = other.uppercase;`
			`return *this;`
			`}`

			`Transliterator*`
ICU-200 Updated with OS/400 specific port changes. X-SVN-Rev: 459 1999-12-22 22:57:04 +00:00			`UnicodeToHexTransliterator::clone(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return new UnicodeToHexTransliterator(*this);`
			`}`

ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,`
			`UErrorCode& status) {`
			`if (U_FAILURE(status)) {`
			`return;`
			`}`

			`// POSSIBILE FUTURE MODIFICATION`
			`// Parse thePattern, and if this succeeds, set pattern to thePattern.`
			`// If it fails, call applyPattern(pattern) to restore the original`
			`// conditions.`

			`pattern = thePattern;`
			`prefix.truncate(0);`
			`suffix.truncate(0);`
			`minDigits = 0;`
			`int32_t maxDigits = 0;`

			`/* The mode specifies where we are in each spec.`
			`* mode 0 = in prefix`
			`* mode 1 = in optional digits (#)`
			`* mode 2 = in required digits (0)`
			`* mode 3 = in suffix`
			`*/`
			`int32_t mode = 0;`

			`for (int32_t i=0; i<pattern.length(); ++i) {`
			`UChar c = pattern.charAt(i);`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`UBool isLiteral = FALSE;`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`if (c == BACKSLASH) {`
			`if ((i+1)<pattern.length()) {`
			`isLiteral = TRUE;`
			`c = pattern.charAt(++i);`
			`} else {`
			`// Trailing '\\'`
			`status = U_ILLEGAL_ARGUMENT_ERROR;`
			`return;`
			`}`
			`}`

			`if (!isLiteral) {`
			`switch (c) {`
			`case POUND:`
			`// Seeing a '#' moves us from mode 0 (prefix) to mode 1`
			`// (optional digits).`
			`if (mode == 0) {`
			`++mode;`
			`} else if (mode != 1) {`
			`// Unquoted '#'`
			`status = U_ILLEGAL_ARGUMENT_ERROR;`
			`return;`
			`}`
			`++maxDigits;`
			`break;`
			`case ZERO:`
			`// Seeing a '0' moves us to mode 2 (required digits)`
			`if (mode < 2) {`
			`mode = 2;`
			`} else if (mode != 2) {`
			`// Unquoted '0'`
			`status = U_ILLEGAL_ARGUMENT_ERROR;`
			`return;`
			`}`
			`++minDigits;`
			`++maxDigits;`
			`break;`
			`default:`
			`isLiteral = TRUE;`
			`break;`
			`}`
			`}`

			`if (isLiteral) {`
			`if (mode == 0) {`
			`prefix.append(c);`
			`} else {`
			`// Any literal outside the prefix moves us into mode 3`
			`// (suffix)`
			`mode = 3;`
			`suffix.append(c);`
			`}`
			`}`
			`}`

			`if (minDigits < 1 \|\| maxDigits > 4) {`
			`// Invalid min/max digit count`
			`status = U_ILLEGAL_ARGUMENT_ERROR;`
			`return;`
			`}`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`}`

ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {`
			`return pattern;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`}`

			`/**`
			`* Returns true if this transliterator outputs uppercase hex digits.`
			`*/`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`UBool UnicodeToHexTransliterator::isUppercase(void) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`return uppercase;`
			`}`

			`/**`
			`* Sets if this transliterator outputs uppercase hex digits.`
			`*`
			`* <p>Callers must take care if a transliterator is in use by`
			`* multiple threads. The uppercase mode should not be changed by`
			`* one thread while another thread may be transliterating.`
			`* @param outputUppercase if true, then this transliterator`
			`* outputs uppercase hex digits.`
			`*/`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`uppercase = outputUppercase;`
			`}`

			`/**`
ICU-199 remove "keyboard" from method names, make max context len a member of Transliterator set by subclasses X-SVN-Rev: 629 2000-01-18 18:27:27 +00:00			`* Implements {@link Transliterator#handleTransliterate}.`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`*/`
ICU-450 change ParseError, Transliterator::Direction, Transliterator::Position to C structs X-SVN-Rev: 1655 2000-06-27 19:00:38 +00:00			`void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,`
ICU-535 fix MSVC level 4 warnings X-SVN-Rev: 2259 2000-08-15 18:25:20 +00:00			`UBool /isIncremental/) const {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/**`
			`* Performs transliteration changing all characters to`
			`* Unicode hexadecimal escapes. For example, '@' -> "U+0040",`
			`* assuming the prefix is "U+".`
			`*/`
ICU-450 fix obviously erroneous usage of UTransPosition fields X-SVN-Rev: 1659 2000-06-27 20:06:52 +00:00			`int32_t cursor = offsets.start;`
ICU-199 cleanup API, change int[] to Position X-SVN-Rev: 649 2000-01-19 19:02:10 +00:00			`int32_t limit = offsets.limit;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`const UnicodeFilter* filter = getFilter();`
			`UnicodeString hex;`

			`while (cursor < limit) {`
			`UChar c = text.charAt(cursor);`
ICU-199 UnicodeSet inherits from UnicodeFilter X-SVN-Rev: 634 2000-01-18 20:00:56 +00:00			`if (filter != 0 && !filter->contains(c)) {`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`++cursor;`
			`continue;`
			`}`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00
			`hex = prefix;`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`UBool showRest = FALSE;`
ICU-243 add prefix/suffix support to Hex-Unicode and Unicode-Hex X-SVN-Rev: 916 2000-03-08 19:26:17 +00:00			`for (int32_t i=3; i>=0; --i) {`
			`int32_t d = (c >> (i*4)) & 0xF;`
			`if (showRest \|\| (d != 0) \|\| minDigits > i) {`
			`hex.append(HEX_DIGITS[uppercase ? (d\|16) : d]);`
			`showRest = TRUE;`
			`}`
			`}`
			`hex.append(suffix);`

ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`text.handleReplaceBetween(cursor, cursor+1, hex);`
			`int32_t len = hex.length();`
			`cursor += len; // Advance cursor by 1 and adjust for new text`
			`--len;`
			`limit += len;`
			`}`

ICU-450 fix obviously erroneous usage of UTransPosition fields X-SVN-Rev: 1659 2000-06-27 20:06:52 +00:00			`offsets.contextLimit += limit - offsets.limit;`
ICU-199 cleanup API, change int[] to Position X-SVN-Rev: 649 2000-01-19 19:02:10 +00:00			`offsets.limit = limit;`
ICU-450 fix obviously erroneous usage of UTransPosition fields X-SVN-Rev: 1659 2000-06-27 20:06:52 +00:00			`offsets.start = cursor;`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`}`