scuffed-code/icu4c/source/i18n/hextouni.cpp

/*
**********************************************************************
*   Copyright (C) 1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#include "hextouni.h"
#include "rep.h"
#include "unifilt.h"
#include "unicode.h"

/**
 * Identifier string for this transliterator.  It is the ID value that
 * the filter-taking constructor in this file passes to the
 * Transliterator base class.
 */
const char* HexToUnicodeTransliterator::_ID = "Hex-Unicode";

/**
 * Creates a hex-escape-to-Unicode transliterator.
 * @param adoptedFilter filter handed, together with _ID, to the
 * Transliterator base class constructor.  NOTE(review): the name
 * suggests the base class takes ownership of the filter -- confirm
 * against Transliterator.
 */
HexToUnicodeTransliterator::HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
{
}

/**
 * Copy constructor.  This class defines no state of its own here, so
 * copying is delegated entirely to the Transliterator base class.
 * @param other the transliterator to copy
 */
HexToUnicodeTransliterator::HexToUnicodeTransliterator(const HexToUnicodeTransliterator& other)
    : Transliterator(other)
{
}

/**
 * Assignment operator.  Delegates to the Transliterator base class
 * assignment and yields this object.
 * @param other the transliterator to assign from
 * @return a reference to this object
 */
HexToUnicodeTransliterator&
HexToUnicodeTransliterator::operator=(const HexToUnicodeTransliterator& other) {
    this->Transliterator::operator=(other);
    return *this;
}

/**
 * Transliterator API.  Produces a heap-allocated copy of this object
 * via the copy constructor.
 * @return a newly allocated copy; it is created with <code>new</code>,
 * so the caller is presumably responsible for deleting it.
 */
Transliterator* HexToUnicodeTransliterator::clone() const {
    HexToUnicodeTransliterator* duplicate = new HexToUnicodeTransliterator(*this);
    return duplicate;
}

/**
 * Transliterates a segment of a string.  <code>Transliterator</code> API.
 * The work is done by handleKeyboardTransliterate(), which is given a
 * three-element offsets array filled, in order, with start, limit, and
 * start again.
 * @param text the string to be transliterated
 * @param start the beginning index, inclusive; <code>0 <= start
 * <= limit</code>.
 * @param limit the ending index, exclusive; <code>start <= limit
 * <= text.length()</code>.
 * @return the new limit index, as left in the offsets array by
 * handleKeyboardTransliterate()
 */
int32_t HexToUnicodeTransliterator::transliterate(Replaceable& text,
                                                  int32_t start, int32_t limit) const {
    int32_t positions[3];
    positions[0] = start;
    positions[1] = limit;
    positions[2] = start;

    handleKeyboardTransliterate(text, positions);

    const int32_t newLimit = positions[LIMIT];
    return newLimit;
}

/**
 * Implements {@link Transliterator#handleKeyboardTransliterate}.
 *
 * Scans text from offsets[CURSOR] up to offsets[LIMIT] and replaces
 * each six-character escape (a two-character prefix followed by four
 * hexadecimal digits) with the single character it encodes.
 *
 * @param text the text to modify in place
 * @param offsets in/out index array.  offsets[CURSOR] and
 * offsets[LIMIT] are read on entry and written on exit: LIMIT shrinks
 * by 5 for every escape replaced, and CURSOR is left where scanning
 * stopped.  No other element is read or written by this function.
 */
void HexToUnicodeTransliterator::handleKeyboardTransliterate(Replaceable& text,
                                                             int32_t offsets[3]) const {
    /**
     * Performs transliteration changing Unicode hexadecimal
     * escapes to characters.  For example, "U+0040" -> '@'.  A fixed
     * set of prefixes is recognized: "&#92;u", "&#92;U", "u+", "U+"
     * (&#92; stands for a backslash).
     *
     * The candidate window is text[cursor .. cursor+5].  It is checked
     * from its last character backwards so that, on a mismatch, the
     * cursor can jump ahead by more than one position.
     */
    int32_t cursor = offsets[CURSOR];
    int32_t limit = offsets[LIMIT];

    // Last cursor position at which a full 6-character window still
    // fits before limit.
    int32_t maxCursor = limit - 6;

    while (cursor <= maxCursor) {
        // Look at the last character of the window first; it must be
        // the fourth hex digit.  A negative result from Unicode::digit
        // is treated as "not a hex digit".
        UChar c = filteredCharAt(text, cursor + 5);
        int32_t digit0 = Unicode::digit(c, 16);
        if (digit0 < 0) {
            if (c == '\\') {
                // A backslash can only be the first character of an
                // escape, so the next candidate window starts on it.
                cursor += 5;
            } else if (c == 'U' || c == 'u' || c == '+') {
                // These may be the second prefix character, so the
                // next candidate window starts one position before it.
                cursor += 4;
            } else {
                // This character cannot be part of any escape; skip
                // the whole window.
                cursor += 6;
            }
            continue;
        }

        // u accumulates the 16-bit value; digit0 is the low nibble.
        int32_t u = digit0;
        bool_t toTop = FALSE;

        // Check the remaining three hex digits, right to left
        // (window positions 4, 3, 2).
        for (int32_t i=4; i>=2; --i) {
            c = filteredCharAt(text, cursor + i);
            int32_t digit = Unicode::digit(c, 16);
            if (digit < 0) {
                if (c == 'U' || c == 'u' || c == '+') {
                    // Possible second prefix character at cursor+i:
                    // restart with the window beginning just before it.
                    cursor += i-1;
                } else {
                    // Everything after position i in this window is a
                    // hex digit, so no escape can begin inside the
                    // window; skip all of it.
                    cursor += 6;
                }
                toTop = TRUE; // This is a little awkward -- it was a "continue loop:"
                break;        // statement in Java, where loop marked the while().
            } else {
                // Positions 4, 3, 2 supply bits 4-7, 8-11, 12-15.
                u |= digit << (4 * (5-i));
            }
        }

        if (toTop) {
            continue;
        }

        // Four hex digits found; now verify the two prefix characters.
        c = filteredCharAt(text, cursor);
        UChar d = filteredCharAt(text, cursor + 1);
        if (((c == 'U' || c == 'u') && d == '+')
            || (c == '\\' && (d == 'U' || d == 'u'))) {

            // At this point, we have a match; replace cursor..cursor+5
            // with u.
            text.handleReplaceBetween(cursor, cursor+6, UnicodeString((UChar)u));
            // Six characters were replaced by one, so the text is now
            // five shorter.
            limit -= 5;
            maxCursor -= 5;

            // Step past the character just written.
            ++cursor;
        } else {
            // Positions 2..5 are hex digits and so cannot start a
            // prefix; the next possible escape begins after the window.
            cursor += 6;
        }
    }

    // Report the adjusted limit and the final scan position.
    offsets[LIMIT] = limit;
    offsets[CURSOR] = cursor;
}

/**
 * Returns the character at index i of text as seen through this
 * transliterator's filter.
 * @param text the text to read from
 * @param i index of the character to fetch
 * @return the character itself when no filter is set or when the
 * filter's isIn() accepts it; otherwise the placeholder value 0xFFFF.
 */
UChar HexToUnicodeTransliterator::filteredCharAt(Replaceable& text, int32_t i) const {
    const UnicodeFilter* activeFilter = getFilter();
    const UChar ch = text.charAt(i);

    // Without a filter every character is passed through unchanged.
    if (activeFilter == 0) {
        return ch;
    }

    if (activeFilter->isIn(ch)) {
        return ch;
    }
    return (UChar)0xFFFF;
}

/**
 * Return the length of the longest context required by this transliterator.
 * This is <em>preceding</em> context.
 * @return maximum number of preceding context characters this
 * transliterator needs to examine; always 0 for this class
 */
int32_t HexToUnicodeTransliterator::getMaximumContextLength() const {
    const int32_t precedingContext = 0;
    return precedingContext;
}
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`/*`
			`**********************************************************************`
			`* Copyright (C) 1999, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Date Name Description`
			`* 11/17/99 aliu Creation.`
			`**********************************************************************`
			`*/`
			`#include "hextouni.h"`
			`#include "rep.h"`
			`#include "unifilt.h"`
ICU-44 Update transliterator code to use Unicode::digit. X-SVN-Rev: 232 1999-11-23 01:31:13 +00:00			`#include "unicode.h"`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00
			`/**`
			`* ID for this transliterator.`
			`*/`
			`const char* HexToUnicodeTransliterator::_ID = "Hex-Unicode";`

			`/**`
			`* Constructs a transliterator.`
			`*/`
			`HexToUnicodeTransliterator::HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter) :`
			`Transliterator(_ID, adoptedFilter) {`
			`}`

			`/**`
			`* Copy constructor.`
			`*/`
			`HexToUnicodeTransliterator::HexToUnicodeTransliterator(const HexToUnicodeTransliterator& o) :`
			`Transliterator(o) {`
			`}`

			`/**`
			`* Assignment operator.`
			`*/`
			`HexToUnicodeTransliterator& HexToUnicodeTransliterator::operator=(`
			`const HexToUnicodeTransliterator& o) {`
			`Transliterator::operator=(o);`
			`return *this;`
			`}`

			`/**`
			`* Transliterator API.`
			`*/`
			`Transliterator* HexToUnicodeTransliterator::clone() const {`
			`return new HexToUnicodeTransliterator(*this);`
			`}`

			`/**`
			`* Transliterates a segment of a string. <code>Transliterator</code> API.`
			`* @param text the string to be transliterated`
			`* @param start the beginning index, inclusive; <code>0 <= start`
			`* <= limit</code>.`
			`* @param limit the ending index, exclusive; <code>start <= limit`
			`* <= text.length()</code>.`
			`* @return the new limit index`
			`*/`
			`int32_t HexToUnicodeTransliterator::transliterate(Replaceable& text,`
			`int32_t start, int32_t limit) const {`
			`int32_t offsets[3] = { start, limit, start };`
			`handleKeyboardTransliterate(text, offsets);`
			`return offsets[LIMIT];`
			`}`

			`/**`
			`* Implements {@link Transliterator#handleKeyboardTransliterate}.`
			`*/`
			`void HexToUnicodeTransliterator::handleKeyboardTransliterate(Replaceable& text,`
			`int32_t offsets[3]) const {`
			`/**`
			`* Performs transliteration changing Unicode hexadecimal`
			`* escapes to characters. For example, "U+0040" -> '@'. A fixed`
			`* set of prefixes is recognized: "\u", "\U", "u+", "U+".`
			`*/`
			`int32_t cursor = offsets[CURSOR];`
			`int32_t limit = offsets[LIMIT];`

			`int32_t maxCursor = limit - 6;`

			`while (cursor <= maxCursor) {`
			`UChar c = filteredCharAt(text, cursor + 5);`
ICU-44 Update transliterator code to use Unicode::digit. X-SVN-Rev: 232 1999-11-23 01:31:13 +00:00			`int32_t digit0 = Unicode::digit(c, 16);`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`if (digit0 < 0) {`
			`if (c == '\\') {`
			`cursor += 5;`
			`} else if (c == 'U' \|\| c == 'u' \|\| c == '+') {`
			`cursor += 4;`
			`} else {`
			`cursor += 6;`
			`}`
			`continue;`
			`}`

			`int32_t u = digit0;`
			`bool_t toTop = FALSE;`

			`for (int32_t i=4; i>=2; --i) {`
			`c = filteredCharAt(text, cursor + i);`
ICU-44 Update transliterator code to use Unicode::digit. X-SVN-Rev: 232 1999-11-23 01:31:13 +00:00			`int32_t digit = Unicode::digit(c, 16);`
ICU-114 Transliterator framework first working version X-SVN-Rev: 194 1999-11-20 00:40:50 +00:00			`if (digit < 0) {`
			`if (c == 'U' \|\| c == 'u' \|\| c == '+') {`
			`cursor += i-1;`
			`} else {`
			`cursor += 6;`
			`}`
			`toTop = TRUE; // This is a little awkward -- it was a "continue loop:"`
			`break; // statement in Java, where loop marked the while().`
			`} else {`
			`u \|= digit << (4 * (5-i));`
			`}`
			`}`

			`if (toTop) {`
			`continue;`
			`}`

			`c = filteredCharAt(text, cursor);`
			`UChar d = filteredCharAt(text, cursor + 1);`
			`if (((c == 'U' \|\| c == 'u') && d == '+')`
			`\|\| (c == '\\' && (d == 'U' \|\| d == 'u'))) {`

			`// At this point, we have a match; replace cursor..cursor+5`
			`// with u.`
			`text.handleReplaceBetween(cursor, cursor+6, UnicodeString((UChar)u));`
			`limit -= 5;`
			`maxCursor -= 5;`

			`++cursor;`
			`} else {`
			`cursor += 6;`
			`}`
			`}`

			`offsets[LIMIT] = limit;`
			`offsets[CURSOR] = cursor;`
			`}`

			`UChar HexToUnicodeTransliterator::filteredCharAt(Replaceable& text, int32_t i) const {`
			`UChar c;`
			`const UnicodeFilter* filter = getFilter();`
			`return (filter == 0) ? text.charAt(i) :`
			`(filter->isIn(c = text.charAt(i)) ? c : (UChar)0xFFFF);`
			`}`

			`/**`
			`* Return the length of the longest context required by this transliterator.`
			`* This is <em>preceding</em> context.`
			`* @param direction either <code>FORWARD</code> or <code>REVERSE</code>`
			`* @return maximum number of preceding context characters this`
			`* transliterator needs to examine`
			`*/`
			`int32_t HexToUnicodeTransliterator::getMaximumContextLength() const {`
			`return 0;`
			`}`