From 22807808245f88caa6ebdeb9897b04c7fc94f6cd Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Wed, 21 Nov 2001 22:43:21 +0000 Subject: [PATCH] ICU-1533 incorporate Mark's review comments; move escape handling methods to Utility X-SVN-Rev: 7067 --- icu4c/source/i18n/rbt_rule.cpp | 5 ++-- icu4c/source/i18n/translit.cpp | 3 +- icu4c/source/i18n/uniset.cpp | 50 +++------------------------------- icu4c/source/i18n/util.cpp | 43 +++++++++++++++++++++++++++++ icu4c/source/i18n/util.h | 15 ++++++++++ 5 files changed, 67 insertions(+), 49 deletions(-) diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index 6c776f10f3..d69465bd67 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -15,6 +15,7 @@ #include "unicode/unicode.h" #include "cmemory.h" #include "strmatch.h" +#include "util.h" static const UChar APOSTROPHE = 0x0027; // '\'' static const UChar BACKSLASH = 0x005C; // '\' @@ -574,7 +575,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule, // quotes. \u and \U are not recognized within quotes. The same // logic applies to literals, but literals are never escaped. if (isLiteral || - (escapeUnprintable && UnicodeSet::_isUnprintable(c))) { + (escapeUnprintable && Utility::isUnprintable(c))) { if (quoteBuf.length() > 0) { // We prefer backslash APOSTROPHE to double APOSTROPHE // (more readable, less similar to ") so if there are @@ -609,7 +610,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule, } } if (c != (UChar32)-1) { - if (!escapeUnprintable || !UnicodeSet::_escapeUnprintable(rule, c)) { + if (!escapeUnprintable || !Utility::escapeUnprintable(rule, c)) { rule.append(c); } } diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index f0e986141a..e85ec39e36 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -37,6 +37,7 @@ #include "unicode/uscript.h" #include "esctrn.h" #include "unesctrn.h" +#include "util.h" // keep in sync with CompoundTransliterator @@ -915,7 +916,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource, UnicodeString id = getID(); for (int32_t i=0; i all valid values. 110000 for codepoints #define UNICODESET_HIGH 0x0110000 @@ -387,7 +388,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape) if (useHexEscape) { // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything // unprintable - if (_escapeUnprintable(buf, c)) { + if (Utility::escapeUnprintable(buf, c)) { return; } } @@ -413,49 +414,6 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape) buf.append((UChar) c); } -static const UChar HEX[16] = {48,49,50,51,52,53,54,55, // 0-7 - 56,57,65,66,67,68,69,70}; // 8-9 A-F - -/** - * Return true if the character is NOT printable ASCII. - * - * This method should really be in UnicodeString (or similar). For - * now, we implement it here and share it with friend classes. - */ -UBool UnicodeSet::_isUnprintable(UChar32 c) { - return !(c == 0x0A || (c >= 0x20 && c <= 0x7E)); -} - -/** - * Escape unprintable characters using \uxxxx notation for U+0000 to - * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is - * printable ASCII, then do nothing and return FALSE. Otherwise, - * append the escaped notation and return TRUE. - * - * This method should really be in UnicodeString. For now, we - * implement it here and share it with friend classes. - */ -UBool UnicodeSet::_escapeUnprintable(UnicodeString& result, UChar32 c) { - if (_isUnprintable(c)) { - result.append(BACKSLASH); - if (c & ~0xFFFF) { - result.append(UPPER_U); - result.append(HEX[0xF&(c>>28)]); - result.append(HEX[0xF&(c>>24)]); - result.append(HEX[0xF&(c>>20)]); - result.append(HEX[0xF&(c>>16)]); - } else { - result.append(LOWER_U); - } - result.append(HEX[0xF&(c>>12)]); - result.append(HEX[0xF&(c>>8)]); - result.append(HEX[0xF&(c>>4)]); - result.append(HEX[0xF&c]); - return TRUE; - } - return FALSE; -} - /** * Returns a string representation of this set. If the result of * calling this function is passed to a UnicodeSet constructor, it @@ -479,7 +437,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, int32_t backslashCount = 0; for (i=0; i= 0x20 && c <= 0x7E)); +} + +/** + * Escape unprintable characters using \uxxxx notation for U+0000 to + * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is + * printable ASCII, then do nothing and return FALSE. Otherwise, + * append the escaped notation and return TRUE. + */ +UBool Utility::escapeUnprintable(UnicodeString& result, UChar32 c) { + if (isUnprintable(c)) { + result.append(BACKSLASH); + if (c & ~0xFFFF) { + result.append(UPPER_U); + result.append(HEX[0xF&(c>>28)]); + result.append(HEX[0xF&(c>>24)]); + result.append(HEX[0xF&(c>>20)]); + result.append(HEX[0xF&(c>>16)]); + } else { + result.append(LOWER_U); + } + result.append(HEX[0xF&(c>>12)]); + result.append(HEX[0xF&(c>>8)]); + result.append(HEX[0xF&(c>>4)]); + result.append(HEX[0xF&c]); + return TRUE; + } + return FALSE; +} + //eof diff --git a/icu4c/source/i18n/util.h b/icu4c/source/i18n/util.h index 728d84a5e2..9f0117a0ef 100644 --- a/icu4c/source/i18n/util.h +++ b/icu4c/source/i18n/util.h @@ -40,6 +40,21 @@ class Utility { int32_t radix = 10, int32_t minDigits = 1); + /** + * Return true if the character is NOT printable ASCII. + * + * This method should really be in UnicodeString (or similar). For + * now, we implement it here and share it with friend classes. + */ + static UBool isUnprintable(UChar32 c); + + /** + * Escape unprintable characters using \uxxxx notation for U+0000 to + * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is + * printable ASCII, then do nothing and return FALSE. Otherwise, + * append the escaped notation and return TRUE. + */ + static UBool escapeUnprintable(UnicodeString& result, UChar32 c); }; U_NAMESPACE_END