ICU-1533 incorporate Mark's review comments; move escape handling methods to Utility

X-SVN-Rev: 7067
2001-11-21 22:43:21 +00:00 · 2001-11-21 22:43:21 +00:00 · 2280780824
commit 2280780824
parent e28956def1
5 changed files with 67 additions and 49 deletions
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -15,6 +15,7 @@
 #include "unicode/unicode.h"
 #include "cmemory.h"
 #include "strmatch.h"
+#include "util.h"

 static const UChar APOSTROPHE = 0x0027; // '\''
 static const UChar BACKSLASH  = 0x005C; // '\' 
@ -574,7 +575,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
    // quotes.  \u and \U are not recognized within quotes.  The same
    // logic applies to literals, but literals are never escaped.
    if (isLiteral ||
-        (escapeUnprintable && UnicodeSet::_isUnprintable(c))) {
+        (escapeUnprintable && Utility::isUnprintable(c))) {
        if (quoteBuf.length() > 0) {
            // We prefer backslash APOSTROPHE to double APOSTROPHE
            // (more readable, less similar to ") so if there are
@ -609,7 +610,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
            }
        }
        if (c != (UChar32)-1) {
-            if (!escapeUnprintable || !UnicodeSet::_escapeUnprintable(rule, c)) {
+            if (!escapeUnprintable || !Utility::escapeUnprintable(rule, c)) {
                rule.append(c);
            }
        }
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -37,6 +37,7 @@
 #include "unicode/uscript.h"
 #include "esctrn.h"
 #include "unesctrn.h"
+#include "util.h"


 // keep in sync with CompoundTransliterator
@ -915,7 +916,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
        UnicodeString id = getID();
        for (int32_t i=0; i<id.length();) {
            UChar32 c = id.char32At(i);
-            if (!UnicodeSet::_escapeUnprintable(rulesSource, c)) {
+            if (!Utility::escapeUnprintable(rulesSource, c)) {
                rulesSource.append(c);
            }
            i += UTF_CHAR_LENGTH(c);
--- a/icu4c/source/i18n/uniset.cpp
+++ b/icu4c/source/i18n/uniset.cpp
@ -18,6 +18,7 @@
 #include "umutex.h"
 #include "ucln_in.h"
 #include "upropset.h"
+#include "util.h"

 // HIGH_VALUE > all valid values. 110000 for codepoints
 #define UNICODESET_HIGH 0x0110000
@ -387,7 +388,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
    if (useHexEscape) {
        // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
        // unprintable
-        if (_escapeUnprintable(buf, c)) {
+        if (Utility::escapeUnprintable(buf, c)) {
            return;
        }
    }
@ -413,49 +414,6 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
    buf.append((UChar) c);
 }

-static const UChar HEX[16] = {48,49,50,51,52,53,54,55,  // 0-7
-                              56,57,65,66,67,68,69,70}; // 8-9 A-F
-
-/**
- * Return true if the character is NOT printable ASCII.
- *
- * This method should really be in UnicodeString (or similar).  For
- * now, we implement it here and share it with friend classes.
- */
-UBool UnicodeSet::_isUnprintable(UChar32 c) {
-    return !(c == 0x0A || (c >= 0x20 && c <= 0x7E));
-}
-
-/**
- * Escape unprintable characters using \uxxxx notation for U+0000 to
- * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
- * printable ASCII, then do nothing and return FALSE.  Otherwise,
- * append the escaped notation and return TRUE.
- *
- * This method should really be in UnicodeString.  For now, we
- * implement it here and share it with friend classes.
- */
-UBool UnicodeSet::_escapeUnprintable(UnicodeString& result, UChar32 c) {
-    if (_isUnprintable(c)) {
-        result.append(BACKSLASH);
-        if (c & ~0xFFFF) {
-            result.append(UPPER_U);
-            result.append(HEX[0xF&(c>>28)]);
-            result.append(HEX[0xF&(c>>24)]);
-            result.append(HEX[0xF&(c>>20)]);
-            result.append(HEX[0xF&(c>>16)]);
-        } else {
-            result.append(LOWER_U);
-        }
-        result.append(HEX[0xF&(c>>12)]);
-        result.append(HEX[0xF&(c>>8)]);
-        result.append(HEX[0xF&(c>>4)]);
-        result.append(HEX[0xF&c]);
-        return TRUE;
-    }
-    return FALSE;
-}
-
 /**
 * Returns a string representation of this set.  If the result of
 * calling this function is passed to a UnicodeSet constructor, it
@ -479,7 +437,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
        int32_t backslashCount = 0;
        for (i=0; i<pat.length(); ++i) {
            UChar c = pat.charAt(i);
-            if (escapeUnprintable && _isUnprintable(c)) {
+            if (escapeUnprintable && Utility::isUnprintable(c)) {
                // If the unprintable character is preceded by an odd
                // number of backslashes, then it has been escaped.
                // Before unescaping it, we delete the final
@ -487,7 +445,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
                if ((backslashCount % 2) == 1) {
                    result.truncate(result.length() - 1);
                }
-                _escapeUnprintable(result, c);
+                Utility::escapeUnprintable(result, c);
                backslashCount = 0;
            } else {
                result.append(c);
--- a/icu4c/source/i18n/util.cpp
+++ b/icu4c/source/i18n/util.cpp
@ -10,6 +10,12 @@

 #include "util.h"

+// Define UChar constants using hex for EBCDIC compatibility
+// Used #define to reduce private static exports and memory access time.
+#define BACKSLASH       ((UChar)0x005C) /*\*/
+#define UPPER_U         ((UChar)0x0055) /*U*/
+#define LOWER_U         ((UChar)0x0075) /*u*/
+
 // "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 static const UChar DIGITS[] = {
    48,49,50,51,52,53,54,55,56,57,
@ -50,4 +56,41 @@ UnicodeString& Utility::appendNumber(UnicodeString& result, int32_t n,
    return result;
 }

+static const UChar HEX[16] = {48,49,50,51,52,53,54,55,  // 0-7
+                              56,57,65,66,67,68,69,70}; // 8-9 A-F
+
+/** Return TRUE unless c is line feed (U+000A) or lies in the
+ * printable ASCII range U+0020..U+007E (space through tilde).
+ */
+UBool Utility::isUnprintable(UChar32 c) {
+    return !(c == 0x0A || (c >= 0x20 && c <= 0x7E));
+}
+
+/**
+ * If c is unprintable (see isUnprintable), append its escape to result
+ * and return TRUE: \uxxxx (4 hex digits) for U+0000 to U+FFFF, or
+ * \Uxxxxxxxx (8 hex digits) above that.  Hex digits A-F are upper
+ * case.  If c is printable, leave result untouched and return FALSE.
+ */
+UBool Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
+    if (isUnprintable(c)) {
+        result.append(BACKSLASH);
+        if (c & ~0xFFFF) { // some bit above the low 16 is set: 8-digit form
+            result.append(UPPER_U);
+            result.append(HEX[0xF&(c>>28)]); // upper four nibbles, most significant first
+            result.append(HEX[0xF&(c>>24)]);
+            result.append(HEX[0xF&(c>>20)]);
+            result.append(HEX[0xF&(c>>16)]);
+        } else {
+            result.append(LOWER_U);
+        }
+        result.append(HEX[0xF&(c>>12)]); // lower four nibbles, emitted for both forms
+        result.append(HEX[0xF&(c>>8)]);
+        result.append(HEX[0xF&(c>>4)]);
+        result.append(HEX[0xF&c]);
+        return TRUE;
+    }
+    return FALSE;
+}
+
 //eof
--- a/icu4c/source/i18n/util.h
+++ b/icu4c/source/i18n/util.h
@ -40,6 +40,21 @@ class Utility {
                                       int32_t radix = 10,
                                       int32_t minDigits = 1);

+    /**
+     * Return true if the character is NOT printable ASCII.
+     *
+     * This method should really be in UnicodeString (or similar).  For
+     * now, it lives here, shared by UnicodeSet and the transliterators.
+     */
+    static UBool isUnprintable(UChar32 c);
+
+    /**
+     * Escape unprintable characters using \uxxxx notation for U+0000 to
+     * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
+     * printable ASCII, then leave result unchanged and return FALSE.
+     * Otherwise, append the escaped notation to result and return TRUE.
+     */
+    static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
 };

 U_NAMESPACE_END