From 22807808245f88caa6ebdeb9897b04c7fc94f6cd Mon Sep 17 00:00:00 2001
From: Alan Liu <alansliu@gmail.com>
Date: Wed, 21 Nov 2001 22:43:21 +0000
Subject: [PATCH] ICU-1533 incorporate Mark's review comments; move escape
 handling methods to Utility

X-SVN-Rev: 7067
---
 icu4c/source/i18n/rbt_rule.cpp |  5 ++--
 icu4c/source/i18n/translit.cpp |  3 +-
 icu4c/source/i18n/uniset.cpp   | 50 +++-------------------------------
 icu4c/source/i18n/util.cpp     | 43 +++++++++++++++++++++++++++++
 icu4c/source/i18n/util.h       | 15 ++++++++++
 5 files changed, 67 insertions(+), 49 deletions(-)

diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp
index 6c776f10f3..d69465bd67 100644
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@@ -15,6 +15,7 @@
 #include "unicode/unicode.h"
 #include "cmemory.h"
 #include "strmatch.h"
+#include "util.h"
 
 static const UChar APOSTROPHE = 0x0027; // '\''
 static const UChar BACKSLASH  = 0x005C; // '\' 
@@ -574,7 +575,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
     // quotes.  \u and \U are not recognized within quotes.  The same
     // logic applies to literals, but literals are never escaped.
     if (isLiteral ||
-        (escapeUnprintable && UnicodeSet::_isUnprintable(c))) {
+        (escapeUnprintable && Utility::isUnprintable(c))) {
         if (quoteBuf.length() > 0) {
             // We prefer backslash APOSTROPHE to double APOSTROPHE
             // (more readable, less similar to ") so if there are
@@ -609,7 +610,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
             }
         }
         if (c != (UChar32)-1) {
-            if (!escapeUnprintable || !UnicodeSet::_escapeUnprintable(rule, c)) {
+            if (!escapeUnprintable || !Utility::escapeUnprintable(rule, c)) {
                 rule.append(c);
             }
         }
diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp
index f0e986141a..e85ec39e36 100644
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@@ -37,6 +37,7 @@
 #include "unicode/uscript.h"
 #include "esctrn.h"
 #include "unesctrn.h"
+#include "util.h"
 
 
 // keep in sync with CompoundTransliterator
@@ -915,7 +916,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
         UnicodeString id = getID();
         for (int32_t i=0; i<id.length();) {
             UChar32 c = id.char32At(i);
-            if (!UnicodeSet::_escapeUnprintable(rulesSource, c)) {
+            if (!Utility::escapeUnprintable(rulesSource, c)) {
                 rulesSource.append(c);
             }
             i += UTF_CHAR_LENGTH(c);
diff --git a/icu4c/source/i18n/uniset.cpp b/icu4c/source/i18n/uniset.cpp
index 5970d1d577..528281bedc 100644
--- a/icu4c/source/i18n/uniset.cpp
+++ b/icu4c/source/i18n/uniset.cpp
@@ -18,6 +18,7 @@
 #include "umutex.h"
 #include "ucln_in.h"
 #include "upropset.h"
+#include "util.h"
 
 // HIGH_VALUE > all valid values. 110000 for codepoints
 #define UNICODESET_HIGH 0x0110000
@@ -387,7 +388,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
     if (useHexEscape) {
         // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
         // unprintable
-        if (_escapeUnprintable(buf, c)) {
+        if (Utility::escapeUnprintable(buf, c)) {
             return;
         }
     }
@@ -413,49 +414,6 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
     buf.append((UChar) c);
 }
 
-static const UChar HEX[16] = {48,49,50,51,52,53,54,55,  // 0-7
-                              56,57,65,66,67,68,69,70}; // 8-9 A-F
-
-/**
- * Return true if the character is NOT printable ASCII.
- *
- * This method should really be in UnicodeString (or similar).  For
- * now, we implement it here and share it with friend classes.
- */
-UBool UnicodeSet::_isUnprintable(UChar32 c) {
-    return !(c == 0x0A || (c >= 0x20 && c <= 0x7E));
-}
-
-/**
- * Escape unprintable characters using \uxxxx notation for U+0000 to
- * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
- * printable ASCII, then do nothing and return FALSE.  Otherwise,
- * append the escaped notation and return TRUE.
- *
- * This method should really be in UnicodeString.  For now, we
- * implement it here and share it with friend classes.
- */
-UBool UnicodeSet::_escapeUnprintable(UnicodeString& result, UChar32 c) {
-    if (_isUnprintable(c)) {
-        result.append(BACKSLASH);
-        if (c & ~0xFFFF) {
-            result.append(UPPER_U);
-            result.append(HEX[0xF&(c>>28)]);
-            result.append(HEX[0xF&(c>>24)]);
-            result.append(HEX[0xF&(c>>20)]);
-            result.append(HEX[0xF&(c>>16)]);
-        } else {
-            result.append(LOWER_U);
-        }
-        result.append(HEX[0xF&(c>>12)]);
-        result.append(HEX[0xF&(c>>8)]);
-        result.append(HEX[0xF&(c>>4)]);
-        result.append(HEX[0xF&c]);
-        return TRUE;
-    }
-    return FALSE;
-}
-
 /**
  * Returns a string representation of this set.  If the result of
  * calling this function is passed to a UnicodeSet constructor, it
@@ -479,7 +437,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
         int32_t backslashCount = 0;
         for (i=0; i<pat.length(); ++i) {
             UChar c = pat.charAt(i);
-            if (escapeUnprintable && _isUnprintable(c)) {
+            if (escapeUnprintable && Utility::isUnprintable(c)) {
                 // If the unprintable character is preceded by an odd
                 // number of backslashes, then it has been escaped.
                 // Before unescaping it, we delete the final
@@ -487,7 +445,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
                 if ((backslashCount % 2) == 1) {
                     result.truncate(result.length() - 1);
                 }
-                _escapeUnprintable(result, c);
+                Utility::escapeUnprintable(result, c);
                 backslashCount = 0;
             } else {
                 result.append(c);
diff --git a/icu4c/source/i18n/util.cpp b/icu4c/source/i18n/util.cpp
index 1c7aacfd01..0c03882c7d 100644
--- a/icu4c/source/i18n/util.cpp
+++ b/icu4c/source/i18n/util.cpp
@@ -10,6 +10,12 @@
 
 #include "util.h"
 
+// UChar constants are written as hex values for EBCDIC compatibility.
+// They are #defines to reduce private static exports and memory access time.
+#define BACKSLASH       ((UChar)0x005C) /*\*/
+#define UPPER_U         ((UChar)0x0055) /*U*/
+#define LOWER_U         ((UChar)0x0075) /*u*/
+
 // "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 static const UChar DIGITS[] = {
     48,49,50,51,52,53,54,55,56,57,
@@ -50,4 +56,41 @@ UnicodeString& Utility::appendNumber(UnicodeString& result, int32_t n,
     return result;
 }
 
+static const UChar HEX[16] = {48,49,50,51,52,53,54,55,  // '0'-'7'; index is the nibble value
+                              56,57,65,66,67,68,69,70}; // '8'-'9', 'A'-'F' (uppercase)
+
+/**
+ * Return TRUE unless c is U+000A (line feed) or in U+0020..U+007E.
+ */
+UBool Utility::isUnprintable(UChar32 c) {
+    return !(c == 0x0A || (c >= 0x20 && c <= 0x7E));
+}
+
+/**
+ * If isUnprintable(c), append an escape for c to result and return TRUE:
+ * \uxxxx (4 hex digits) when c fits in 16 bits, otherwise \Uxxxxxxxx
+ * (8 hex digits), most significant nibble first.  If c is printable,
+ * leave result untouched and return FALSE.
+ */
+UBool Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
+    if (isUnprintable(c)) {
+        result.append(BACKSLASH);
+        if (c & ~0xFFFF) { // c does not fit in 16 bits: emit the 4 high nibbles first
+            result.append(UPPER_U);
+            result.append(HEX[0xF&(c>>28)]);
+            result.append(HEX[0xF&(c>>24)]);
+            result.append(HEX[0xF&(c>>20)]);
+            result.append(HEX[0xF&(c>>16)]);
+        } else {
+            result.append(LOWER_U);
+        }
+        result.append(HEX[0xF&(c>>12)]); // low 16 bits, common to both escape forms
+        result.append(HEX[0xF&(c>>8)]);
+        result.append(HEX[0xF&(c>>4)]);
+        result.append(HEX[0xF&c]);
+        return TRUE;
+    }
+    return FALSE; // printable: nothing appended, caller appends c itself
+}
+
 //eof
diff --git a/icu4c/source/i18n/util.h b/icu4c/source/i18n/util.h
index 728d84a5e2..9f0117a0ef 100644
--- a/icu4c/source/i18n/util.h
+++ b/icu4c/source/i18n/util.h
@@ -40,6 +40,21 @@ class Utility {
                                        int32_t radix = 10,
                                        int32_t minDigits = 1);
 
+    /**
+     * Return true if the character is NOT printable ASCII.
+     *
+     * Printable here means U+000A (line feed) or a code point in the
+     * range U+0020..U+007E; every other value of c is unprintable.
+     */
+    static UBool isUnprintable(UChar32 c);
+
+    /**
+     * Escape unprintable characters using \uxxxx notation for U+0000 to
+     * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If isUnprintable(c)
+     * is false, then do nothing and return FALSE.  Otherwise, append the
+     * escaped notation for c to result and return TRUE.
+     */
+    static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
 };
 
 U_NAMESPACE_END