ICU-1533 incorporate Mark's review comments; move escape handling methods to Utility

X-SVN-Rev: 7067
This commit is contained in:
Alan Liu 2001-11-21 22:43:21 +00:00
parent e28956def1
commit 2280780824
5 changed files with 67 additions and 49 deletions

View File

@ -15,6 +15,7 @@
#include "unicode/unicode.h"
#include "cmemory.h"
#include "strmatch.h"
#include "util.h"
static const UChar APOSTROPHE = 0x0027; // '\''
static const UChar BACKSLASH = 0x005C; // '\'
@ -574,7 +575,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
// quotes. \u and \U are not recognized within quotes. The same
// logic applies to literals, but literals are never escaped.
if (isLiteral ||
(escapeUnprintable && UnicodeSet::_isUnprintable(c))) {
(escapeUnprintable && Utility::isUnprintable(c))) {
if (quoteBuf.length() > 0) {
// We prefer backslash APOSTROPHE to double APOSTROPHE
// (more readable, less similar to ") so if there are
@ -609,7 +610,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
}
}
if (c != (UChar32)-1) {
if (!escapeUnprintable || !UnicodeSet::_escapeUnprintable(rule, c)) {
if (!escapeUnprintable || !Utility::escapeUnprintable(rule, c)) {
rule.append(c);
}
}

View File

@ -37,6 +37,7 @@
#include "unicode/uscript.h"
#include "esctrn.h"
#include "unesctrn.h"
#include "util.h"
// keep in sync with CompoundTransliterator
@ -915,7 +916,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
UnicodeString id = getID();
for (int32_t i=0; i<id.length();) {
UChar32 c = id.char32At(i);
if (!UnicodeSet::_escapeUnprintable(rulesSource, c)) {
if (!Utility::escapeUnprintable(rulesSource, c)) {
rulesSource.append(c);
}
i += UTF_CHAR_LENGTH(c);

View File

@ -18,6 +18,7 @@
#include "umutex.h"
#include "ucln_in.h"
#include "upropset.h"
#include "util.h"
// HIGH_VALUE > all valid values. 110000 for codepoints
#define UNICODESET_HIGH 0x0110000
@ -387,7 +388,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
if (useHexEscape) {
// Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
// unprintable
if (_escapeUnprintable(buf, c)) {
if (Utility::escapeUnprintable(buf, c)) {
return;
}
}
@ -413,49 +414,6 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
buf.append((UChar) c);
}
// Uppercase hexadecimal digit for each nibble value 0..15, spelled as
// numeric code units rather than character literals.
static const UChar HEX[16] = {
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 0-7
    0x38, 0x39,                                     // 8-9
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46              // A-F
};
/**
 * Tell whether a code point lies outside the range treated here as
 * printable ASCII, namely LINE FEED (U+000A) and U+0020..U+007E.
 *
 * Design note: this helper arguably belongs in UnicodeString (or
 * similar); it is implemented here and shared with friend classes.
 *
 * @param c the code point to classify
 * @return TRUE if c is neither U+000A nor within U+0020..U+007E
 */
UBool UnicodeSet::_isUnprintable(UChar32 c) {
    if (c == 0x0A) {
        return FALSE;
    }
    return (c < 0x20 || c > 0x7E);
}
/**
 * Append an escaped form of c to result when c is unprintable:
 * \uxxxx (four hex digits) when no bit above the low 16 is set, and
 * \Uxxxxxxxx (eight hex digits) otherwise. Printable characters are
 * not appended at all; the caller is expected to handle them.
 *
 * Design note: this helper arguably belongs in UnicodeString; it is
 * implemented here and shared with friend classes.
 *
 * @param result the string that receives the escape sequence
 * @param c the code point to examine
 * @return TRUE if an escape was appended, FALSE if result is untouched
 */
UBool UnicodeSet::_escapeUnprintable(UnicodeString& result, UChar32 c) {
    // Printable input: nothing to do.
    if (!_isUnprintable(c)) {
        return FALSE;
    }
    result.append(BACKSLASH);
    // Pick the escape letter and the bit offset of the first nibble
    // to emit: 28 for the eight-digit form, 12 for the four-digit form.
    int32_t shift;
    if ((c & ~0xFFFF) != 0) {
        result.append(UPPER_U);
        shift = 28;
    } else {
        result.append(LOWER_U);
        shift = 12;
    }
    // Emit nibbles from most to least significant.
    for (; shift >= 0; shift -= 4) {
        result.append(HEX[0xF & (c >> shift)]);
    }
    return TRUE;
}
/**
* Returns a string representation of this set. If the result of
* calling this function is passed to a UnicodeSet constructor, it
@ -479,7 +437,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
int32_t backslashCount = 0;
for (i=0; i<pat.length(); ++i) {
UChar c = pat.charAt(i);
if (escapeUnprintable && _isUnprintable(c)) {
if (escapeUnprintable && Utility::isUnprintable(c)) {
// If the unprintable character is preceded by an odd
// number of backslashes, then it has been escaped.
// Before unescaping it, we delete the final
@ -487,7 +445,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
if ((backslashCount % 2) == 1) {
result.truncate(result.length() - 1);
}
_escapeUnprintable(result, c);
Utility::escapeUnprintable(result, c);
backslashCount = 0;
} else {
result.append(c);

View File

@ -10,6 +10,12 @@
#include "util.h"
// Define UChar constants using hex for EBCDIC compatibility
// Used #define to reduce private static exports and memory access time.
#define BACKSLASH ((UChar)0x005C) /*\*/
#define UPPER_U ((UChar)0x0055) /*U*/
#define LOWER_U ((UChar)0x0075) /*u*/
// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
static const UChar DIGITS[] = {
48,49,50,51,52,53,54,55,56,57,
@ -50,4 +56,41 @@ UnicodeString& Utility::appendNumber(UnicodeString& result, int32_t n,
return result;
}
// Uppercase hexadecimal digit for each nibble value 0..15, spelled as
// numeric code units rather than character literals.
static const UChar HEX[16] = {
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 0-7
    0x38, 0x39,                                     // 8-9
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46              // A-F
};
/**
 * Tell whether a code point lies outside the range treated here as
 * printable ASCII, namely LINE FEED (U+000A) and U+0020..U+007E.
 *
 * @param c the code point to classify
 * @return TRUE if c is neither U+000A nor within U+0020..U+007E
 */
UBool Utility::isUnprintable(UChar32 c) {
    if (c == 0x0A) {
        return FALSE;
    }
    return (c < 0x20 || c > 0x7E);
}
/**
 * Append an escaped form of c to result when c is unprintable:
 * \uxxxx (four hex digits) when no bit above the low 16 is set, and
 * \Uxxxxxxxx (eight hex digits) otherwise. Printable characters are
 * not appended at all; the caller is expected to handle them.
 *
 * @param result the string that receives the escape sequence
 * @param c the code point to examine
 * @return TRUE if an escape was appended, FALSE if result is untouched
 */
UBool Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
    // Printable input: nothing to do.
    if (!isUnprintable(c)) {
        return FALSE;
    }
    result.append(BACKSLASH);
    // Pick the escape letter and the bit offset of the first nibble
    // to emit: 28 for the eight-digit form, 12 for the four-digit form.
    int32_t shift;
    if ((c & ~0xFFFF) != 0) {
        result.append(UPPER_U);
        shift = 28;
    } else {
        result.append(LOWER_U);
        shift = 12;
    }
    // Emit nibbles from most to least significant.
    for (; shift >= 0; shift -= 4) {
        result.append(HEX[0xF & (c >> shift)]);
    }
    return TRUE;
}
//eof

View File

@ -40,6 +40,21 @@ class Utility {
int32_t radix = 10,
int32_t minDigits = 1);
/**
 * Return true if the character is NOT printable ASCII. The
 * definition treats U+000A and U+0020..U+007E as printable.
 *
 * This method arguably belongs in UnicodeString (or similar); for
 * now it lives in Utility.
 *
 * @param c the code point to test
 * @return TRUE if c is not printable ASCII, FALSE otherwise
 */
static UBool isUnprintable(UChar32 c);
/**
 * Escape unprintable characters using \uxxxx notation for U+0000 to
 * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
 * printable ASCII, then do nothing and return FALSE. Otherwise,
 * append the escaped notation and return TRUE.
 *
 * @param result the string to which the escape sequence is appended
 * @param c the code point to escape if it is unprintable
 * @return TRUE if an escape was appended to result; FALSE if c is
 * printable and result was left unchanged
 */
static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
};
U_NAMESPACE_END