ICU-1533 incorporate Mark's review comments; move escape handling methods to Utility
X-SVN-Rev: 7067
This commit is contained in:
parent
e28956def1
commit
2280780824
@ -15,6 +15,7 @@
|
||||
#include "unicode/unicode.h"
|
||||
#include "cmemory.h"
|
||||
#include "strmatch.h"
|
||||
#include "util.h"
|
||||
|
||||
static const UChar APOSTROPHE = 0x0027; // '\''
|
||||
static const UChar BACKSLASH = 0x005C; // '\'
|
||||
@ -574,7 +575,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
|
||||
// quotes. \u and \U are not recognized within quotes. The same
|
||||
// logic applies to literals, but literals are never escaped.
|
||||
if (isLiteral ||
|
||||
(escapeUnprintable && UnicodeSet::_isUnprintable(c))) {
|
||||
(escapeUnprintable && Utility::isUnprintable(c))) {
|
||||
if (quoteBuf.length() > 0) {
|
||||
// We prefer backslash APOSTROPHE to double APOSTROPHE
|
||||
// (more readable, less similar to ") so if there are
|
||||
@ -609,7 +610,7 @@ void TransliterationRule::appendToRule(UnicodeString& rule,
|
||||
}
|
||||
}
|
||||
if (c != (UChar32)-1) {
|
||||
if (!escapeUnprintable || !UnicodeSet::_escapeUnprintable(rule, c)) {
|
||||
if (!escapeUnprintable || !Utility::escapeUnprintable(rule, c)) {
|
||||
rule.append(c);
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "unicode/uscript.h"
|
||||
#include "esctrn.h"
|
||||
#include "unesctrn.h"
|
||||
#include "util.h"
|
||||
|
||||
|
||||
// keep in sync with CompoundTransliterator
|
||||
@ -915,7 +916,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
|
||||
UnicodeString id = getID();
|
||||
for (int32_t i=0; i<id.length();) {
|
||||
UChar32 c = id.char32At(i);
|
||||
if (!UnicodeSet::_escapeUnprintable(rulesSource, c)) {
|
||||
if (!Utility::escapeUnprintable(rulesSource, c)) {
|
||||
rulesSource.append(c);
|
||||
}
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "umutex.h"
|
||||
#include "ucln_in.h"
|
||||
#include "upropset.h"
|
||||
#include "util.h"
|
||||
|
||||
// HIGH_VALUE > all valid values. 110000 for codepoints
|
||||
#define UNICODESET_HIGH 0x0110000
|
||||
@ -387,7 +388,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
|
||||
if (useHexEscape) {
|
||||
// Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
|
||||
// unprintable
|
||||
if (_escapeUnprintable(buf, c)) {
|
||||
if (Utility::escapeUnprintable(buf, c)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -413,49 +414,6 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
|
||||
buf.append((UChar) c);
|
||||
}
|
||||
|
||||
static const UChar HEX[16] = {48,49,50,51,52,53,54,55, // code units for '0'-'7'; table is indexed by a 4-bit value
|
||||
56,57,65,66,67,68,69,70}; // code units for '8'-'9' and uppercase 'A'-'F'
|
||||
|
||||
/**
|
||||
* Return true if the character is NOT printable ASCII.
|
||||
* "Printable" here means line feed (U+000A) or any code point in the
|
||||
* range U+0020 (space) through U+007E (tilde). Every other value of c,
|
||||
* including the remaining control characters and everything above
|
||||
* U+007E, is reported as unprintable.
|
||||
*
|
||||
* This method should really be in UnicodeString (or similar). For
|
||||
* now, we implement it here and share it with friend classes.
|
||||
*
|
||||
* @param c the code point to test
|
||||
* @return true if c is unprintable by the definition above
|
||||
*/
|
||||
UBool UnicodeSet::_isUnprintable(UChar32 c) {
|
||||
return !(c == 0x0A || (c >= 0x20 && c <= 0x7E));
|
||||
}
|
||||
|
||||
/**
|
||||
* Escape unprintable characters using \uxxxx notation for U+0000 to
|
||||
* U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
|
||||
* printable ASCII, then do nothing and return FALSE. Otherwise,
|
||||
* append the escaped notation and return TRUE.
|
||||
*
|
||||
* Whether a character is unprintable is decided by _isUnprintable().
|
||||
* Hex digits are taken from the HEX table, so they are uppercase and
|
||||
* always zero-padded to exactly 4 or 8 digits. The 8-digit form is
|
||||
* chosen whenever any bit above the low 16 bits of c is set.
|
||||
*
|
||||
* This method should really be in UnicodeString. For now, we
|
||||
* implement it here and share it with friend classes.
|
||||
*
|
||||
* @param result the string to which the escape sequence is appended;
|
||||
* left untouched when FALSE is returned
|
||||
* @param c the code point to escape
|
||||
* @return TRUE if an escape was appended, FALSE if c is printable
|
||||
*/
|
||||
UBool UnicodeSet::_escapeUnprintable(UnicodeString& result, UChar32 c) {
|
||||
if (_isUnprintable(c)) {
|
||||
result.append(BACKSLASH);
|
||||
if (c & ~0xFFFF) { // some bit above the low 16 is set: use the 8-digit \U form
|
||||
result.append(UPPER_U);
|
||||
result.append(HEX[0xF&(c>>28)]); // upper 16 bits, most significant nibble first
|
||||
result.append(HEX[0xF&(c>>24)]);
|
||||
result.append(HEX[0xF&(c>>20)]);
|
||||
result.append(HEX[0xF&(c>>16)]);
|
||||
} else {
|
||||
result.append(LOWER_U);
|
||||
}
|
||||
result.append(HEX[0xF&(c>>12)]); // low 16 bits, emitted for both forms
|
||||
result.append(HEX[0xF&(c>>8)]);
|
||||
result.append(HEX[0xF&(c>>4)]);
|
||||
result.append(HEX[0xF&c]);
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string representation of this set. If the result of
|
||||
* calling this function is passed to a UnicodeSet constructor, it
|
||||
@ -479,7 +437,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
||||
int32_t backslashCount = 0;
|
||||
for (i=0; i<pat.length(); ++i) {
|
||||
UChar c = pat.charAt(i);
|
||||
if (escapeUnprintable && _isUnprintable(c)) {
|
||||
if (escapeUnprintable && Utility::isUnprintable(c)) {
|
||||
// If the unprintable character is preceded by an odd
|
||||
// number of backslashes, then it has been escaped.
|
||||
// Before unescaping it, we delete the final
|
||||
@ -487,7 +445,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
||||
if ((backslashCount % 2) == 1) {
|
||||
result.truncate(result.length() - 1);
|
||||
}
|
||||
_escapeUnprintable(result, c);
|
||||
Utility::escapeUnprintable(result, c);
|
||||
backslashCount = 0;
|
||||
} else {
|
||||
result.append(c);
|
||||
|
@ -10,6 +10,12 @@
|
||||
|
||||
#include "util.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
|
||||
// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
static const UChar DIGITS[] = {
|
||||
48,49,50,51,52,53,54,55,56,57,
|
||||
@ -50,4 +56,41 @@ UnicodeString& Utility::appendNumber(UnicodeString& result, int32_t n,
|
||||
return result;
|
||||
}
|
||||
|
||||
static const UChar HEX[16] = {48,49,50,51,52,53,54,55, // code units for '0'-'7'; table is indexed by a 4-bit value
|
||||
56,57,65,66,67,68,69,70}; // code units for '8'-'9' and uppercase 'A'-'F'
|
||||
|
||||
/**
|
||||
* Return true if the character is NOT printable ASCII.
|
||||
* "Printable" here means line feed (U+000A) or any code point in the
|
||||
* range U+0020 (space) through U+007E (tilde). Every other value of c,
|
||||
* including the remaining control characters and everything above
|
||||
* U+007E, is reported as unprintable.
|
||||
*
|
||||
* @param c the code point to test
|
||||
* @return true if c is unprintable by the definition above
|
||||
*/
|
||||
UBool Utility::isUnprintable(UChar32 c) {
|
||||
return !(c == 0x0A || (c >= 0x20 && c <= 0x7E));
|
||||
}
|
||||
|
||||
/**
|
||||
* Escape unprintable characters using \uxxxx notation for U+0000 to
|
||||
* U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
|
||||
* printable ASCII, then do nothing and return FALSE. Otherwise,
|
||||
* append the escaped notation and return TRUE.
|
||||
*
|
||||
* Whether a character is unprintable is decided by isUnprintable().
|
||||
* Hex digits are taken from the HEX table, so they are uppercase and
|
||||
* always zero-padded to exactly 4 or 8 digits. The 8-digit form is
|
||||
* chosen whenever any bit above the low 16 bits of c is set.
|
||||
*
|
||||
* @param result the string to which the escape sequence is appended;
|
||||
* left untouched when FALSE is returned
|
||||
* @param c the code point to escape
|
||||
* @return TRUE if an escape was appended, FALSE if c is printable
|
||||
*/
|
||||
UBool Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
|
||||
if (isUnprintable(c)) {
|
||||
result.append(BACKSLASH);
|
||||
if (c & ~0xFFFF) { // some bit above the low 16 is set: use the 8-digit \U form
|
||||
result.append(UPPER_U);
|
||||
result.append(HEX[0xF&(c>>28)]); // upper 16 bits, most significant nibble first
|
||||
result.append(HEX[0xF&(c>>24)]);
|
||||
result.append(HEX[0xF&(c>>20)]);
|
||||
result.append(HEX[0xF&(c>>16)]);
|
||||
} else {
|
||||
result.append(LOWER_U);
|
||||
}
|
||||
result.append(HEX[0xF&(c>>12)]); // low 16 bits, emitted for both forms
|
||||
result.append(HEX[0xF&(c>>8)]);
|
||||
result.append(HEX[0xF&(c>>4)]);
|
||||
result.append(HEX[0xF&c]);
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
//eof
|
||||
|
@ -40,6 +40,21 @@ class Utility {
|
||||
int32_t radix = 10,
|
||||
int32_t minDigits = 1);
|
||||
|
||||
/**
|
||||
* Return true if the character is NOT printable ASCII.
|
||||
* Line feed (U+000A) and the range U+0020 through U+007E count as
|
||||
* printable; every other code point is reported as unprintable.
|
||||
*
|
||||
* This method should really be in UnicodeString (or similar). For
|
||||
* now, it lives in Utility so that the classes that need it (the
|
||||
* transliterator and UnicodeSet code) can share one implementation.
|
||||
*
|
||||
* @param c the code point to test
|
||||
* @return true if c is unprintable by the definition above
|
||||
*/
|
||||
static UBool isUnprintable(UChar32 c);
|
||||
|
||||
/**
|
||||
* Escape unprintable characters using \uxxxx notation for U+0000 to
|
||||
* U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
|
||||
* printable ASCII, then do nothing and return FALSE. Otherwise,
|
||||
* append the escaped notation and return TRUE.
|
||||
*
|
||||
* "Unprintable" is as defined by isUnprintable(). The hex digits
|
||||
* are uppercase and zero-padded to exactly 4 or 8 digits.
|
||||
*
|
||||
* @param result the string to which the escape sequence is appended;
|
||||
* left untouched when FALSE is returned
|
||||
* @param c the code point to escape
|
||||
* @return TRUE if an escape was appended, FALSE if c is printable
|
||||
*/
|
||||
static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
Loading…
Reference in New Issue
Block a user