ICU-410 use unified unescape function(s)

X-SVN-Rev: 1897
This commit is contained in:
Alan Liu 2000-07-16 13:42:38 +00:00
parent cb74716658
commit 5e9d055754
5 changed files with 21 additions and 143 deletions

View File

@ -240,27 +240,11 @@ int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit,
if (pos == limit) {
return syntaxError(RuleBasedTransliterator::TRAILING_BACKSLASH, rule, start);
}
// UNLIKE THE JAVA version, we parse \uXXXX escapes. We
// do not do this in Java because the compiler has already
// done it when the ResourceBundle file was compiled.
// Parse \uXXXX escapes
c = rule.charAt(pos++);
if (c == 0x0075/*u*/) {
if ((pos+4) > limit) {
UChar32 escaped = rule.unescapeAt(pos); // pos is already past '\\'
if (escaped == (UChar32) -1) {
return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
}
c = (UChar)0x0000;
for (int32_t plim=pos+4; pos<plim; ++pos) { // [sic]
int32_t digit = Unicode::digit(rule.charAt(pos), 16);
if (digit<0) {
return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
}
c = (UChar) ((c << 4) | digit);
}
}
buf.append(c);
buf.append((UChar) escaped);
continue;
}
// Handle quoted matter

View File

@ -812,30 +812,15 @@ void UnicodeSet::applyPattern(const UnicodeString& pattern,
* interpret '\\uxxxx' Unicode escapes here (as literals).
*/
if (c == BACKSLASH) {
++i;
if (i < pattern.length()) {
c = pattern.charAt(i);
++i; // Advance past '\\'
UChar32 escaped = pattern.unescapeAt(i);
if (escaped == (UChar32) -1) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
isLiteral = TRUE;
if (c == 0x0075 /*u*/) {
if ((i+4) >= pattern.length()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
c = (UChar)0x0000;
for (int32_t j=(++i)+4; i<j; ++i) { // [sic]
int32_t digit = Unicode::digit(pattern.charAt(i), 16);
if (digit<0) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
c = (UChar) ((c << 4) | digit);
}
--i; // Move i back to last parsed character
}
} else {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
c = (UChar) escaped;
}
/* Parse variable references. These are treated as literals. If a

View File

@ -119,48 +119,10 @@ UChar* appendCompareResult(UCollationResult result, UChar* target)
return target;
}
UChar* CharsToUChars(const char* chars)
{
int unicode;
int i;
UChar *buffer;
UChar *alias;
int count = 0;
/* preflight */
for (i = 0; chars[i] != 0;) {
if ((chars[i] == '\\') && (chars[i+1] == 'u')) {
i += 6;
} else {
i++;
}
++count;
}
alias = buffer = (UChar*) malloc(sizeof(UChar) * (count + 1));
for (;;) {
/* search for \u or the end */
for(i = 0; chars[i] != 0 && !(chars[i] == '\\' && chars[i+1] == 'u'); ++i) {}
/* convert characters between escape sequences */
if(i > 0) {
u_charsToUChars(chars, alias, i);
chars += i;
alias += i;
}
/* did we reach the end or an escape sequence? */
if(*chars == 0) {
break;
}
/* unescape one character: we know that there is a \u sequence at chars[limit] */
chars += 2;
sscanf(chars, "%4X", &unicode);
*alias++ = (UChar)unicode;
chars += 4;
}
*alias = 0x0000;
return buffer;
/**
 * Converts a char string that may contain escape sequences (such as \uXXXX)
 * into a newly allocated, NUL-terminated UChar string by way of u_unescape().
 *
 * @param str NUL-terminated source string.
 * @return a malloc()ed buffer holding the unescaped text followed by a
 *         terminating 0x0000, or 0 if the allocation fails.
 */
UChar* CharsToUChars(const char* str) {
    /* Might be faster to just use uprv_strlen() as the preflight len - liu */
    int32_t len = u_unescape(str, 0, 0); /* preflight */
    /* The preflight length excludes the terminator, so reserve one extra
     * UChar; otherwise the returned string would not be NUL-terminated. */
    UChar *buf = (UChar*) malloc(sizeof(UChar) * (len + 1));
    if (buf != 0) {
        u_unescape(str, buf, len + 1);
        /* Terminate explicitly so the result is a valid UChar string
         * regardless of how u_unescape() treats the final slot. */
        buf[len] = 0;
    }
    return buf;
}

View File

@ -22,6 +22,7 @@
#include "unicode/ures.h"
#include "unicode/coll.h"
#include "unicode/smpdtfmt.h"
#include "unicode/ustring.h"
#include "intltest.h"
#include "itmajor.h"
@ -1008,37 +1009,8 @@ main(int argc, char* argv[])
*/
UnicodeString CharsToUnicodeString(const char* chars)
{
// Builds a UnicodeString from a char string, turning escape sequences into
// the characters they name.
// NOTE(review): this listing appears to interleave two versions of the body
// (the hunk header reads "-1008,37 +1009,8"): everything down to
// `return result;` looks like the earlier hand-written \uXXXX parser, and the
// two statements after it look like its replacement based on
// UnicodeString::unescape(). Confirm against the repository before editing.
int unicode;
int i;
UnicodeString result;
UChar buffer[400];
for (;;) {
/* repeat the following according to the length of the buffer */
do {
/* search for \u or the end, looking at no more than 400 chars per pass */
for(i = 0; i < 400 && chars[i] != 0 && !(chars[i] == '\\' && chars[i+1] == 'u'); ++i) {}
/* convert characters between escape sequences */
if(i > 0) {
u_charsToUChars(chars, buffer, i);
result.append(buffer, i);
chars += i;
}
} while(i == 400);
/* did we reach the end or an escape sequence? */
if(*chars == 0) {
break;
}
/* unescape one character: the scan above stopped with chars pointing at a \u sequence */
chars += 2;
/* reads up to four hex digits; the sscanf result is not checked */
sscanf(chars, "%4X", &unicode);
result.append((UChar)unicode);
/* always skips four chars, whether or not four hex digits were present */
chars += 4;
}
return result;
UnicodeString str(chars, ""); // Invariant conversion
return str.unescape();
}
/*

View File

@ -43,33 +43,8 @@ void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec,
*/
static UnicodeString str(const char *input)
{
// Builds a UnicodeString from `input`, replacing \uXXXX escapes with the
// UChar they encode.
// NOTE(review): this listing appears to interleave two versions of the body
// (the hunk header reads "-43,33 +43,8"): everything down to `return result;`
// looks like the earlier hand-written parser, and the two statements after it
// look like its replacement based on UnicodeString::unescape(). Confirm
// against the repository before editing.
static const UnicodeString digitString1("0123456789ABCDEF");
static const UnicodeString digitString2("0123456789abcdef");
UnicodeString result(input);
int index = 0;
// Each pass searches from the start of `result` for the next "\u".
while ((index = result.indexOf("\\u")) != -1)
{
// Only replace when four characters follow the "\u".
// NOTE(review): when fewer than four follow, nothing is replaced and the next
// indexOf() finds the same "\u" again, so the loop does not appear to terminate.
if (index + 6 <= result.length())
{
UChar c = 0;
for (int i = index + 2; i < index + 6; i++) {
// Look the digit up in the upper-case table first, then the lower-case one.
UTextOffset value = digitString1.indexOf(result[i]);
if (value == -1) {
value = digitString2.indexOf(result[i]);
}
// A character found in neither table leaves value == -1, which is still
// folded into c here.
c = (UChar)(c * 16 + value);
}
UnicodeString replace;
replace += c;
// Swap the six-character escape for the single decoded UChar.
result.replace(index, 6, replace);
}
// This increment has no lasting effect: the loop condition reassigns index.
index += 1;
}
return result;
UnicodeString str(input, ""); // Invariant conversion
return str.unescape();
}