ICU-410 use unified unescape function(s)
X-SVN-Rev: 1897
This commit is contained in:
parent
cb74716658
commit
5e9d055754
@ -240,27 +240,11 @@ int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit,
|
||||
if (pos == limit) {
|
||||
return syntaxError(RuleBasedTransliterator::TRAILING_BACKSLASH, rule, start);
|
||||
}
|
||||
|
||||
// UNLIKE THE JAVA version, we parse \uXXXX escapes. We
|
||||
// do not do this in Java because the compiler has already
|
||||
// done it when the ResourceBundle file was compiled.
|
||||
// Parse \uXXXX escapes
|
||||
c = rule.charAt(pos++);
|
||||
if (c == 0x0075/*u*/) {
|
||||
if ((pos+4) > limit) {
|
||||
return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
|
||||
}
|
||||
c = (UChar)0x0000;
|
||||
for (int32_t plim=pos+4; pos<plim; ++pos) { // [sic]
|
||||
int32_t digit = Unicode::digit(rule.charAt(pos), 16);
|
||||
if (digit<0) {
|
||||
return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
|
||||
}
|
||||
c = (UChar) ((c << 4) | digit);
|
||||
}
|
||||
UChar32 escaped = rule.unescapeAt(pos); // pos is already past '\\'
|
||||
if (escaped == (UChar32) -1) {
|
||||
return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
|
||||
}
|
||||
|
||||
buf.append(c);
|
||||
buf.append((UChar) escaped);
|
||||
continue;
|
||||
}
|
||||
// Handle quoted matter
|
||||
|
@ -812,30 +812,15 @@ void UnicodeSet::applyPattern(const UnicodeString& pattern,
|
||||
* interpret '\\uxxxx' Unicode escapes here (as literals).
|
||||
*/
|
||||
if (c == BACKSLASH) {
|
||||
++i;
|
||||
if (i < pattern.length()) {
|
||||
c = pattern.charAt(i);
|
||||
isLiteral = TRUE;
|
||||
if (c == 0x0075 /*u*/) {
|
||||
if ((i+4) >= pattern.length()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
c = (UChar)0x0000;
|
||||
for (int32_t j=(++i)+4; i<j; ++i) { // [sic]
|
||||
int32_t digit = Unicode::digit(pattern.charAt(i), 16);
|
||||
if (digit<0) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
c = (UChar) ((c << 4) | digit);
|
||||
}
|
||||
--i; // Move i back to last parsed character
|
||||
}
|
||||
} else {
|
||||
++i; // Advance past '\\'
|
||||
UChar32 escaped = pattern.unescapeAt(i);
|
||||
if (escaped == (UChar32) -1) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
isLiteral = TRUE;
|
||||
--i; // Move i back to last parsed character
|
||||
c = (UChar) escaped;
|
||||
}
|
||||
|
||||
/* Parse variable references. These are treated as literals. If a
|
||||
|
@ -119,48 +119,10 @@ UChar* appendCompareResult(UCollationResult result, UChar* target)
|
||||
return target;
|
||||
}
|
||||
|
||||
UChar* CharsToUChars(const char* chars)
|
||||
{
|
||||
int unicode;
|
||||
int i;
|
||||
UChar *buffer;
|
||||
UChar *alias;
|
||||
int count = 0;
|
||||
|
||||
/* preflight */
|
||||
for (i = 0; chars[i] != 0;) {
|
||||
if ((chars[i] == '\\') && (chars[i+1] == 'u')) {
|
||||
i += 6;
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
++count;
|
||||
}
|
||||
|
||||
alias = buffer = (UChar*) malloc(sizeof(UChar) * (count + 1));
|
||||
|
||||
for (;;) {
|
||||
/* search for \u or the end */
|
||||
for(i = 0; chars[i] != 0 && !(chars[i] == '\\' && chars[i+1] == 'u'); ++i) {}
|
||||
|
||||
/* convert characters between escape sequences */
|
||||
if(i > 0) {
|
||||
u_charsToUChars(chars, alias, i);
|
||||
chars += i;
|
||||
alias += i;
|
||||
}
|
||||
|
||||
/* did we reach the end or an escape sequence? */
|
||||
if(*chars == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* unescape one character: we know that there is a \u sequence at chars[limit] */
|
||||
chars += 2;
|
||||
sscanf(chars, "%4X", &unicode);
|
||||
*alias++ = (UChar)unicode;
|
||||
chars += 4;
|
||||
}
|
||||
*alias = 0x0000;
|
||||
return buffer;
|
||||
/*
 * Converts a C string into a newly malloc()'ed UChar buffer by running it
 * through u_unescape() twice: first with a null destination and zero
 * capacity to obtain a length, then again to fill the buffer. The buffer is
 * returned as-is and is not freed here.
 * NOTE(review): the allocation holds exactly `len` UChars and the same `len`
 * is passed as the capacity -- confirm that the preflight return value
 * already accounts for a terminating zero; otherwise the result is not
 * zero-terminated.
 * NOTE(review): the malloc() result is used without a null check.
 */
UChar* CharsToUChars(const char* str) {
|
||||
/* Might be faster to just use uprv_strlen() as the preflight len - liu */
|
||||
int32_t len = u_unescape(str, 0, 0); /* preflight */
|
||||
UChar *buf = (UChar*) malloc(sizeof(UChar) * len);
|
||||
u_unescape(str, buf, len);
|
||||
return buf;
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/coll.h"
|
||||
#include "unicode/smpdtfmt.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
#include "intltest.h"
|
||||
#include "itmajor.h"
|
||||
@ -1008,37 +1009,8 @@ main(int argc, char* argv[])
|
||||
*/
|
||||
UnicodeString CharsToUnicodeString(const char* chars)
|
||||
{
|
||||
// NOTE(review): this span shows two bodies back to back. Everything up to
// and including "return result;" is the hand-written \u scanner; the two
// statements after it build a UnicodeString and return str.unescape().
// The hunk header (37 lines -> 8 lines) suggests the former is the removed
// side and the latter the added side.
int unicode;
|
||||
int i;
|
||||
UnicodeString result;
|
||||
// Scratch space for one run of non-escape characters; the scan loop below
// is capped at the same 400 so a run never exceeds this array.
UChar buffer[400];
|
||||
|
||||
for (;;) {
|
||||
/* repeat the following according to the length of the buffer */
|
||||
do {
|
||||
/* search for \u or the end */
|
||||
for(i = 0; i < 400 && chars[i] != 0 && !(chars[i] == '\\' && chars[i+1] == 'u'); ++i) {}
|
||||
|
||||
/* convert characters between escape sequences */
|
||||
if(i > 0) {
|
||||
u_charsToUChars(chars, buffer, i);
|
||||
result.append(buffer, i);
|
||||
chars += i;
|
||||
|
||||
}
|
||||
// i == 400 means the scan stopped only because the scratch buffer was full,
// so keep scanning from the advanced position.
} while(i == 400);
|
||||
|
||||
/* did we reach the end or an escape sequence? */
|
||||
if(*chars == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* unescape one character: we know that there is a \u sequence at chars[limit] */
|
||||
// Skip the two characters of the "\u" prefix.
chars += 2;
|
||||
// NOTE(review): the sscanf return value is ignored, so `unicode` (declared
// without an initializer) is appended even if no hex digits were matched.
sscanf(chars, "%4X", &unicode);
|
||||
result.append((UChar)unicode);
|
||||
// Always advances by four, regardless of how many digits sscanf consumed.
chars += 4;
|
||||
}
|
||||
return result;
|
||||
// Second body: construct from `chars` with an empty codepage string, then
// return the result of unescape() on it.
UnicodeString str(chars, ""); // Invariant conversion
|
||||
return str.unescape();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -43,33 +43,8 @@ void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec,
|
||||
*/
|
||||
static UnicodeString str(const char *input)
|
||||
{
|
||||
// NOTE(review): this span shows two bodies back to back. Everything up to
// and including "return result;" replaces each "\uXXXX" in place by hand;
// the two statements after it build a UnicodeString and return
// str.unescape(). The hunk header (33 lines -> 8 lines) suggests the former
// is the removed side and the latter the added side.
// Upper- and lower-case hex digit tables; a character's index in either
// string is used as its numeric value.
static const UnicodeString digitString1("0123456789ABCDEF");
|
||||
static const UnicodeString digitString2("0123456789abcdef");
|
||||
|
||||
UnicodeString result(input);
|
||||
int index = 0;
|
||||
|
||||
// `index` is reassigned from indexOf() on every pass, so the "index += 1" at
// the bottom of the loop does not influence the next search.
while ((index = result.indexOf("\\u")) != -1)
|
||||
{
|
||||
// NOTE(review): when fewer than six characters remain after a "\u", nothing
// is replaced and `result` is unchanged, so the next indexOf() presumably
// finds the same position again -- this looks like it would never terminate.
if (index + 6 <= result.length())
|
||||
{
|
||||
UChar c = 0;
|
||||
// Accumulate the four characters after "\u" as base-16 digits.
for (int i = index + 2; i < index + 6; i++) {
|
||||
UTextOffset value = digitString1.indexOf(result[i]);
|
||||
|
||||
if (value == -1) {
|
||||
value = digitString2.indexOf(result[i]);
|
||||
}
|
||||
// NOTE(review): if the character is in neither table, `value` is still -1
// here and is folded into `c` without any error handling.
c = (UChar)(c * 16 + value);
|
||||
}
|
||||
// Swap the six-character escape for the single decoded code unit.
UnicodeString replace;
|
||||
replace += c;
|
||||
result.replace(index, 6, replace);
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
// Second body: the local variable `str` shadows this function's own name
// within the body. It is constructed from `input` with an empty codepage
// string, and the result of unescape() on it is returned.
UnicodeString str(input, ""); // Invariant conversion
|
||||
return str.unescape();
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user