ICU-3280 add unescapeAt(RuleCharacterIterator)

X-SVN-Rev: 13189
This commit is contained in:
Alan Liu 2003-09-24 19:16:05 +00:00
parent b735483f64
commit 4fb24ba9c0

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Utility.java,v $
* $Date: 2003/06/11 19:55:18 $
* $Revision: 1.42 $
* $Date: 2003/09/24 19:16:05 $
* $Revision: 1.43 $
*
*****************************************************************************************
*/
@ -723,7 +723,7 @@ public final class Utility {
/**
* Convert an escape to a 32-bit code point value. We attempt
* to parallel the icu4c unesacpeAt() function.
* to parallel the icu4c unescapeAt() function.
* @param offset16 an array containing offset to the character
* <em>after</em> the backslash. Upon return offset16[0] will
* be updated to point after the escape sequence.
@ -827,6 +827,122 @@ public final class Utility {
return c;
}
/**
* Convert an escape sequence to a 32-bit code point value. This
* parallels the icu4c unescapeAt() function.
* @param chars iterator over the characters to be parsed. Upon
* entry it should point to the character immediately following
* the escape character (typically '\\'). Upon return it will be
* advanced to the first character after the parsed pattern, or
* the end of the iteration if all characters are parsed. If the
* parse fails, the iterator will be unchanged upon return.
* During parsing, the iterator will be dereferenced with none of
* the RuleCharacterIterator options set, to allow backslash,
* SYMBOL_REF, and whitespace characters to be escaped.
* @return 32-bit code point value from 0 to 10FFFF, or -1 on
* error.
*/
public static int unescapeAt(RuleCharacterIterator chars) {
int c;
int result = 0;
int n = 0;
int minDig = 0;
int maxDig = 0;
int bitsPerDigit = 4;
int dig;
int i;
boolean braces = false;
Object save = chars.getState();
/* Fetch first UChar after '\\' */
c = chars.current(0);
/* Convert hexadecimal and octal escapes */
switch (c) {
case 'u':
chars.next(0);
minDig = maxDig = 4;
break;
case 'U':
chars.next(0);
minDig = maxDig = 8;
break;
case 'x':
chars.next(0);
minDig = 1;
if (chars.current(0) == 0x7B /*{*/) {
chars.next(0);
braces = true;
maxDig = 8;
} else {
maxDig = 2;
}
break;
default:
dig = UCharacter.digit(c, 8);
if (dig >= 0) {
minDig = 1;
maxDig = 3;
n = 1; /* Already have first octal digit */
bitsPerDigit = 3;
result = dig;
}
break;
}
if (minDig != 0) {
while (n < maxDig && !chars.atEnd()) {
c = chars.current(0);
dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
if (dig < 0) {
break;
}
result = (result << bitsPerDigit) | dig;
++n;
chars.next(0);
}
if (n < minDig) {
chars.setState(save);
return -1;
}
if (braces) {
if (c != 0x7D /*}*/) {
chars.setState(save);
return -1;
}
chars.next(0);
}
if (result < 0 || result >= 0x110000) {
result = -1;
}
return result;
}
chars.next(0);
/* Convert C-style escapes in table */
for (i=0; i<UNESCAPE_MAP.length; i+=2) {
if (c == UNESCAPE_MAP[i]) {
return UNESCAPE_MAP[i+1];
} else if (c < UNESCAPE_MAP[i]) {
break;
}
}
/* Map \cX to control-X: X & 0x1F */
if (c == 'c') {
if (chars.atEnd()) {
chars.setState(save);
return -1;
}
c = chars.next(0);
return 0x1F & c;
}
/* If no special forms are recognized, then consider
* the backslash to generically escape the next character. */
return c;
}
/**
* Convert all escapes in a given string using unescapeAt().
* @exception IllegalArgumentException if an invalid escape is