ICU-2906 make unescapeAt() handle \u-escaped surrogate pairs

X-SVN-Rev: 13340
This commit is contained in:
Alan Liu 2003-10-07 17:22:14 +00:00
parent 402f683111
commit 94a17e18a5
2 changed files with 45 additions and 4 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java,v $
* $Date: 2003/09/29 23:20:36 $
* $Revision: 1.53 $
* $Date: 2003/10/07 17:22:14 $
* $Revision: 1.54 $
*
*****************************************************************************************
*/
@ -1214,6 +1214,26 @@ public class UnicodeSetTest extends TestFmwk {
}
}
/**
 * Checks that UnicodeSet treats a supplementary code point the same way
 * whether the pattern spells it as a pair of backslash-u escapes, as a
 * literal surrogate pair, or as a single backslash-U escape.
 *
 * Every pattern is run through expectContainment() and must yield a set
 * whose size() is 4; any other size is reported through errln().
 */
public void TestSurrogate() {
    // Three spellings of the same set; all are expected to behave identically.
    String[] patterns = {
        "[abc\\uD800\\uDC00]",
        "[abc\uD800\uDC00]",
        "[abc\\U00010000]",
    };

    for (int idx = 0; idx < patterns.length; ++idx) {
        String pattern = patterns[idx];
        logln("Test pattern " + idx + " :" + Utility.escape(pattern));

        UnicodeSet parsed = new UnicodeSet(pattern);

        // Second argument: a, b, c and the supplementary character.
        // Third argument: the two halves of the surrogate pair, split apart
        // (presumably the "must not be contained" list -- confirm against
        // expectContainment()).
        String joined = CharsToUnicodeString("abc\\U00010000");
        String splitHalves = "\uD800;\uDC00";
        expectContainment(parsed, joined, splitHalves);

        // a + b + c + one supplementary code point == 4 elements.
        if (parsed.size() == 4) {
            continue;
        }
        String failure = "FAIL: " + pattern + ".size() == "
                + parsed.size() + ", expected 4";
        errln(Utility.escape(failure));
    }
}
void _testComplement(int a) {
UnicodeSet x = bitsToSet(a);
UnicodeSet z = bitsToSet(a);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Utility.java,v $
* $Date: 2003/10/07 16:51:56 $
* $Revision: 1.46 $
* $Date: 2003/10/07 17:22:14 $
* $Revision: 1.47 $
*
*****************************************************************************************
*/
@ -803,6 +803,27 @@ public final class Utility {
if (result < 0 || result >= 0x110000) {
return -1;
}
// If a 'u' escape sequence (16-bit) specifies a lead
// surrogate, see if there is a trail surrogate after it,
// either as a 'u' escape or as a literal. If so, join
// them up into a supplementary.
if (maxDig == 4 && offset < length &&
UTF16.isLeadSurrogate((char) result)) {
c = s.charAt(offset); // [sic] get 16-bit code unit
int ahead = offset+1;
// ONLY parse backslash 'u', nothing else
if (c == '\\' && (offset+1) < length &&
s.charAt(offset+1) == 'u') {
int o[] = new int[] { ahead };
c = unescapeAt(s, o);
ahead = o[0];
}
if (UTF16.isTrailSurrogate((char) c)) {
offset = ahead;
result = UCharacterProperty.getRawSupplementary(
(char) result, (char) c);
}
}
offset16[0] = offset;
return result;
}