ICU-2906 make unescapeAt() handle \u-escaped surrogate pairs
X-SVN-Rev: 13340
This commit is contained in:
parent
402f683111
commit
94a17e18a5
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java,v $
|
||||
* $Date: 2003/09/29 23:20:36 $
|
||||
* $Revision: 1.53 $
|
||||
* $Date: 2003/10/07 17:22:14 $
|
||||
* $Revision: 1.54 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -1214,6 +1214,26 @@ public class UnicodeSetTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
public void TestSurrogate() {
|
||||
String DATA[] = {
|
||||
// These should all behave identically
|
||||
"[abc\\uD800\\uDC00]",
|
||||
"[abc\uD800\uDC00]",
|
||||
"[abc\\U00010000]",
|
||||
};
|
||||
for (int i=0; i<DATA.length; ++i) {
|
||||
logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
|
||||
UnicodeSet set = new UnicodeSet(DATA[i]);
|
||||
expectContainment(set,
|
||||
CharsToUnicodeString("abc\\U00010000"),
|
||||
"\uD800;\uDC00"); // split apart surrogate-pair
|
||||
if (set.size() != 4) {
|
||||
errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " +
|
||||
set.size() + ", expected 4"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _testComplement(int a) {
|
||||
UnicodeSet x = bitsToSet(a);
|
||||
UnicodeSet z = bitsToSet(a);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Utility.java,v $
|
||||
* $Date: 2003/10/07 16:51:56 $
|
||||
* $Revision: 1.46 $
|
||||
* $Date: 2003/10/07 17:22:14 $
|
||||
* $Revision: 1.47 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -803,6 +803,27 @@ public final class Utility {
|
||||
if (result < 0 || result >= 0x110000) {
|
||||
return -1;
|
||||
}
|
||||
// If a 'u' escape sequence (16-bit) specifies a lead
|
||||
// surrogate, see if there is a trail surrogate after it,
|
||||
// either as a 'u' escape or as a literal. If so, join
|
||||
// them up into a supplementary.
|
||||
if (maxDig == 4 && offset < length &&
|
||||
UTF16.isLeadSurrogate((char) result)) {
|
||||
c = s.charAt(offset); // [sic] get 16-bit code unit
|
||||
int ahead = offset+1;
|
||||
// ONLY parse backslash 'u', nothing else
|
||||
if (c == '\\' && (offset+1) < length &&
|
||||
s.charAt(offset+1) == 'u') {
|
||||
int o[] = new int[] { ahead };
|
||||
c = unescapeAt(s, o);
|
||||
ahead = o[0];
|
||||
}
|
||||
if (UTF16.isTrailSurrogate((char) c)) {
|
||||
offset = ahead;
|
||||
result = UCharacterProperty.getRawSupplementary(
|
||||
(char) result, (char) c);
|
||||
}
|
||||
}
|
||||
offset16[0] = offset;
|
||||
return result;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user