ICU-3840 Update Charset ISCII to support new characters in Unicode.
X-SVN-Rev: 21869
This commit is contained in:
parent
3fc4373c3e
commit
9ef6426440
@ -34,6 +34,7 @@ class CharsetISCII extends CharsetICU {
|
||||
private final short ISCII_NUKTA = 0xe9;
|
||||
private final short ISCII_HALANT = 0xe8;
|
||||
private final short ISCII_DANDA = 0xea;
|
||||
private final short ISCII_VOWEL_SIGN_E = 0xe0;
|
||||
private final short ISCII_INV = 0xd9;
|
||||
private final short INDIC_BLOCK_BEGIN = 0x0900;
|
||||
private final short INDIC_BLOCK_END = 0x0d7f;
|
||||
@ -174,7 +175,7 @@ class CharsetISCII extends CharsetICU {
|
||||
/* 0xa1: 0xb8: 0x901 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
|
||||
/* 0xa2: 0xfe: 0x902 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
|
||||
/* 0xa3: 0xbf: 0x903 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
|
||||
/* 0x00: 0x00: 0x904 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
|
||||
/* 0x00: 0x00: 0x904 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
|
||||
/* 0xa4: 0xff: 0x905 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
|
||||
/* 0xa5: 0xff: 0x906 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
|
||||
/* 0xa6: 0xff: 0x907 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
|
||||
@ -296,7 +297,7 @@ class CharsetISCII extends CharsetICU {
|
||||
0x00a1, /* 0x0901 */
|
||||
0x00a2, /* 0x0902 */
|
||||
0x00a3, /* 0x0903 */
|
||||
0xFFFF, /* 0x0904 */
|
||||
0xa4e0, /* 0x0904 */
|
||||
0x00a4, /* 0x0905 */
|
||||
0x00a5, /* 0x0906 */
|
||||
0x00a6, /* 0x0907 */
|
||||
@ -697,6 +698,10 @@ class CharsetISCII extends CharsetICU {
|
||||
{ 0xDB, 0x0962 },
|
||||
{ 0xDC, 0x0963 }
|
||||
};
|
||||
/*
 * Special-case mappings for two-byte ISCII sequences that end in
 * ISCII_VOWEL_SIGN_E (0xe0).
 *
 * Row 0 is a header row: element [0][0] holds the total number of rows
 * (header included) and is used as the exclusive upper bound of the lookup
 * loop, which starts at row 1. Element [0][1] is not read by that loop.
 *
 * Every following row is { ISCII byte seen before the vowel sign,
 * Unicode code point produced for the pair }.
 */
private static final char vowelSignESpecialCases[][] = {
|
||||
{ 2 /* number of rows, counting this header row */ , 0 },
|
||||
/* 0xA4 followed by ISCII_VOWEL_SIGN_E maps to U+0904 */
{ 0xA4, 0x0904 }
|
||||
};
|
||||
|
||||
private static final short lookupTable[][] = {
|
||||
{ MaskEnum.ZERO, MaskEnum.ZERO }, /* DEFAULT */
|
||||
@ -925,6 +930,25 @@ class CharsetISCII extends CharsetICU {
|
||||
}
|
||||
/* else fall through to default */
|
||||
}
|
||||
case ISCII_VOWEL_SIGN_E:
|
||||
/* find <CHAR> + SIGN_VOWEL_E special mapping */
|
||||
int i = 1;
|
||||
boolean found = false;
|
||||
for (; i < vowelSignESpecialCases[0][0]; i++) {
|
||||
if (vowelSignESpecialCases[i][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
|
||||
targetUniChar = vowelSignESpecialCases[i][1];
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
/* find out if the mapping is valid in this state */
|
||||
if ((validityTable[(byte)targetUniChar] & data.currentMaskFromUnicode) > 0) {
|
||||
data.contextCharToUnicode = NO_CHAR_MARKER;
|
||||
this.toUnicodeStatus = UConverterConstants.missingCharMarker;
|
||||
break;
|
||||
}
|
||||
}
|
||||
default:
|
||||
targetUniChar = GetMapping(sourceChar, targetUniChar, data);
|
||||
data.contextCharToUnicode = (char)sourceChar;
|
||||
@ -979,7 +1003,7 @@ class CharsetISCII extends CharsetICU {
|
||||
toULength = 0;
|
||||
}
|
||||
|
||||
if (toUnicodeStatus != UConverterConstants.missingCharMarker) {
|
||||
if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
|
||||
/* output a remaining target character */
|
||||
WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
|
||||
this.toUnicodeStatus = UConverterConstants.missingCharMarker;
|
||||
|
@ -2286,6 +2286,34 @@ public class TestCharset extends TestFmwk {
|
||||
errln("ISCII round trip test failed.");
|
||||
}
|
||||
|
||||
//Test new characters in the ISCII charset
|
||||
encoder = cs.newEncoder();
|
||||
decoder = cs.newDecoder();
|
||||
char u_pts[] = {
|
||||
(char)0x0904
|
||||
};
|
||||
byte b_pts[] = {
|
||||
/*(byte)0xef, (byte)0x42, */(byte)0xa4, (byte)0xe0
|
||||
};
|
||||
us = CharBuffer.allocate(u_pts.length);
|
||||
bs = ByteBuffer.allocate(b_pts.length);
|
||||
us.put(u_pts);
|
||||
bs.put(b_pts);
|
||||
|
||||
bs.limit(bs.position());
|
||||
bs.position(0);
|
||||
us.limit(us.position());
|
||||
us.position(0);
|
||||
|
||||
try {
|
||||
smBufDecode(decoder, "ISCII-update", bs, us, true, true);
|
||||
bs.position(0);
|
||||
us.position(0);
|
||||
smBufEncode(encoder, "ISCII-update", us, bs, true, true);
|
||||
} catch (Exception ex) {
|
||||
errln("Error occurred while encoding/decoding ISCII with the new characters.");
|
||||
}
|
||||
|
||||
//The rest of the code in this method is to provide better code coverage
|
||||
CharBuffer ccus = CharBuffer.allocate(0x10);
|
||||
ByteBuffer ccbs = ByteBuffer.allocate(0x10);
|
||||
@ -2836,6 +2864,75 @@ public class TestCharset extends TestFmwk {
|
||||
errln("Exception while encoding UTF32LE (6) should have been thrown.");
|
||||
} catch (Exception ex) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Coverage test for the UTF-16LE charset encoder.
 *
 * Feeds the encoder three windows of unpaired surrogates and reports an
 * error through errln() whenever the returned CoderResult is not malformed.
 * The steps share one encoder and one pair of buffers, so their order matters.
 */
|
||||
public void TestCharsetUTF16LE() {
|
||||
// Initial value is a placeholder; it is overwritten by the first encode() call.
CoderResult result = CoderResult.UNDERFLOW;
|
||||
CharsetProvider provider = new CharsetProviderICU();
|
||||
Charset cs = provider.charsetForName("UTF-16LE");
|
||||
CharsetEncoder encoder = cs.newEncoder();
|
||||
// NOTE(review): decoder is created but never used in this method.
CharsetDecoder decoder = cs.newDecoder();
|
||||
|
||||
// Case 1 input: a malformed sequence; per the original note this call is also
// meant to change the encoder's fromUChar32 state for the next call.
// Layout of u_pts1: index 0 is a lead surrogate, indices 1-3 are trail
// surrogates, indices 4-5 are lead surrogates.
|
||||
char u_pts1[] = {
|
||||
(char)0xD805,
|
||||
(char)0xDC01, (char)0xDC02, (char)0xDC03,
|
||||
(char)0xD901, (char)0xD902
|
||||
};
|
||||
// Seven zero bytes; they only give the output buffer its capacity.
byte b_pts1[] = {
|
||||
(byte)0x00,
|
||||
(byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00
|
||||
};
|
||||
|
||||
CharBuffer us = CharBuffer.allocate(u_pts1.length);
|
||||
ByteBuffer bs = ByteBuffer.allocate(b_pts1.length);
|
||||
|
||||
us.put(u_pts1);
|
||||
bs.put(b_pts1);
|
||||
|
||||
// Window the input to the single lead surrogate at index 0 and the output to one byte.
us.limit(1);
|
||||
us.position(0);
|
||||
bs.limit(1);
|
||||
bs.position(0);
|
||||
|
||||
// endOfInput is true, so the lone lead surrogate cannot be completed later.
result = encoder.encode(us, bs, true);
|
||||
|
||||
// NOTE(review): "occured" in the messages below is a misspelling of "occurred".
if (!result.isMalformed()) {
|
||||
errln("Error while encoding UTF-16LE (1) should have occured.");
|
||||
}
|
||||
|
||||
// Case 2: malformed surrogate carried over from the previous buffer.
// The encoder is deliberately not reset here; the input window is the
// three trail surrogates at indices 1-3.
|
||||
us.limit(4);
|
||||
us.position(1);
|
||||
bs.limit(7);
|
||||
bs.position(1);
|
||||
|
||||
result = encoder.encode(us, bs, true);
|
||||
|
||||
if (!result.isMalformed()) {
|
||||
errln("Error while encoding UTF-16LE (2) should have occured.");
|
||||
}
|
||||
|
||||
// Case 3: malformed trail surrogate. Start from a freshly reset encoder.
|
||||
encoder.reset();
|
||||
|
||||
// Re-encode the lone lead surrogate at index 0.
us.limit(1);
|
||||
us.position(0);
|
||||
bs.limit(1);
|
||||
bs.position(0);
|
||||
|
||||
// The result of this priming call is not checked.
result = encoder.encode(us, bs, true);
|
||||
|
||||
// Follow it with the two lead surrogates at indices 4-5, where a trail
// surrogate would be required.
us.limit(6);
|
||||
us.position(4);
|
||||
bs.limit(4);
|
||||
bs.position(1);
|
||||
|
||||
result = encoder.encode(us, bs, true);
|
||||
|
||||
if (!result.isMalformed()) {
|
||||
errln("Error while encoding UTF-16LE (3) should have occured.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user