ICU-6583 Port over illegal sequence handling code from ticket #5691 to ICU4J. Fix minor bugs in various callback functions and error handling code in ICU4J. Reenable "full" data driven conversion test.
X-SVN-Rev: 25468
This commit is contained in:
parent
e729683a89
commit
27ce5a3df5
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 2006-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
@ -332,10 +332,7 @@ public class CharsetCallback {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* reset the error */
|
||||
cr = CoderResult.UNDERFLOW;
|
||||
|
||||
|
||||
cr = encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
|
||||
return cr;
|
||||
}
|
||||
@ -356,7 +353,7 @@ public class CharsetCallback {
|
||||
if (context == null || !(context instanceof String)) {
|
||||
while (i < length) {
|
||||
uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
||||
uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
|
||||
uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding U */
|
||||
valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
|
||||
}
|
||||
} else {
|
||||
@ -376,9 +373,11 @@ public class CharsetCallback {
|
||||
uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
|
||||
}
|
||||
} else if (((String)context).equals(ESCAPE_C)) {
|
||||
uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
||||
uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
|
||||
valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
|
||||
while (i < length) {
|
||||
uniValueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
|
||||
uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
|
||||
valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
|
||||
}
|
||||
} else {
|
||||
while (i < length) {
|
||||
uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
||||
@ -388,10 +387,8 @@ public class CharsetCallback {
|
||||
}
|
||||
}
|
||||
}
|
||||
/* reset the error */
|
||||
cr = CoderResult.UNDERFLOW;
|
||||
|
||||
CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
|
||||
cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
|
||||
|
||||
return cr;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 2006-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
@ -502,6 +502,8 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
|
||||
//UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
|
||||
replayArray.put(preToUArray,0, -preToULength);
|
||||
// reset position
|
||||
replayArray.position(0);
|
||||
|
||||
source=replayArray;
|
||||
source.limit(replayArrayIndex-preToULength);
|
||||
@ -649,7 +651,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
|
||||
private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
|
||||
for(int i=srcOffset; i<length; i++){
|
||||
dst[dstOffset++]=(char)src[srcOffset++];
|
||||
dst[dstOffset++]=(char)(src[srcOffset++] & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 2008-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -58,6 +58,7 @@ class CharsetHZ extends CharsetICU {
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
|
||||
CoderResult err = CoderResult.UNDERFLOW;
|
||||
byte[] tempBuf = new byte[2];
|
||||
int targetUniChar = 0;
|
||||
int mySourceChar = 0;
|
||||
@ -104,10 +105,25 @@ class CharsetHZ extends CharsetICU {
|
||||
* if the first byte is equal to TILDE and the trail byte is not a valid byte then it is an
|
||||
* error condition
|
||||
*/
|
||||
mySourceChar |= 0x7e00;
|
||||
targetUniChar = 0xffff;
|
||||
isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
|
||||
break;
|
||||
/*
|
||||
* Ticket 5691: consistent illegal sequences:
|
||||
* - We include at least the first byte in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those.
|
||||
*/
|
||||
isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
|
||||
err = CoderResult.malformedForLength(1);
|
||||
toUBytesArray[0] = UCNV_TILDE;
|
||||
if (isStateDBCS ? (0x21 <= mySourceChar && mySourceChar <= 0x7e) : mySourceChar <= 0x7f) {
|
||||
/* The current byte could be the start of a character: Back it out. */
|
||||
toULength = 1;
|
||||
source.position(source.position() - 1);
|
||||
} else {
|
||||
/* Include the current byte in the illegal sequence. */
|
||||
toUBytesArray[1] = (byte)mySourceChar;
|
||||
toULength = 2;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
} else if (isStateDBCS) {
|
||||
if (toUnicodeStatus == 0) {
|
||||
@ -124,19 +140,36 @@ class CharsetHZ extends CharsetICU {
|
||||
continue;
|
||||
} else {
|
||||
/* trail byte */
|
||||
boolean leadIsOk, trailIsOk;
|
||||
int leadByte = toUnicodeStatus & 0xff;
|
||||
if (0x21 <= leadByte && leadByte <= 0x7d && 0x21 <= mySourceChar && mySourceChar <= 0x7e) {
|
||||
tempBuf[0] = (byte) (leadByte + 0x80);
|
||||
tempBuf[1] = (byte) (mySourceChar + 0x80);
|
||||
targetUniChar = gbDecoder.simpleGetNextUChar(ByteBuffer.wrap(tempBuf), super.isFallbackUsed());
|
||||
} else {
|
||||
targetUniChar = 0xffff;
|
||||
}
|
||||
targetUniChar = 0xffff;
|
||||
/*
|
||||
* add another bit so that the code below writes 2 bytes in case of error
|
||||
* Ticket 5691: consistent illegal sequence
|
||||
* - We include at least the first byte in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those
|
||||
*
|
||||
* In HZ DBCS, if the second byte is in the 21..7e range,
|
||||
* we report only the first byte as the illegal sequence.
|
||||
* Otherwise we convert or report the pair of bytes.
|
||||
*/
|
||||
mySourceChar |= 0x10000 | (leadByte << 8);
|
||||
toUnicodeStatus = 0;
|
||||
leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (leadByte - 0x21)) <= (0x7d - 0x21);
|
||||
trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
|
||||
if (leadIsOk && trailIsOk) {
|
||||
tempBuf[0] = (byte)(leadByte + 0x80);
|
||||
tempBuf[1] = (byte)(mySourceChar + 0x80);
|
||||
targetUniChar = gbDecoder.simpleGetNextUChar(ByteBuffer.wrap(tempBuf), super.isFallbackUsed());
|
||||
mySourceChar = (leadByte << 8) | mySourceChar;
|
||||
} else if (trailIsOk) {
|
||||
/* report a single illegal byte and continue with the following DBCS starter byte */
|
||||
source.position(source.position() - 1);
|
||||
mySourceChar = (int)leadByte;
|
||||
} else {
|
||||
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
|
||||
/* add another bit so that the code below writes 2 bytes in case of error */
|
||||
mySourceChar = 0x10000 | (leadByte << 8) | mySourceChar;
|
||||
}
|
||||
toUnicodeStatus = 0x00;
|
||||
}
|
||||
} else {
|
||||
if (mySourceChar == UCNV_TILDE) {
|
||||
@ -177,7 +210,7 @@ class CharsetHZ extends CharsetICU {
|
||||
}
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 2008-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -200,10 +200,12 @@ class CharsetISO2022 extends CharsetICU {
|
||||
}
|
||||
|
||||
/*
|
||||
* Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that.
|
||||
*
|
||||
* This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
|
||||
* 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
|
||||
* unchanged.
|
||||
*/
|
||||
*
|
||||
private static int _2022ToGR94DBCS(int value) {
|
||||
int returnValue = value + 0x8080;
|
||||
|
||||
@ -213,7 +215,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
} else {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
/* is the StateEnum charset value for a DBCS charset? */
|
||||
private static boolean IS_JP_DBCS(byte cs) {
|
||||
@ -528,6 +530,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
byte value;
|
||||
int key[] = {myConverterData.key};
|
||||
int offset[] = {0};
|
||||
int initialToULength = decoder.toULength;
|
||||
byte c;
|
||||
int malformLength = 0;
|
||||
|
||||
@ -571,7 +574,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
/* indicate that the escape sequence is incomplete: key !=0 */
|
||||
return err;
|
||||
} else if (value == INVALID_2022) {
|
||||
return CoderResult.malformedForLength(malformLength);
|
||||
err = CoderResult.malformedForLength(malformLength);
|
||||
} else /* value == VALID_TERMINAL_2022 */ {
|
||||
switch (var) {
|
||||
case ISO_2022_JP: {
|
||||
@ -679,7 +682,39 @@ class CharsetISO2022 extends CharsetICU {
|
||||
}
|
||||
if (!err.isError()) {
|
||||
decoder.toULength = 0;
|
||||
} else if (err.isMalformed()) {
|
||||
if (decoder.toULength > 1) {
|
||||
/*
|
||||
* Ticket 5691: consistent illegal sequences:
|
||||
* - We include at least the first byte (ESC) in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those.
|
||||
* In escape sequences, all following bytes are "printable", that is,
|
||||
* unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
|
||||
* they are valid single/lead bytes.
|
||||
* For simplicity, we always only report the initial ESC byte as the
|
||||
* illegal sequence and back out all other bytes we looked at.
|
||||
*/
|
||||
/* Back out some bytes. */
|
||||
int backOutDistance = decoder.toULength - 1;
|
||||
int bytesFromThisBuffer = decoder.toULength - initialToULength;
|
||||
if (backOutDistance <= bytesFromThisBuffer) {
|
||||
/* same as initialToULength<=1 */
|
||||
source.position(source.position() - backOutDistance);
|
||||
} else {
|
||||
/* Back out bytes from the previous buffer: Need to replay them. */
|
||||
decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
|
||||
/* same as -(initialToULength-1) */
|
||||
/* preToULength is negative! */
|
||||
for (int i = 0; i < -(decoder.preToULength); i++) {
|
||||
decoder.preToUArray[i] = decoder.toUBytesArray[i+1];
|
||||
}
|
||||
source.position(source.position() - bytesFromThisBuffer);
|
||||
}
|
||||
decoder.toULength = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -820,7 +855,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
gotoEscape = true;
|
||||
} else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
|
||||
/* continue with a partial double-byte character */
|
||||
mySourceChar = toUBytesArray[0];
|
||||
mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
toULength = 0;
|
||||
cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
|
||||
// goto getTrailByte;
|
||||
@ -838,7 +873,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
|
||||
if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
|
||||
if (!gotoEscape && !gotoGetTrail) {
|
||||
mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
|
||||
mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
|
||||
mySourceCharTemp = mySourceChar;
|
||||
}
|
||||
|
||||
@ -963,26 +998,48 @@ class CharsetISO2022 extends CharsetICU {
|
||||
// getTrailByte:
|
||||
int tmpSourceChar;
|
||||
gotoGetTrail = false;
|
||||
byte trailByte;
|
||||
trailByte = source.get();
|
||||
tmpSourceChar = (mySourceChar << 8) | (short)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
|
||||
if (cs == JISX208) {
|
||||
_2022ToSJIS((char)(UConverterConstants.UNSIGNED_BYTE_MASK & mySourceChar),
|
||||
(char)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte), tempBuf);
|
||||
} else {
|
||||
if (cs == KSC5601) {
|
||||
tmpSourceChar = _2022ToGR94DBCS(tmpSourceChar);
|
||||
short trailByte;
|
||||
boolean leadIsOk, trailIsOk;
|
||||
|
||||
trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
/*
|
||||
* Ticket 5691: consistent illegal sequences:
|
||||
* - We include at least the first byte in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those.
|
||||
*
|
||||
* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
|
||||
* an ESC/SO/SI, we report only the first byte as the illegal sequence.
|
||||
* Otherwise we convert or report the pair of bytes.
|
||||
*/
|
||||
leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
|
||||
trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
|
||||
if (leadIsOk && trailIsOk) {
|
||||
source.get();
|
||||
tmpSourceChar = (mySourceChar << 8) | trailByte;
|
||||
if (cs == JISX208) {
|
||||
_2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);
|
||||
mySourceChar = tmpSourceChar;
|
||||
} else {
|
||||
/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
|
||||
mySourceChar = tmpSourceChar;
|
||||
if (cs == KSC5601) {
|
||||
tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
|
||||
}
|
||||
tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
|
||||
tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
|
||||
}
|
||||
tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
|
||||
tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
|
||||
targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);
|
||||
} else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
|
||||
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
|
||||
source.get();
|
||||
/* add another bit so that the code below writes 2 bytes in case of error */
|
||||
mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
|
||||
}
|
||||
ByteBuffer tempByteBuf = ByteBuffer.wrap(tempBuf);
|
||||
targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], tempByteBuf, false);
|
||||
mySourceChar = tmpSourceChar;
|
||||
} else {
|
||||
toUBytesArray[0] = (byte)mySourceChar;
|
||||
toULength = 1;
|
||||
// goto endloop;
|
||||
// goto endloop
|
||||
return err;
|
||||
}
|
||||
} /* end of inner switch */
|
||||
@ -1056,8 +1113,9 @@ class CharsetISO2022 extends CharsetICU {
|
||||
gotoEscape = true;
|
||||
} else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
|
||||
/* continue with a partial double-byte character */
|
||||
mySourceChar = toUBytesArray[0];
|
||||
mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
toULength = 0;
|
||||
targetUniChar = UConverterConstants.missingCharMarker;
|
||||
// goto getTrailByte
|
||||
gotoGetTrailByte = true;
|
||||
}
|
||||
@ -1139,36 +1197,58 @@ class CharsetISO2022 extends CharsetICU {
|
||||
UConverterSharedData cnv;
|
||||
byte tempState;
|
||||
int tempBufLen;
|
||||
byte trailByte;
|
||||
boolean leadIsOk, trailIsOk;
|
||||
short trailByte;
|
||||
// getTrailByte: label
|
||||
gotoGetTrailByte = false; // reset gotoGetTrailByte
|
||||
|
||||
trailByte = source.get();
|
||||
tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
|
||||
if (tempState > CNS_11643_0) {
|
||||
cnv = myConverterData.myConverterArray[CNS_11643];
|
||||
tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
|
||||
tempBuf[1] = (byte)(mySourceChar);
|
||||
tempBuf[2] = trailByte;
|
||||
tempBufLen = 3;
|
||||
} else {
|
||||
cnv = myConverterData.myConverterArray[tempState];
|
||||
tempBuf[0] = (byte)(mySourceChar);
|
||||
tempBuf[1] = trailByte;
|
||||
tempBufLen = 2;
|
||||
trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
/*
|
||||
* Ticket 5691: consistent illegal sequences:
|
||||
* - We include at least the first byte in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those.
|
||||
*
|
||||
* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
|
||||
* an ESC/SO/SI, we report only the first byte as the illegal sequence.
|
||||
* Otherwise we convert or report the pair of bytes.
|
||||
*/
|
||||
leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
|
||||
trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
|
||||
if (leadIsOk && trailIsOk) {
|
||||
source.get();
|
||||
tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
|
||||
if (tempState > CNS_11643_0) {
|
||||
cnv = myConverterData.myConverterArray[CNS_11643];
|
||||
tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
|
||||
tempBuf[1] = (byte)mySourceChar;
|
||||
tempBuf[2] = (byte)trailByte;
|
||||
tempBufLen = 3;
|
||||
} else {
|
||||
cnv = myConverterData.myConverterArray[tempState];
|
||||
tempBuf[0] = (byte)mySourceChar;
|
||||
tempBuf[1] = (byte)trailByte;
|
||||
tempBufLen = 2;
|
||||
}
|
||||
ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
|
||||
tempBuffer.limit(tempBufLen);
|
||||
targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
|
||||
mySourceChar = (mySourceChar << 8) | trailByte;
|
||||
|
||||
} else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
|
||||
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
|
||||
source.get();
|
||||
/* add another bit so that the code below writes 2 bytes in case of error */
|
||||
mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
|
||||
}
|
||||
mySourceChar = (mySourceChar << 8) | (UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
|
||||
if (myConverterData.toU2022State.g >= 2) {
|
||||
/* return from a single-shift state to the previous one */
|
||||
myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
|
||||
}
|
||||
ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
|
||||
tempBuffer.limit(tempBufLen);
|
||||
tempBuffer.position(0);
|
||||
targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
|
||||
} else {
|
||||
toUBytesArray[0] = (byte)mySourceChar;
|
||||
toULength = 1;
|
||||
// goto endloop;
|
||||
return err;
|
||||
}
|
||||
} else {
|
||||
@ -1228,7 +1308,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
|
||||
CoderResult err = CoderResult.UNDERFLOW;
|
||||
char mySourceChar = 0x0000;
|
||||
int mySourceChar = 0x0000;
|
||||
int targetUniChar = 0x0000;
|
||||
byte[] tempBuf = new byte[2];
|
||||
boolean usingFallback;
|
||||
@ -1247,7 +1327,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
gotoEscape = true;
|
||||
} else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
|
||||
/* continue with a partial double-byte character */
|
||||
mySourceChar = (char)toUBytesArray[0];
|
||||
mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
toULength = 0;
|
||||
gotoGetTrailByte = true;
|
||||
}
|
||||
@ -1255,7 +1335,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
|
||||
if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
|
||||
if (!gotoGetTrailByte && !gotoEscape) {
|
||||
mySourceChar = (char)(source.get()&UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
}
|
||||
|
||||
if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
|
||||
@ -1290,31 +1370,52 @@ class CharsetISO2022 extends CharsetICU {
|
||||
myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */
|
||||
if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
|
||||
if (source.hasRemaining() || gotoGetTrailByte) {
|
||||
boolean leadIsOk, trailIsOk;
|
||||
short trailByte;
|
||||
// getTrailByte label
|
||||
gotoGetTrailByte = false; // reset gotoGetTrailByte flag
|
||||
|
||||
byte trailByte;
|
||||
trailByte = source.get();
|
||||
tempBuf[0] = (byte)(mySourceChar + 0x80);
|
||||
tempBuf[1] = (byte)(trailByte + 0x80);
|
||||
mySourceChar = (char)((mySourceChar << 8) | (short)(trailByte&UConverterConstants.UNSIGNED_BYTE_MASK));
|
||||
if ((mySourceChar & 0x8080) == 0) {
|
||||
trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
targetUniChar = UConverterConstants.missingCharMarker;
|
||||
/*
|
||||
* Ticket 5691: consistent illegal sequences:
|
||||
* - We include at least the first byte in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those.
|
||||
*
|
||||
* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
|
||||
* an ESC/SO/SI, we report only the first byte as the illegal sequence.
|
||||
* Otherwise we convert or report the pair of bytes.
|
||||
*/
|
||||
leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
|
||||
trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
|
||||
if (leadIsOk && trailIsOk) {
|
||||
source.get();
|
||||
tempBuf[0] = (byte)(mySourceChar + 0x80);
|
||||
tempBuf[1] = (byte)(trailByte + 0x80);
|
||||
targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
|
||||
} else {
|
||||
/* illegal bytes > 0x7f */
|
||||
targetUniChar = UConverterConstants.missingCharMarker;
|
||||
mySourceChar = (char)((mySourceChar << 8) | trailByte);
|
||||
} else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
|
||||
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
|
||||
source.get();
|
||||
/* add another bit so that the code below writes 2 bytes in case of error */
|
||||
mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);
|
||||
}
|
||||
} else {
|
||||
toUBytesArray[0] = (byte)mySourceChar;
|
||||
toULength = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
int oldSourceLimit = source.limit();
|
||||
} else if (mySourceChar <= 0x7f) {
|
||||
int savedSourceLimit = source.limit();
|
||||
int savedSourcePosition = source.position();
|
||||
source.limit(source.position());
|
||||
source.position(source.position()-1);
|
||||
targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
|
||||
source.limit(oldSourceLimit);
|
||||
source.limit(savedSourceLimit);
|
||||
source.position(savedSourcePosition);
|
||||
} else {
|
||||
targetUniChar = 0xffff;
|
||||
}
|
||||
if (targetUniChar < 0xfffe) {
|
||||
target.put((char)targetUniChar);
|
||||
@ -1412,7 +1513,7 @@ class CharsetISO2022 extends CharsetICU {
|
||||
}
|
||||
}
|
||||
|
||||
if (err.isError() || (source.position() == source.limit())) {
|
||||
if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@ -2580,7 +2681,11 @@ class CharsetISO2022 extends CharsetICU {
|
||||
}
|
||||
/* only DBCS or SBCS characters are expected */
|
||||
/* DB characters with high bit set to 1 are expected */
|
||||
if (length > 2 || length == 0 || (((targetByteUnit[0] & 0x8080) != 0x8080) && length == 2)) {
|
||||
if (length > 2 || length == 0 ||
|
||||
(length == 1 && targetByteUnit[0] > 0x7f) ||
|
||||
(length ==2 &&
|
||||
((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||
|
||||
((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {
|
||||
targetByteUnit[0] = UConverterConstants.missingCharMarker;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 2006-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
@ -1573,330 +1573,8 @@ class CharsetMBCS extends CharsetICU {
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
|
||||
CoderResult[] cr = { CoderResult.UNDERFLOW };
|
||||
|
||||
int sourceArrayIndex;
|
||||
int stateTable[][/* 256 */];
|
||||
char[] unicodeCodeUnits;
|
||||
|
||||
int offset;
|
||||
byte state;
|
||||
int byteIndex;
|
||||
byte[] bytes;
|
||||
|
||||
int sourceIndex, nextSourceIndex;
|
||||
|
||||
int entry = 0;
|
||||
char c;
|
||||
byte action;
|
||||
|
||||
if (preToULength > 0) {
|
||||
/*
|
||||
* pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
|
||||
* continuous offsets
|
||||
*/
|
||||
cr[0] = continueMatchToU(source, target, offsets, -1, flush);
|
||||
|
||||
if (cr[0].isError() || preToULength < 0) {
|
||||
return cr[0];
|
||||
}
|
||||
}
|
||||
|
||||
if (sharedData.mbcs.countStates == 1) {
|
||||
if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
|
||||
} else {
|
||||
cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
|
||||
}
|
||||
return cr[0];
|
||||
}
|
||||
|
||||
/* set up the local pointers */
|
||||
sourceArrayIndex = source.position();
|
||||
|
||||
if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
|
||||
stateTable = sharedData.mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable = sharedData.mbcs.stateTable;
|
||||
}
|
||||
unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
offset = (int) toUnicodeStatus;
|
||||
byteIndex = toULength;
|
||||
bytes = toUBytesArray;
|
||||
|
||||
/*
|
||||
* if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
|
||||
* (dbcsOnlyState==0 if it is not a DBCS-only converter)
|
||||
*/
|
||||
state = (byte)mode;
|
||||
if (state == 0) {
|
||||
state = sharedData.mbcs.dbcsOnlyState;
|
||||
}
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex = byteIndex == 0 ? 0 : -1;
|
||||
nextSourceIndex = 0;
|
||||
|
||||
/* conversion loop */
|
||||
while (sourceArrayIndex < source.limit()) {
|
||||
/*
|
||||
* This following test is to see if available input would overflow the output. It does not catch output
|
||||
* of more than one code unit that overflows as a result of a surrogate pair or callback output from the
|
||||
* last source byte. Therefore, those situations also test for overflows and will then break the loop,
|
||||
* too.
|
||||
*/
|
||||
if (!target.hasRemaining()) {
|
||||
/* target is full */
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
break;
|
||||
}
|
||||
|
||||
if (byteIndex == 0) {
|
||||
/* optimized loop for 1/2-byte input and BMP output */
|
||||
// agljport:todo see ucnvmbcs.c for deleted block
|
||||
do {
|
||||
entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
|
||||
++sourceArrayIndex;
|
||||
if (sourceArrayIndex < source.limit()
|
||||
&& MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
|
||||
&& MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
|
||||
&& (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
|
||||
++sourceArrayIndex;
|
||||
target.put(c);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
sourceIndex = (nextSourceIndex += 2);
|
||||
}
|
||||
state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
offset = 0;
|
||||
} else {
|
||||
/* set the state and leave the optimized loop */
|
||||
++nextSourceIndex;
|
||||
bytes[0] = source.get(sourceArrayIndex - 1);
|
||||
byteIndex = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
|
||||
/* output BMP code point */
|
||||
++sourceArrayIndex;
|
||||
target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
sourceIndex = ++nextSourceIndex;
|
||||
}
|
||||
state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
} else {
|
||||
/* leave the optimized loop */
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (sourceArrayIndex < source.limit() && target.hasRemaining());
|
||||
/*
|
||||
* these tests and break statements could be put inside the loop if C had "break outerLoop" like
|
||||
* Java
|
||||
*/
|
||||
if (sourceArrayIndex >= source.limit()) {
|
||||
break;
|
||||
}
|
||||
if (!target.hasRemaining()) {
|
||||
/* target is full */
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
break;
|
||||
}
|
||||
|
||||
++nextSourceIndex;
|
||||
bytes[byteIndex++] = source.get(sourceArrayIndex++);
|
||||
} else /* byteIndex>0 */{
|
||||
++nextSourceIndex;
|
||||
entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
|
||||
& UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
}
|
||||
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* save the previous state for proper extension mapping with SI/SO-stateful converters */
|
||||
mode = state;
|
||||
|
||||
/* set the next state early so that we can reuse the entry variable */
|
||||
state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
|
||||
/*
|
||||
* An if-else-if chain provides more reliable performance for the most common cases compared to a
|
||||
* switch.
|
||||
*/
|
||||
action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
|
||||
if (action == MBCS_STATE_VALID_16) {
|
||||
offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c = unicodeCodeUnits[offset];
|
||||
if (c < 0xfffe) {
|
||||
/* output BMP code point */
|
||||
target.put(c);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
} else if (c == 0xfffe) {
|
||||
if (isFallbackUsed() && (entry = (int) getFallback(sharedData.mbcs, offset)) != 0xfffe) {
|
||||
/* output fallback BMP code point */
|
||||
target.put((char) entry);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
}
|
||||
} else {
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
}
|
||||
} else if (action == MBCS_STATE_VALID_DIRECT_16) {
|
||||
/* output BMP code point */
|
||||
target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
} else if (action == MBCS_STATE_VALID_16_PAIR) {
|
||||
offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c = unicodeCodeUnits[offset++];
|
||||
if (c < 0xd800) {
|
||||
/* output BMP code point below 0xd800 */
|
||||
target.put(c);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
} else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
|
||||
/* output roundtrip or fallback surrogate pair */
|
||||
target.put((char) (c & 0xdbff));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
if (target.hasRemaining()) {
|
||||
target.put(unicodeCodeUnits[offset]);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
} else {
|
||||
/* target overflow */
|
||||
charErrorBufferArray[0] = unicodeCodeUnits[offset];
|
||||
charErrorBufferLength = 1;
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
|
||||
offset = 0;
|
||||
break;
|
||||
}
|
||||
} else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
|
||||
/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
|
||||
target.put(unicodeCodeUnits[offset]);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
} else if (c == 0xffff) {
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
}
|
||||
} else if (action == MBCS_STATE_VALID_DIRECT_20
|
||||
|| (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
|
||||
entry = MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
/* output surrogate pair */
|
||||
target.put((char) (0xd800 | (char) (entry >> 10)));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
c = (char) (0xdc00 | (char) (entry & 0x3ff));
|
||||
if (target.hasRemaining()) {
|
||||
target.put(c);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
} else {
|
||||
/* target overflow */
|
||||
charErrorBufferArray[0] = c;
|
||||
charErrorBufferLength = 1;
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
|
||||
offset = 0;
|
||||
break;
|
||||
}
|
||||
} else if (action == MBCS_STATE_CHANGE_ONLY) {
|
||||
/*
|
||||
* This serves as a state change without any output. It is useful for reading simple stateful
|
||||
* encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
|
||||
* for more sophisticated state transitions.
|
||||
*/
|
||||
if (sharedData.mbcs.dbcsOnlyState == 0) {
|
||||
byteIndex = 0;
|
||||
} else {
|
||||
/* SI/SO are illegal for DBCS-only conversion */
|
||||
state = (byte) (mode); /* restore the previous state */
|
||||
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
}
|
||||
} else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
|
||||
if (isFallbackUsed()) {
|
||||
/* output BMP code point */
|
||||
target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
}
|
||||
} else if (action == MBCS_STATE_UNASSIGNED) {
|
||||
/* just fall through */
|
||||
} else if (action == MBCS_STATE_ILLEGAL) {
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
} else {
|
||||
/* reserved, must never occur */
|
||||
byteIndex = 0;
|
||||
}
|
||||
|
||||
/* end of action codes: prepare for a new character */
|
||||
offset = 0;
|
||||
|
||||
if (byteIndex == 0) {
|
||||
sourceIndex = nextSourceIndex;
|
||||
} else if (cr[0].isError()) {
|
||||
/* callback(illegal) */
|
||||
break;
|
||||
} else /* unassigned sequences indicated with byteIndex>0 */{
|
||||
/* try an extension mapping */
|
||||
int sourceBeginIndex = sourceArrayIndex;
|
||||
source.position(sourceArrayIndex);
|
||||
byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
|
||||
sourceArrayIndex = source.position();
|
||||
sourceIndex = nextSourceIndex + (int) (sourceArrayIndex - sourceBeginIndex);
|
||||
|
||||
if (cr[0].isError() || cr[0].isOverflow()) {
|
||||
/* not mappable or buffer overflow */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
toUnicodeStatus = offset;
|
||||
mode = state;
|
||||
toULength = byteIndex;
|
||||
|
||||
/* write back the updated pointers */
|
||||
source.position(sourceArrayIndex);
|
||||
|
||||
return cr[0];
|
||||
/* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code. */
|
||||
return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2253,132 +1931,134 @@ class CharsetMBCS extends CharsetICU {
|
||||
|
||||
CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
|
||||
CoderResult[] cr = { CoderResult.UNDERFLOW };
|
||||
|
||||
int[][] stateTable;
|
||||
|
||||
int sourceArrayIndex, sourceArrayIndexStart;
|
||||
int stateTable[][/* 256 */];
|
||||
char[] unicodeCodeUnits;
|
||||
|
||||
int sourceIndex, nextSourceIndex;
|
||||
|
||||
|
||||
int offset;
|
||||
short state;
|
||||
byte state;
|
||||
int byteIndex;
|
||||
byte[] bytes;
|
||||
|
||||
int entry;
|
||||
|
||||
int sourceIndex, nextSourceIndex;
|
||||
|
||||
int entry = 0;
|
||||
char c;
|
||||
short action;
|
||||
|
||||
if (this.preToULength > 0) {
|
||||
byte action;
|
||||
|
||||
if (preToULength > 0) {
|
||||
/*
|
||||
* pass sourceIndex-1 because we continue from an earlier buffer
|
||||
* in the future, this may change with continuous offsets
|
||||
* pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
|
||||
* continuous offsets
|
||||
*/
|
||||
cr[0] = continueMatchToU(source, target, offsets, -1, flush);
|
||||
if (cr[0].isError() || this.preToULength < 0) {
|
||||
|
||||
if (cr[0].isError() || preToULength < 0) {
|
||||
return cr[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (sharedData.mbcs.countStates == 1) {
|
||||
if ((sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
|
||||
} else {
|
||||
cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
|
||||
}
|
||||
return cr[0];
|
||||
}
|
||||
|
||||
if ((options&UConverterConstants.OPTION_SWAP_LFNL) != 0) {
|
||||
|
||||
/* set up the local pointers */
|
||||
sourceArrayIndex = sourceArrayIndexStart = source.position();
|
||||
|
||||
if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
|
||||
stateTable = sharedData.mbcs.swapLFNLStateTable;
|
||||
} else {
|
||||
stateTable = sharedData.mbcs.stateTable;
|
||||
}
|
||||
unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
|
||||
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
offset = this.toUnicodeStatus;
|
||||
byteIndex = this.toULength;
|
||||
bytes = this.toUBytesArray;
|
||||
|
||||
offset = (int)toUnicodeStatus;
|
||||
byteIndex = toULength;
|
||||
bytes = toUBytesArray;
|
||||
|
||||
/*
|
||||
* if we are in the SBCS state for a DBCS-only converter,
|
||||
* then load the DBCS state from the MBCS data
|
||||
* if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
|
||||
* (dbcsOnlyState==0 if it is not a DBCS-only converter)
|
||||
*/
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&this.mode);
|
||||
state = (byte)mode;
|
||||
if (state == 0) {
|
||||
state = sharedData.mbcs.dbcsOnlyState;
|
||||
}
|
||||
|
||||
/* sourceIndex=-1 if the current character begain in the previous buffer */
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex = byteIndex == 0 ? 0 : -1;
|
||||
nextSourceIndex = 0;
|
||||
|
||||
|
||||
/* conversion loop */
|
||||
while (source.hasRemaining()) {
|
||||
while (sourceArrayIndex < source.limit()) {
|
||||
/*
|
||||
* This following test is to see if available input would overflow the output.
|
||||
* It does not catch output of more than one code unit that
|
||||
* overflows as a result of a surrogate pair or callback output
|
||||
* from the last source byte.
|
||||
* Therefore, those situations also test for overflows and will
|
||||
* then break the loop, too.
|
||||
* This following test is to see if available input would overflow the output. It does not catch output
|
||||
* of more than one code unit that overflows as a result of a surrogate pair or callback output from the
|
||||
* last source byte. Therefore, those situations also test for overflows and will then break the loop,
|
||||
* too.
|
||||
*/
|
||||
if (!target.hasRemaining()) {
|
||||
/* target is full */
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (byteIndex == 0) {
|
||||
/* optimized loop for 1/2-byte input and BMP output */
|
||||
// agljport:todo see ucnvmbcs.c for deleted block
|
||||
do {
|
||||
entry = stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
|
||||
state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
|
||||
|
||||
source.get();
|
||||
if (source.hasRemaining() &&
|
||||
MBCS_ENTRY_IS_FINAL(entry=stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK]) &&
|
||||
MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16 &&
|
||||
(c = unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
|
||||
source.get();
|
||||
++sourceArrayIndex;
|
||||
if (sourceArrayIndex < source.limit()
|
||||
&& MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
|
||||
&& MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
|
||||
&& (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
|
||||
++sourceArrayIndex;
|
||||
target.put(c);
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
sourceIndex = (nextSourceIndex + 2);
|
||||
sourceIndex = (nextSourceIndex += 2);
|
||||
}
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
|
||||
state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
offset = 0;
|
||||
} else {
|
||||
/* set the state and leave the optimized loop */
|
||||
++nextSourceIndex;
|
||||
bytes[0] = source.get(source.position()-1);
|
||||
bytes[0] = source.get(sourceArrayIndex - 1);
|
||||
byteIndex = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
|
||||
/* output BMP code point */
|
||||
source.get();
|
||||
++sourceArrayIndex;
|
||||
target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
sourceIndex = ++nextSourceIndex;
|
||||
}
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
|
||||
state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
} else {
|
||||
/* leave the optimized loop */
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (source.hasRemaining() && target.hasRemaining());
|
||||
|
||||
/* these tests and break statements could be put inside the loop
|
||||
* if C had "break outerLoop" like Java
|
||||
} while (sourceArrayIndex < source.limit() && target.hasRemaining());
|
||||
/*
|
||||
* these tests and break statements could be put inside the loop if C had "break outerLoop" like
|
||||
* Java
|
||||
*/
|
||||
if (!source.hasRemaining()) {
|
||||
if (sourceArrayIndex >= source.limit()) {
|
||||
break;
|
||||
}
|
||||
if (!target.hasRemaining()) {
|
||||
@ -2386,31 +2066,32 @@ class CharsetMBCS extends CharsetICU {
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
++nextSourceIndex;
|
||||
bytes[byteIndex++] = source.get();
|
||||
} else { /* byteIndex>0 */
|
||||
bytes[byteIndex++] = source.get(sourceArrayIndex++);
|
||||
} else /* byteIndex>0 */{
|
||||
++nextSourceIndex;
|
||||
entry = stateTable[state][(short)(bytes[byteIndex++]=source.get()) & UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
|
||||
& UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
}
|
||||
|
||||
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
|
||||
offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/* save the previous state for proper extension mapping with SI/SO-stateful converters */
|
||||
mode = state;
|
||||
|
||||
|
||||
/* set the next state early so that we can reuse the entry variable */
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
|
||||
|
||||
state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
|
||||
|
||||
/*
|
||||
* An if-else-if chain provides more reliable performance for
|
||||
* the most common cases compared to a switch.
|
||||
* An if-else-if chain provides more reliable performance for the most common cases compared to a
|
||||
* switch.
|
||||
*/
|
||||
action = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_ACTION(entry));
|
||||
action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);
|
||||
if (action == MBCS_STATE_VALID_16) {
|
||||
offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c = unicodeCodeUnits[offset];
|
||||
@ -2422,7 +2103,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
}
|
||||
byteIndex = 0;
|
||||
} else if (c == 0xfffe) {
|
||||
if (CharsetDecoderICU.isToUUseFallback() && (entry = (int)getFallback(sharedData.mbcs, offset)) != 0xfffe) {
|
||||
if (isFallbackUsed() && (entry = (int)getFallback(sharedData.mbcs, offset)) != 0xfffe) {
|
||||
/* output fallback BMP code point */
|
||||
target.put((char)entry);
|
||||
if (offsets != null) {
|
||||
@ -2432,7 +2113,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
}
|
||||
} else {
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(1);
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
}
|
||||
} else if (action == MBCS_STATE_VALID_DIRECT_16) {
|
||||
/* output BMP code point */
|
||||
@ -2451,9 +2132,9 @@ class CharsetMBCS extends CharsetICU {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
} else if (CharsetDecoderICU.isToUUseFallback() ? c<=0xdfff : c<=0xdbff) {
|
||||
} else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
|
||||
/* output roundtrip or fallback surrogate pair */
|
||||
target.put((char)(c&0xdbff));
|
||||
target.put((char)(c & 0xdbff));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
@ -2468,11 +2149,11 @@ class CharsetMBCS extends CharsetICU {
|
||||
charErrorBufferArray[0] = unicodeCodeUnits[offset];
|
||||
charErrorBufferLength = 1;
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
|
||||
|
||||
offset = 0;
|
||||
break;
|
||||
}
|
||||
} else if (CharsetDecoderICU.isToUUseFallback() ? (c&0xfffe)==0xe000 : c==0xe000) {
|
||||
} else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
|
||||
/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
|
||||
target.put(unicodeCodeUnits[offset]);
|
||||
if (offsets != null) {
|
||||
@ -2481,18 +2162,18 @@ class CharsetMBCS extends CharsetICU {
|
||||
byteIndex = 0;
|
||||
} else if (c == 0xffff) {
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(1);
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
}
|
||||
} else if (action == MBCS_STATE_VALID_DIRECT_20 ||
|
||||
action == MBCS_STATE_FALLBACK_DIRECT_20 && CharsetDecoderICU.isToUUseFallback()) {
|
||||
} else if (action == MBCS_STATE_VALID_DIRECT_20
|
||||
|| (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
|
||||
entry = MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
/* output surrogate pair */
|
||||
target.put((char)(0xd800 | (char)(entry&0x3ff)));
|
||||
target.put((char)(0xd800 | (char)(entry >> 10)));
|
||||
if (offsets != null) {
|
||||
offsets.put(sourceIndex);
|
||||
}
|
||||
byteIndex = 0;
|
||||
c = (char)(0xdc00 | (char)(entry>>10));
|
||||
c = (char)(0xdc00 | (char)(entry & 0x3ff));
|
||||
if (target.hasRemaining()) {
|
||||
target.put(c);
|
||||
if (offsets != null) {
|
||||
@ -2503,30 +2184,27 @@ class CharsetMBCS extends CharsetICU {
|
||||
charErrorBufferArray[0] = c;
|
||||
charErrorBufferLength = 1;
|
||||
cr[0] = CoderResult.OVERFLOW;
|
||||
|
||||
|
||||
offset = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
} else if (action == MBCS_STATE_CHANGE_ONLY) {
|
||||
/*
|
||||
* This serves as a state change without any output.
|
||||
* It is useful for reading simple stateful encodings,
|
||||
* for example using just Shift-In/Shift-Out codes.
|
||||
* The 21 unused bits may later be used for more sophisticated
|
||||
* state transistions.
|
||||
* This serves as a state change without any output. It is useful for reading simple stateful
|
||||
* encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
|
||||
* for more sophisticated state transitions.
|
||||
*/
|
||||
if (sharedData.mbcs.dbcsOnlyState == 0) {
|
||||
byteIndex = 0;
|
||||
} else {
|
||||
/* SI/SO are illegal for DBCS-only conversion */
|
||||
state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&mode); /* restore the previous state */
|
||||
|
||||
state = (byte)(mode); /* restore the previous state */
|
||||
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(1);
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
}
|
||||
} else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
|
||||
if (CharsetDecoderICU.isToUUseFallback()) {
|
||||
if (isFallbackUsed()) {
|
||||
/* output BMP code point */
|
||||
target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
|
||||
if (offsets != null) {
|
||||
@ -2538,37 +2216,70 @@ class CharsetMBCS extends CharsetICU {
|
||||
/* just fall through */
|
||||
} else if (action == MBCS_STATE_ILLEGAL) {
|
||||
/* callback(illegal) */
|
||||
cr[0] = CoderResult.malformedForLength(1);
|
||||
cr[0] = CoderResult.malformedForLength(byteIndex);
|
||||
} else {
|
||||
/* reserved, must never occur */
|
||||
byteIndex = 0;
|
||||
}
|
||||
|
||||
/* end of action codes: prepare for new character */
|
||||
|
||||
/* end of action codes: prepare for a new character */
|
||||
offset = 0;
|
||||
|
||||
|
||||
if (byteIndex == 0) {
|
||||
sourceIndex = nextSourceIndex;
|
||||
} else if (cr[0].isError()) {
|
||||
/* callback(illegal) */
|
||||
if (byteIndex > 1) {
|
||||
/*
|
||||
* Ticket 5691: consistent illegal sequences:
|
||||
* - We include at least the first byte in the illegal sequence.
|
||||
* - If any of the non-initial bytes could be the start of a character,
|
||||
* we stop the illegal sequence before the first one of those.
|
||||
*/
|
||||
boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);
|
||||
byte i;
|
||||
for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}
|
||||
if (i < byteIndex) {
|
||||
byte backOutDistance = (byte)(byteIndex - i);
|
||||
int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;
|
||||
byteIndex = i; /* length of reported illegal byte sequence */
|
||||
if (backOutDistance <= bytesFromThisBuffer) {
|
||||
sourceArrayIndex -= backOutDistance;
|
||||
} else {
|
||||
/* Back out bytes from the previous buffer: Need to replay them. */
|
||||
this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
|
||||
/* preToULength is negative! */
|
||||
for (int n = 0; n < -this.preToULength; n++) {
|
||||
this.preToUArray[n] = bytes[i+n];
|
||||
}
|
||||
sourceArrayIndex = sourceArrayIndexStart;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
} else { /* unassigned sequences indicated with byteIndex>0 */
|
||||
} else /* unassigned sequences indicated with byteIndex>0 */{
|
||||
/* try an extension mapping */
|
||||
int sourceBeginIndex = sourceArrayIndex;
|
||||
source.position(sourceArrayIndex);
|
||||
byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
|
||||
sourceIndex = nextSourceIndex + source.position();
|
||||
|
||||
if (cr[0].isError()) {
|
||||
sourceArrayIndex = source.position();
|
||||
sourceIndex = nextSourceIndex += (int)(sourceArrayIndex - sourceBeginIndex);
|
||||
|
||||
if (cr[0].isError() || cr[0].isOverflow()) {
|
||||
/* not mappable or buffer overflow */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
toUnicodeStatus = offset;
|
||||
mode = state;
|
||||
toULength = byteIndex;
|
||||
|
||||
|
||||
/* write back the updated pointers */
|
||||
source.position(sourceArrayIndex);
|
||||
|
||||
return cr[0];
|
||||
}
|
||||
/*
|
||||
@ -2908,8 +2619,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
/* conversion loop */
|
||||
while (true) {
|
||||
// entry=stateTable[state][(uint8_t)source[i++]];
|
||||
entry = stateTable[state][source.get() & UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
i = source.position();
|
||||
entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];
|
||||
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state = MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
@ -2991,8 +2701,8 @@ class CharsetMBCS extends CharsetICU {
|
||||
/* try an extension mapping */
|
||||
if (sharedData.mbcs.extIndexes != null) {
|
||||
/* Increase the limit for proper handling. Used in LMBCS. */
|
||||
if (source.limit() >= source.position() + length) {
|
||||
source.limit(source.position() + length);
|
||||
if (source.limit() > i + length) {
|
||||
source.limit(i + length);
|
||||
}
|
||||
return simpleMatchToU(source, useFallback);
|
||||
}
|
||||
@ -3000,6 +2710,51 @@ class CharsetMBCS extends CharsetICU {
|
||||
|
||||
return c;
|
||||
}
|
||||
private boolean hasValidTrailBytes(int[][] stateTable, short state) {
|
||||
int[] row = stateTable[state];
|
||||
int b, entry;
|
||||
/* First test for final entries in this state for some commonly valid byte values. */
|
||||
entry = row[0xa1];
|
||||
if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
|
||||
return true;
|
||||
}
|
||||
entry = row[0x41];
|
||||
if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
|
||||
return true;
|
||||
}
|
||||
/* Then test for final entries in this state. */
|
||||
for (b = 0; b <= 0xff; b++) {
|
||||
entry = row[b];
|
||||
if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
/* Then recurse for transition entries. */
|
||||
for (b = 0; b <= 0xff; b++) {
|
||||
entry = row[b];
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry) &&
|
||||
hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {
|
||||
int[] row = stateTable[state];
|
||||
int entry = row[b];
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
|
||||
return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK));
|
||||
} else {
|
||||
short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
|
||||
return false; /* SI/SO are illegal for DBCS-only conversion */
|
||||
} else {
|
||||
return (action != MBCS_STATE_ILLEGAL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 2006-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
@ -4809,21 +4809,6 @@ public class TestCharset extends TestFmwk {
|
||||
if (!result.isOverflow()) {
|
||||
errln("Overflow buffer while decoding ISO-2022-KR should have occurred.");
|
||||
}
|
||||
|
||||
/* This is part of the ambiguous converter test in ICU4C and is used here to provide
|
||||
* better code coverage.
|
||||
*/
|
||||
byte [] bytearray2 = {
|
||||
0x61, 0x5b, 0x5c
|
||||
};
|
||||
|
||||
bb = ByteBuffer.wrap(bytearray2);
|
||||
cb = CharBuffer.allocate(20);
|
||||
|
||||
result = decoder.decode(bb, cb, true);
|
||||
if (!result.isMalformed()) {
|
||||
errln("Malformed error while decoding ISO-2022-KR should have occurred.");
|
||||
}
|
||||
}
|
||||
|
||||
//provide better code coverage for Charset ISO-2022-JP
|
||||
|
@ -1090,38 +1090,16 @@ public class TestConversion extends ModuleTest {
|
||||
output.limit(output.position());
|
||||
output.rewind();
|
||||
|
||||
//TODO: Fix Me! After Ticket#6583 is completed, this code should be removed.
|
||||
boolean ignoreError = (0 <= cc.caseNr && cc.caseNr <= 15) || cc.caseNr == 17 || cc.caseNr == 18;
|
||||
//TODO: End
|
||||
|
||||
// test to see if the conversion matches actual results
|
||||
if (output.limit() != expected.length()) {
|
||||
//TODO: Remove this
|
||||
if (ignoreError) {
|
||||
logln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
|
||||
} else {
|
||||
errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
|
||||
res = false;
|
||||
}
|
||||
//TODO: End
|
||||
// errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
|
||||
// res = false;
|
||||
errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
|
||||
res = false;
|
||||
} else {
|
||||
for (int i = 0; i < expected.length(); i++) {
|
||||
if (output.get(i) != expected.charAt(i)) {
|
||||
//TODO: Remove this
|
||||
if (ignoreError) {
|
||||
logln("Test failed: output does not match expected for charset: " + cc.charset
|
||||
+ " [" + cc.caseNr + "]");
|
||||
} else {
|
||||
errln("Test failed: output does not match expected for charset: " + cc.charset
|
||||
+ " [" + cc.caseNr + "]");
|
||||
res = false;
|
||||
}
|
||||
//TODO: End
|
||||
// errln("Test failed: output does not match expected for charset: " + cc.charset
|
||||
// + " [" + cc.caseNr + "]");
|
||||
// res = false;
|
||||
errln("Test failed: output does not match expected for charset: " + cc.charset
|
||||
+ " [" + cc.caseNr + "]");
|
||||
res = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user