ICU-5398 make all tests pass

X-SVN-Rev: 20392
This commit is contained in:
Ram Viswanadha 2006-09-23 15:47:25 +00:00
parent 508d771e44
commit cbf0499659
9 changed files with 1050 additions and 909 deletions

View File

@ -8,7 +8,6 @@
*/
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@ -48,36 +47,34 @@ class Charset88591 extends CharsetICU {
int sourceIndex = 0;
char c=0;
int oldTarget = target.position();
try{
/* conversion loop */
c=0;
while(sourceArrayIndex<source.limit() &&
(c=(char)(source.get(sourceArrayIndex)&0xFF))<=0xff ) {
target.put(c);
sourceArrayIndex++;
}
/* conversion loop */
c=0;
while(sourceArrayIndex<source.limit() &&
(c=(char)(source.get(sourceArrayIndex)&0xFF))<=0xff &&
target.hasRemaining()) {
target.put(c);
sourceArrayIndex++;
}
if(c>0xff) {
/* callback(illegal); copy the current bytes to toUBytes[] */
toUBytesArray[0]=(byte)c;
toULength=1;
cr = CoderResult.malformedForLength(toULength);
} else if(sourceArrayIndex<source.limit() && !target.hasRemaining()) {
/* target is full */
cr = CoderResult.OVERFLOW;
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
}catch(BufferOverflowException ex){
if(c>0xff) {
/* callback(illegal); copy the current bytes to toUBytes[] */
toUBytesArray[0]=(byte)c;
toULength=1;
cr = CoderResult.malformedForLength(toULength);
} else if(sourceArrayIndex<source.limit() && !target.hasRemaining()) {
/* target is full */
cr = CoderResult.OVERFLOW;
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
source.position(sourceArrayIndex);
return cr;
}
@ -114,88 +111,90 @@ class Charset88591 extends CharsetICU {
int oldTarget = target.position();
boolean doloop = true;
try{
if (fromUChar32 != 0 && target.hasRemaining()){
ch = fromUChar32;
fromUChar32 = 0;
if (sourceArrayIndex < source.limit()) {
/* test the following code unit */
char trail = source.get(sourceArrayIndex);
if(UTF16.isTrailSurrogate(trail)) {
++sourceArrayIndex;
ch = UCharacter.getCodePoint((char)ch, trail);
/* convert this supplementary code point */
/* callback(unassigned) */
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
fromUChar32 = (int)ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
} else {
/* no more input */
fromUChar32 = (int)ch;
if (fromUChar32 != 0 && target.hasRemaining()){
ch = fromUChar32;
fromUChar32 = 0;
if (sourceArrayIndex < source.limit()) {
/* test the following code unit */
char trail = source.get(sourceArrayIndex);
if(UTF16.isTrailSurrogate(trail)) {
++sourceArrayIndex;
ch = UCharacter.getCodePoint((char)ch, trail);
/* convert this supplementary code point */
cr = CoderResult.unmappableForLength(sourceArrayIndex);
doloop = false;
}
}
if(doloop){
/* conversion loop */
ch=0;
int ch2=0;
while(sourceArrayIndex<source.limit()){
ch=source.get(sourceArrayIndex++);
if(ch<=0xff) {
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
fromUChar32 = (int)ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
} else {
/* no more input */
fromUChar32 = (int)ch;
doloop = false;
}
}
if(doloop){
/* conversion loop */
ch=0;
int ch2=0;
while(sourceArrayIndex<source.limit()){
ch=source.get(sourceArrayIndex++);
if(ch<=0xff) {
if( target.hasRemaining()){
target.put((byte)ch);
}else {
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.OVERFLOW;
break;
}
}else{
cr = CoderResult.OVERFLOW;
break;
}
}else {
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* ran out of source */
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
cr = CoderResult.OVERFLOW;
break;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
break;
}
}
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
source.position(sourceArrayIndex);
return cr;
}

View File

@ -8,7 +8,6 @@
*/
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@ -48,36 +47,34 @@ class CharsetASCII extends CharsetICU {
int sourceIndex = 0;
char c=0;
int oldTarget = target.position();
try{
/* conversion loop */
c=0;
while(sourceArrayIndex<source.limit()&&
(c=(char)source.get(sourceArrayIndex))<=0x7f){
target.put(c);
sourceArrayIndex++;
}
if(c>0x7f) {
/* callback(illegal); copy the current bytes to toUBytes[] */
toUBytesArray[0]=(byte)c;
toULength=1;
cr = CoderResult.malformedForLength(toULength);
} else if(sourceArrayIndex<source.limit() && !target.hasRemaining()) {
/* target is full */
cr = CoderResult.OVERFLOW;
}
/* conversion loop */
c=0;
while(sourceArrayIndex<source.limit()&&
(c=(char)source.get(sourceArrayIndex))<=0x7f &&
target.hasRemaining()){
target.put(c);
sourceArrayIndex++;
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
}catch(BufferOverflowException ex){
if(c>0x7f) {
/* callback(illegal); copy the current bytes to toUBytes[] */
toUBytesArray[0]=(byte)c;
toULength=1;
cr = CoderResult.malformedForLength(toULength);
} else if(sourceArrayIndex<source.limit() && !target.hasRemaining()) {
/* target is full */
cr = CoderResult.OVERFLOW;
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
source.position(sourceArrayIndex);
return cr;
}
@ -113,88 +110,90 @@ class CharsetASCII extends CharsetICU {
int ch=0;
int oldTarget = target.position();
boolean doloop = true;
try{
if (fromUChar32 != 0 && target.hasRemaining()){
ch = fromUChar32;
fromUChar32 = 0;
if (sourceArrayIndex < source.limit()) {
/* test the following code unit */
char trail = source.get(sourceArrayIndex);
if(UTF16.isTrailSurrogate(trail)) {
++sourceArrayIndex;
ch = UCharacter.getCodePoint((char)ch, trail);
/* convert this supplementary code point */
/* callback(unassigned) */
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
fromUChar32 = (int)ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
if (fromUChar32 != 0 && target.hasRemaining()){
ch = fromUChar32;
fromUChar32 = 0;
if (sourceArrayIndex < source.limit()) {
/* test the following code unit */
char trail = source.get(sourceArrayIndex);
if(UTF16.isTrailSurrogate(trail)) {
++sourceArrayIndex;
ch = UCharacter.getCodePoint((char)ch, trail);
/* convert this supplementary code point */
/* callback(unassigned) */
} else {
/* no more input */
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
fromUChar32 = (int)ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
}
if(doloop){
/* conversion loop */
ch=0;
int ch2=0;
while(sourceArrayIndex<source.limit()){
ch=source.get(sourceArrayIndex++);
if(ch<=0xff) {
}
} else {
/* no more input */
fromUChar32 = (int)ch;
doloop = false;
}
}
if(doloop){
/* conversion loop */
ch=0;
int ch2=0;
while(sourceArrayIndex<source.limit()){
ch=source.get(sourceArrayIndex++);
if(ch<=0xff) {
if(target.hasRemaining()){
target.put((byte)ch);
}else {
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.OVERFLOW;
break;
}
}else{
cr = CoderResult.OVERFLOW;
break;
}
}else {
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* ran out of source */
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
cr = CoderResult.OVERFLOW;
break;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
break;
}
}
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
}
/* set offsets since the start */
if(offsets!=null) {
count=target.position()-oldTarget;
while(count>0) {
offsets.put(sourceIndex++);
--count;
}
}
source.position(sourceArrayIndex);
return cr;
}

View File

@ -9,7 +9,6 @@
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetDecoder;

View File

@ -170,6 +170,10 @@ public abstract class CharsetICU extends Charset{
throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className);
}
/**
 * Tells whether the given value lies in the UTF-16 surrogate range.
 *
 * @param c the code point (or code unit) value to test
 * @return true if c is within 0xD800..0xDFFF inclusive (either a lead
 *         or a trail surrogate), false for every other value
 */
static final boolean isSurrogate(int c){
    // Explicit range comparison; equivalent to masking off the low 11 bits
    // and comparing the remainder against 0xD800.
    final boolean inSurrogateRange = c >= 0xd800 && c <= 0xdfff;
    return inSurrogateRange;
}
/**
* Always use fallbacks from codepage to Unicode?
* @draft ICU 3.6

View File

@ -409,8 +409,7 @@ class CharsetUTF16 extends CharsetICU {
length=4; /* 4 bytes to output */
c=0;
} else {
/* unmatched lead surrogate */
//pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} else {

View File

@ -8,7 +8,6 @@
*/
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@ -51,200 +50,199 @@ class CharsetUTF16LE extends CharsetICU {
char c=0, trail;
length = source.remaining();
sourceArrayIndex = source.position();
try{
/* complete a partial UChar or pair from the last call */
if(toUnicodeStatus!=0) {
/*
* special case: single byte from a previous buffer,
* where the byte turned out not to belong to a trail surrogate
* and the preceding, unmatched lead surrogate was put into toUBytes[]
* for error handling
*/
toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
toULength=1;
toUnicodeStatus=0;
}
if((count=toULength)!=0) {
byte[] pArray=toUBytesArray;
int pArrayIndex = toUBytesBegin;
do {
pArray[count++]=source.get(sourceArrayIndex++);
++sourceIndex;
--length;
if(count==2) {
c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(!UTF16.isSurrogate(c)) {
/* output the BMP code point */
target.put(c);
/* complete a partial UChar or pair from the last call */
if(toUnicodeStatus!=0) {
/*
* special case: single byte from a previous buffer,
* where the byte turned out not to belong to a trail surrogate
* and the preceding, unmatched lead surrogate was put into toUBytes[]
* for error handling
*/
toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
toULength=1;
toUnicodeStatus=0;
}
if((count=toULength)!=0) {
byte[] pArray=toUBytesArray;
int pArrayIndex = toUBytesBegin;
do {
pArray[count++]=source.get(sourceArrayIndex++);
++sourceIndex;
--length;
if(count==2) {
c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(!UTF16.isSurrogate(c)) {
/* output the BMP code point */
target.put(c);
if(offsets!=null) {
offsets.put(-1);
}
count=0;
c=0;
break;
} else if(UTF16.isLeadSurrogate(c)) {
/* continue collecting bytes for the trail surrogate */
c=0; /* avoid unnecessary surrogate handling below */
} else {
/* fall through to error handling for an unmatched trail surrogate */
break;
}
} else if(count==4) {
c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
trail=(char)(((pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(UTF16.isTrailSurrogate(trail)) {
/* output the surrogate pair */
target.put(c);
if(target.remaining()>=1) {
target.put(trail);
if(offsets!=null) {
offsets.put(-1);
offsets.put(-1);
}
count=0;
c=0;
break;
} else if(UTF16.isLeadSurrogate(c)) {
/* continue collecting bytes for the trail surrogate */
c=0; /* avoid unnecessary surrogate handling below */
} else {
/* fall through to error handling for an unmatched trail surrogate */
break;
}
} else if(count==4) {
c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
trail=(char)(((pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(UTF16.isTrailSurrogate(trail)) {
/* output the surrogate pair */
target.put(c);
if(target.remaining()>=1) {
target.put(trail);
if(offsets!=null) {
offsets.put(-1);
offsets.put(-1);
}
} else /* targetCapacity==1 */ {
charErrorBufferArray[charErrorBufferBegin+0]=trail;
charErrorBufferLength=1;
cr = CoderResult.UNDERFLOW;
}
count=0;
c=0;
break;
} else {
/* unmatched lead surrogate, handle here for consistent toUBytes[] */
/* back out reading the code unit after it */
if((source.position()-sourceArrayIndex)>=2) {
sourceArrayIndex-=2;
} else {
/*
* if the trail unit's first byte was in a previous buffer, then
* we need to put it into a special place because toUBytes[] will be
* used for the lead unit's bytes
*/
toUnicodeStatus=0x100|pArray[pArrayIndex+2];
--sourceArrayIndex;
}
toULength=2;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
} while(length>0);
toULength=(byte)count;
}
/* copy an even number of bytes for complete UChars */
count=2*target.remaining();
if(count>length) {
count=length&~1;
}
if(c==0 && count>0) {
length-=count;
count>>=1;
//targetCapacity-=count;
if(offsets==null) {
do {
c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
} else {
break;
}
} while(--count>0);
} else {
do {
c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
offsets.put(sourceIndex);
sourceIndex+=2;
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=4;
} else {
break;
}
} while(--count>0);
}
if(count==0) {
/* done with the loop for complete UChars */
c=0;
} else {
/* keep c for surrogate handling, trail will be set there */
length+=2*(count-1); /* one more byte pair was consumed than count decremented */
}
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
toUBytesArray[toUBytesBegin+0]=(byte)c;
toUBytesArray[toUBytesBegin+1]=(byte)(c>>>8);
toULength=2;
if(UTF16.isLeadSurrogate(c)) {
if(length>=2) {
if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
sourceArrayIndex+=2;
length-=2;
target.put(c);
if(offsets!=null) {
offsets.put(sourceIndex);
}
} else /* targetCapacity==1 */ {
charErrorBufferArray[charErrorBufferBegin+0]=trail;
charErrorBufferLength=1;
toULength=0;
cr = CoderResult.OVERFLOW;
} else {
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
return CoderResult.OVERFLOW;
}
count=0;
c=0;
break;
} else {
/* see if the trail surrogate is in the next buffer */
/* unmatched lead surrogate, handle here for consistent toUBytes[] */
/* back out reading the code unit after it */
if((source.position()-sourceArrayIndex)>=2) {
sourceArrayIndex-=2;
} else {
/*
* if the trail unit's first byte was in a previous buffer, then
* we need to put it into a special place because toUBytes[] will be
* used for the lead unit's bytes
*/
toUnicodeStatus=0x100|pArray[pArrayIndex+2];
--sourceArrayIndex;
}
toULength=2;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} while(length>0);
toULength=(byte)count;
}
/* copy an even number of bytes for complete UChars */
count=2*target.remaining();
if(count>length) {
count=length&~1;
}
if(c==0 && count>0) {
length-=count;
count>>=1;
//targetCapacity-=count;
if(offsets==null) {
do {
c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
} else {
break;
}
} while(--count>0);
} else {
do {
c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
offsets.put(sourceIndex);
sourceIndex+=2;
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=4;
} else {
break;
}
} while(--count>0);
}
/* check for a remaining source byte */
if(!cr.isError()){
if(length>0) {
if(!target.hasRemaining()) {
if(count==0) {
/* done with the loop for complete UChars */
c=0;
} else {
/* keep c for surrogate handling, trail will be set there */
length+=2*(count-1); /* one more byte pair was consumed than count decremented */
}
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
toUBytesArray[toUBytesBegin+0]=(byte)c;
toUBytesArray[toUBytesBegin+1]=(byte)(c>>>8);
toULength=2;
if(UTF16.isLeadSurrogate(c)) {
if(length>=2) {
if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
sourceArrayIndex+=2;
length-=2;
target.put(c);
if(offsets!=null) {
offsets.put(sourceIndex);
}
charErrorBufferArray[charErrorBufferBegin+0]=trail;
charErrorBufferLength=1;
toULength=0;
cr = CoderResult.OVERFLOW;
} else {
/* it must be length==1 because otherwise the above would have copied more */
toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} else {
/* see if the trail surrogate is in the next buffer */
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
}
/* check for a remaining source byte */
if(!cr.isError()){
if(length>0) {
if(!target.hasRemaining()) {
cr = CoderResult.OVERFLOW;
} else {
/* it must be length==1 because otherwise the above would have copied more */
toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
}
}
source.position(sourceArrayIndex);
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
return cr;
}
@ -289,153 +287,148 @@ class CharsetUTF16LE extends CharsetICU {
int length = source.remaining();
int sourceArrayIndex = source.position();
try{
/* c!=0 indicates in several places outside the main loops that a surrogate was found */
if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex)) && target.remaining()>=4) {
/* the last buffer ended with a lead surrogate, output the surrogate pair */
++sourceArrayIndex;
--length;
target.put((byte)c);
target.put((byte)(c>>>8));
target.put((byte)trail);
target.put((byte)(trail>>>8));
if(offsets!=null && offsets.remaining()>=4) {
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
}
sourceIndex=1;
fromUChar32=c=0;
/* c!=0 indicates in several places outside the main loops that a surrogate was found */
if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex)) && target.remaining()>=4) {
/* the last buffer ended with a lead surrogate, output the surrogate pair */
++sourceArrayIndex;
--length;
target.put((byte)c);
target.put((byte)(c>>>8));
target.put((byte)trail);
target.put((byte)(trail>>>8));
if(offsets!=null && offsets.remaining()>=4) {
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
}
byte overflow[/*4*/] = new byte[4];
if(c==0) {
/* copy an even number of bytes for complete UChars */
int count=2*length;
int targetCapacity = target.remaining();
if(count>targetCapacity) {
count=targetCapacity&~1;
}
/* count is even */
targetCapacity-=count;
count>>=1;
length-=count;
if(offsets==null) {
while(count>0) {
c= source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)c);
target.put((byte)(c>>>8));
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)c);
target.put((byte)(c>>>8));
target.put((byte)trail);
target.put((byte)(trail>>>8));
} else {
break;
}
--count;
}
} else {
while(count>0) {
c=source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)c);
target.put((byte)(c>>>8));
offsets.put(sourceIndex);
offsets.put(sourceIndex++);
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)c);
target.put((byte)(c>>>8));
target.put((byte)trail);
target.put((byte)(trail>>>8));
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=2;
} else {
break;
}
--count;
}
}
sourceIndex=1;
fromUChar32=c=0;
}
byte overflow[/*4*/] = new byte[4];
if(count==0) {
/* done with the loop for complete UChars */
if(length>0 && targetCapacity>0) {
/*
* there is more input and some target capacity -
* it must be targetCapacity==1 because otherwise
* the above would have copied more;
* prepare for overflow output
*/
if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
overflow[0]=(byte)c;
overflow[1]=(byte)(c>>>8);
length=2; /* 2 bytes to output */
c=0;
/* } else { keep c for surrogate handling, length will be set there */
}
if(c==0) {
/* copy an even number of bytes for complete UChars */
int count=2*length;
int targetCapacity = target.remaining();
if(count>targetCapacity) {
count=targetCapacity&~1;
}
/* count is even */
targetCapacity-=count;
count>>=1;
length-=count;
if(offsets==null) {
while(count>0) {
c= source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)c);
target.put((byte)(c>>>8));
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)c);
target.put((byte)(c>>>8));
target.put((byte)trail);
target.put((byte)(trail>>>8));
} else {
length=0;
c=0;
break;
}
} else {
/* keep c for surrogate handling, length will be set there */
targetCapacity+=2*count;
--count;
}
} else {
length=0; /* from here on, length counts the bytes in overflow[] */
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
length=0;
if(UTF16.isLeadSurrogate(c)) {
if(sourceArrayIndex<source.limit()) {
if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
++sourceArrayIndex;
overflow[0]=(byte)c;
overflow[1]=(byte)(c>>>8);
overflow[2]=(byte)trail;
overflow[3]=(byte)(trail>>>8);
length=4; /* 4 bytes to output */
c=0;
} else {
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
while(count>0) {
c=source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)c);
target.put((byte)(c>>>8));
offsets.put(sourceIndex);
offsets.put(sourceIndex++);
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)c);
target.put((byte)(c>>>8));
target.put((byte)trail);
target.put((byte)(trail>>>8));
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=2;
} else {
/* see if the trail surrogate is in the next buffer */
break;
}
--count;
}
}
if(count==0) {
/* done with the loop for complete UChars */
if(length>0 && targetCapacity>0) {
/*
* there is more input and some target capacity -
* it must be targetCapacity==1 because otherwise
* the above would have copied more;
* prepare for overflow output
*/
if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
overflow[0]=(byte)c;
overflow[1]=(byte)(c>>>8);
length=2; /* 2 bytes to output */
c=0;
/* } else { keep c for surrogate handling, length will be set there */
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
length=0;
c=0;
}
fromUChar32=c;
} else {
/* keep c for surrogate handling, length will be set there */
targetCapacity+=2*count;
}
source.position(sourceArrayIndex);
if(length>0) {
/* output length bytes with overflow (length>targetCapacity>0) */
fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
} else {
length=0; /* from here on, length counts the bytes in overflow[] */
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
length=0;
if(UTF16.isLeadSurrogate(c)) {
if(sourceArrayIndex<source.limit()) {
if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
++sourceArrayIndex;
overflow[0]=(byte)c;
overflow[1]=(byte)(c>>>8);
overflow[2]=(byte)trail;
overflow[3]=(byte)(trail>>>8);
length=4; /* 4 bytes to output */
c=0;
} else {
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} else {
/* see if the trail surrogate is in the next buffer */
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
fromUChar32=c;
}
source.position(sourceArrayIndex);
if(length>0) {
/* output length bytes with overflow (length>targetCapacity>0) */
cr = fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
}
return cr;
}

View File

@ -8,7 +8,6 @@
*/
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@ -42,116 +41,114 @@ class CharsetUTF32 extends CharsetICU {
int sourceArrayIndex = source.position();
int ch, i;
try{
donefornow:
{
/* UTF-8 returns here for only non-offset, this needs to change.*/
if (toUnicodeStatus != 0 && target.hasRemaining()) {
i = toULength; /* restore # of bytes consumed */
ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
toUnicodeStatus = 0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch = (ch << 8) | ((byte)(source.get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
if (ch <= UConverterConstants.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterConstants.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char)ch);
}
else {
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
throw new BufferOverflowException();
}
}
donefornow:
{
/* UTF-8 returns here for only non-offset, this needs to change.*/
if (toUnicodeStatus != 0 && target.hasRemaining()) {
i = toULength; /* restore # of bytes consumed */
ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
toUnicodeStatus = 0;
toULength =0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch = (ch << 8) | ((byte)(source.get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
i = 0;
ch = 0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch = (ch << 8) | ((byte)(source.get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
if (ch <= UConverterSharedData.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterSharedData.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char) ch);
}
else {
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
throw new BufferOverflowException();
}
}
if (ch <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterConstants.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char)ch);
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
cr = CoderResult.OVERFLOW;
}
}
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break donefornow;
}
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
/* End of target buffer */
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
i = 0;
ch = 0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch = (ch << 8) | ((byte)(source.get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
if (ch <= UConverterSharedData.MAXIMUM_UTF && !isSurrogate(ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterSharedData.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char) ch);
}
else {
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
cr = CoderResult.OVERFLOW;
break;
}
}
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
/* End of target buffer */
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
return cr;
}
}
@ -193,37 +190,91 @@ class CharsetUTF32 extends CharsetICU {
temp[0] = 0;
int sourceArrayIndex = source.position();
try{
boolean doloop = true;
if (fromUChar32 != 0) {
ch = fromUChar32;
fromUChar32 = 0;
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
boolean doloop = true;
if (fromUChar32 != 0) {
ch = fromUChar32;
fromUChar32 = 0;
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* ran out of source */
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
doloop = false;
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
temp[1] = (byte) (ch >>> 16 & 0x1F);
temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
temp[3] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
if (target.hasRemaining()) {
target.put(temp[indexToWrite]);
}
else {
errorBuffer[errorBufferLength++] = temp[indexToWrite];
cr = CoderResult.OVERFLOW;
}
}
}
if(doloop) {
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
ch = source.get(sourceArrayIndex++);
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.OVERFLOW;
break;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
break;
}
}
else {
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
temp[1] = (byte) (ch >>> 16 & 0x1F);
temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
@ -239,71 +290,12 @@ class CharsetUTF32 extends CharsetICU {
}
}
}
if(doloop) {
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
ch = source.get(sourceArrayIndex++);
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.OVERFLOW;
break;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
break;
}
}
else {
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
temp[1] = (byte) (ch >>> 16 & 0x1F);
temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
temp[3] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
if (target.hasRemaining()) {
target.put(temp[indexToWrite]);
}
else {
errorBuffer[errorBufferLength++] = temp[indexToWrite];
cr = CoderResult.OVERFLOW;
}
}
}
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
}catch(BufferOverflowException ex){
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
return cr;
}
}

View File

@ -8,7 +8,6 @@
*/
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@ -41,117 +40,115 @@ class CharsetUTF32LE extends CharsetICU {
int sourceArrayIndex = source.position();
int ch, i;
donefornow:
{
/* UTF-8 returns here for only non-offset, this needs to change.*/
if (toUnicodeStatus != 0 && target.hasRemaining()) {
i = toULength; /* restore # of bytes consumed */
try{
donefornow:
{
/* UTF-8 returns here for only non-offset, this needs to change.*/
if (toUnicodeStatus != 0 && target.hasRemaining()) {
i = toULength; /* restore # of bytes consumed */
ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
toUnicodeStatus = 0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
if (ch <= UConverterConstants.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterConstants.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char)ch);
}
else {
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
throw new BufferOverflowException();
}
}
ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
toUnicodeStatus = 0;
toULength=0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
i = 0;
ch = 0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
if (ch <= UConverterSharedData.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterSharedData.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char) ch);
}
else {
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
throw new BufferOverflowException();
}
}
if (ch <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterConstants.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char)ch);
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
cr = CoderResult.OVERFLOW;
}
}
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break donefornow;
}
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
/* End of target buffer */
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
i = 0;
ch = 0;
while (i < 4) {
if (sourceArrayIndex < source.limit()) {
ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
}
else {
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
toUnicodeStatus = ch + 1;
toULength = (byte) i;
break donefornow;
}
}
if (ch <= UConverterSharedData.MAXIMUM_UTF && !isSurrogate(ch)) {
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= UConverterSharedData.MAXIMUM_UCS2)
{
/* fits in 16 bits */
target.put((char) ch);
}
else {
/* write out the surrogates */
target.put(UTF16.getLeadSurrogate(ch));
ch = UTF16.getTrailSurrogate(ch);
if (target.hasRemaining()) {
target.put((char)ch);
}
else {
/* Put in overflow buffer (not handled here) */
charErrorBufferArray[0] = (char) ch;
charErrorBufferLength = 1;
cr = CoderResult.OVERFLOW;
break;
}
}
}
else {
toULength = (byte)i;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
/* End of target buffer */
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
return cr;
}
}
@ -193,37 +190,91 @@ class CharsetUTF32LE extends CharsetICU {
temp[3] = 0;
int sourceArrayIndex = source.position();
try{
boolean doloop = true;
if (fromUChar32 != 0) {
ch = fromUChar32;
fromUChar32 = 0;
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
boolean doloop = true;
if (fromUChar32 != 0) {
ch = fromUChar32;
fromUChar32 = 0;
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* ran out of source */
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
cr = CoderResult.malformedForLength(sourceArrayIndex);
doloop = false;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
doloop = false;
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
temp[2] = (byte) (ch >>> 16 & 0x1F);
temp[1] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
temp[0] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
if (target.hasRemaining()) {
target.put(temp[indexToWrite]);
}
else {
errorBuffer[errorBufferLength++] = temp[indexToWrite];
cr = CoderResult.OVERFLOW;
}
}
}
if(doloop) {
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
ch = source.get(sourceArrayIndex++);
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.OVERFLOW;
break;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
break;
}
}
else {
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
temp[2] = (byte) (ch >>> 16 & 0x1F);
temp[1] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
@ -239,71 +290,12 @@ class CharsetUTF32LE extends CharsetICU {
}
}
}
if(doloop) {
while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
ch = source.get(sourceArrayIndex++);
if (UTF16.isSurrogate((char)ch)) {
if (UTF16.isLeadSurrogate((char)ch)) {
//lowsurogate:
if (sourceArrayIndex < source.limit()) {
ch2 = source.get(sourceArrayIndex);
if (UTF16.isTrailSurrogate((char)ch2)) {
ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
sourceArrayIndex++;
}
else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
fromUChar32 = ch;
cr = CoderResult.OVERFLOW;
break;
}
}
else {
/* ran out of source */
fromUChar32 = ch;
if (flush) {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
break;
}
}
else {
fromUChar32 = ch;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
temp[2] = (byte) (ch >>> 16 & 0x1F);
temp[1] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
temp[0] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
if (target.hasRemaining()) {
target.put(temp[indexToWrite]);
}
else {
errorBuffer[errorBufferLength++] = temp[indexToWrite];
cr = CoderResult.OVERFLOW;
}
}
}
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
}catch(BufferOverflowException ex){
}
if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
return cr;
}
}

View File

@ -77,12 +77,18 @@ public class TestCharset extends TestFmwk {
}
public void TestUTF16Converter(){
CharsetProvider icu = new CharsetProviderICU();
Charset icuChar = icu.charsetForName("UTF-16");
CharsetEncoder encoder = icuChar.newEncoder();
CharsetDecoder decoder = icuChar.newDecoder();
Charset cs1 = icu.charsetForName("UTF-16");
CharsetEncoder e1 = cs1.newEncoder();
CharsetDecoder d1 = cs1.newDecoder();
Charset cs2 = icu.charsetForName("UTF-16LE");
CharsetEncoder e2 = cs2.newEncoder();
CharsetDecoder d2 = cs2.newDecoder();
for(int i=0x0000; i<0x10FFFF; i+=0xFF){
CharBuffer us = CharBuffer.allocate(0xFF*2);
ByteBuffer bs = ByteBuffer.allocate(0xFF*8);
ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8);
ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8);
for(int j=0;j<0xFF; j++){
int c = i+j;
@ -103,36 +109,136 @@ public class TestCharset extends TestFmwk {
}
us.put(lead);
us.put(trail);
bs.put((byte)(lead>>8));
bs.put((byte)(lead&0xFF));
bs.put((byte)(trail>>8));
bs.put((byte)(trail&0xFF));
bs1.put((byte)(lead>>8));
bs1.put((byte)(lead&0xFF));
bs1.put((byte)(trail>>8));
bs1.put((byte)(trail&0xFF));
bs2.put((byte)(lead&0xFF));
bs2.put((byte)(lead>>8));
bs2.put((byte)(trail&0xFF));
bs2.put((byte)(trail>>8));
}else{
if(c<0xFF){
bs.put((byte)(c>>8));
bs.put((byte)(c&0xFF));
bs1.put((byte)0x00);
bs1.put((byte)(c));
bs2.put((byte)(c));
bs2.put((byte)0x00);
}else{
bs.put((byte)(c>>8));
bs.put((byte)(c&0xFF));
bs1.put((byte)(c>>8));
bs1.put((byte)(c&0xFF));
bs2.put((byte)(c&0xFF));
bs2.put((byte)(c>>8));
}
us.put((char)c);
}
}
bs.limit(bs.position());
bs.position(0);
us.limit(us.position());
us.position(0);
if(us.length()==0){
continue;
}
smBufDecode(decoder, "UTF-16", bs, us);
ByteBuffer newBS = ByteBuffer.allocate(bs.capacity());
bs1.limit(bs1.position());
bs1.position(0);
ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
newBS.put((byte)0xFE);
newBS.put((byte)0xFF);
newBS.put(bs);
newBS.put(bs1);
bs1.position(0);
smBufDecode(d1, "UTF-16", bs1, us);
smBufEncode(e1, "UTF-16", us, newBS);
smBufEncode(encoder, "UTF-16", us, newBS);
bs2.limit(bs2.position());
bs2.position(0);
newBS.clear();
newBS.put((byte)0xFF);
newBS.put((byte)0xFE);
newBS.put(bs2);
bs2.position(0);
smBufDecode(d2, "UTF16-LE", bs2, us);
smBufEncode(e2, "UTF-16LE", us, newBS);
}
}
/**
 * Exercises the "UTF-32" and "UTF-32LE" charsets obtained from
 * CharsetProviderICU.
 *
 * Code points are walked in windows of 0xFF starting at 0x1d827. For each
 * window the method builds the UTF-16 text plus its big-endian and
 * little-endian 4-byte forms, then hands them to smBufDecode and
 * smBufEncode. The buffer passed to smBufEncode additionally starts with
 * the four signature bytes (00 00 FE FF, respectively FF FE 00 00).
 */
public void TestUTF32Converter(){
    final int window = 0xFF;

    CharsetProvider provider = new CharsetProviderICU();
    Charset utf32 = provider.charsetForName("UTF-32");
    CharsetEncoder utf32Encoder = utf32.newEncoder();
    CharsetDecoder utf32Decoder = utf32.newDecoder();
    Charset utf32le = provider.charsetForName("UTF-32LE");
    CharsetEncoder utf32leEncoder = utf32le.newEncoder();
    CharsetDecoder utf32leDecoder = utf32le.newDecoder();

    for (int base = 0x1d827; base < 0x10FFFF; base += window) {
        CharBuffer text = CharBuffer.allocate(window * 2);
        ByteBuffer bigEndian = ByteBuffer.allocate(window * 8);
        ByteBuffer littleEndian = ByteBuffer.allocate(window * 8);

        for (int offset = 0; offset < window; offset++) {
            int cp = base + offset;
            boolean isSurrogateValue = cp >= 0xd800 && cp <= 0xdFFF;
            boolean beyondUnicode = cp > 0x10FFFF;
            if (!isSurrogateValue && !beyondUnicode) {
                if (cp <= 0xFFFF) {
                    text.put((char) cp);
                } else {
                    /* supplementary code point: stored as a surrogate pair */
                    text.put(UTF16.getLeadSurrogate(cp));
                    text.put(UTF16.getTrailSurrogate(cp));
                }
                /* most significant byte first */
                for (int shift = 24; shift >= 0; shift -= 8) {
                    bigEndian.put((byte) (cp >>> shift));
                }
                /* least significant byte first */
                for (int shift = 0; shift <= 24; shift += 8) {
                    littleEndian.put((byte) (cp >>> shift));
                }
            }
        }

        /* switch all three buffers from filling to reading */
        bigEndian.flip();
        littleEndian.flip();
        text.flip();

        if (text.hasRemaining()) {
            ByteBuffer withSignature = ByteBuffer.allocate(bigEndian.capacity());

            withSignature.put(new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF });
            withSignature.put(bigEndian);
            bigEndian.rewind(); /* the bulk put above consumed bigEndian */
            smBufDecode(utf32Decoder, "UTF-32", bigEndian, text);
            smBufEncode(utf32Encoder, "UTF-32", text, withSignature);

            withSignature.clear();
            withSignature.put(new byte[] { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 });
            withSignature.put(littleEndian);
            littleEndian.rewind(); /* the bulk put above consumed littleEndian */
            smBufDecode(utf32leDecoder, "UTF-32LE", littleEndian, text);
            smBufEncode(utf32leEncoder, "UTF-32LE", text, withSignature);
        }
    }
}
@ -441,7 +547,7 @@ public class TestCharset extends TestFmwk {
boolean result = true;
for (int i = 0; i < chars.length; i++) {
if (chars[i] != compareTo[i]) {
errln(
logln(
"Got: "
+ hex(chars[i])
+ " Expected: "
@ -480,7 +586,7 @@ public class TestCharset extends TestFmwk {
boolean result = true;
for (int i = 0; i < chars.length; i++) {
if (chars[i] != compareTo[i]) {
errln(
logln(
"Got: "
+ hex(chars[i])
+ " Expected: "
@ -779,11 +885,42 @@ public class TestCharset extends TestFmwk {
ByteBuffer mySource = source.duplicate();
CharBuffer myTarget = CharBuffer.allocate(target.capacity());
{
decoder.reset();
myTarget.limit(target.limit());
mySource.limit(source.limit());
mySource.position(source.position());
CoderResult result = CoderResult.UNDERFLOW;
result = decoder.decode(mySource, myTarget, true);
if (result.isError()) {
errln("Test complete buffers while decoding failed. "+result.toString());
return;
}
if (result.isOverflow()) {
errln("Test complete buffers while decoding threw overflow exception");
return;
}
myTarget.limit(myTarget.position());
myTarget.position(0);
target.position(0);
if (result.isUnderflow()&&!equals(myTarget,target)) {
errln(
" Test complete buffers while decoding "
+ encoding
+ " TO Unicode--failed");
}
}
if(isQuick()){
return;
}
{
decoder.reset();
myTarget.limit(target.limit());
mySource.limit(source.limit());
mySource.position(source.position());
myTarget.clear();
myTarget.position(0);
int inputLen = mySource.remaining();
CoderResult result = CoderResult.UNDERFLOW;
@ -819,6 +956,7 @@ public class TestCharset extends TestFmwk {
myTarget.limit(target.limit());
mySource.limit(source.limit());
mySource.position(source.position());
myTarget.clear();
while (true) {
int pos = myTarget.position();
myTarget.limit(++pos);
@ -858,6 +996,34 @@ public class TestCharset extends TestFmwk {
myTarget.limit(target.limit());
mySource.limit(source.limit());
mySource.position(source.position());
CoderResult result=null;
result = encoder.encode(mySource, myTarget, true);
if (result.isError()) {
errln("Test complete while encoding failed. "+result.toString());
}
if (result.isOverflow()) {
errln("Test complete while encoding threw overflow exception");
}
if (!equals(myTarget,target)) {
errln("Test complete buffers while encoding for "+ encoding+ " failed");
}else{
logln("Tests complete buffers for "+ encoding +" passed");
}
}
if(isQuick()){
return;
}
{
logln("Running tests on small input buffers for "+ encoding);
encoder.reset();
myTarget.clear();
myTarget.limit(target.limit());
mySource.limit(source.limit());
mySource.position(source.position());
int inputLen = mySource.limit();
CoderResult result=null;
for(int i=1; i<=inputLen; i++) {
@ -869,19 +1035,17 @@ public class TestCharset extends TestFmwk {
if (result.isOverflow()) {
errln("Test small input buffers while encoding threw overflow exception");
}
}
if (!equals(myTarget,target)) {
errln("Test small input buffers "+ encoding+ " From Unicode failed");
}else{
logln("Tests on small input buffers for "+ encoding +" passed");
}
logln("Tests on small input buffers for "+ encoding +" passed");
}
{
logln("Running tests on small output buffers for "+ encoding);
encoder.reset();
myTarget.clear();
myTarget.limit(target.limit());
mySource.limit(source.limit());
mySource.position(source.position());