ICU-5398 fix null pointer exception

X-SVN-Rev: 20389
This commit is contained in:
Ram Viswanadha 2006-09-22 21:28:20 +00:00
parent a0bde232fe
commit fe14a5d2a7
2 changed files with 309 additions and 316 deletions

View File

@@ -231,7 +231,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
 length=charErrorBufferLength;
 i=0;
 do {
-if(target.remaining()<0) {
+if(target.remaining()<=0) {
 /* the overflow buffer contains too much, keep the rest */
 int j=0;

View File

@@ -8,7 +8,6 @@
*/
package com.ibm.icu.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@@ -48,200 +47,198 @@ class CharsetUTF16 extends CharsetICU {
char c=0, trail;
length = source.remaining();
sourceArrayIndex = source.position();
try{
/* complete a partial UChar or pair from the last call */
if(toUnicodeStatus!=0) {
/*
* special case: single byte from a previous buffer,
* where the byte turned out not to belong to a trail surrogate
* and the preceding, unmatched lead surrogate was put into toUBytes[]
* for error handling
*/
toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
toULength=1;
toUnicodeStatus=0;
}
if((count=toULength)!=0) {
byte[] pArray=toUBytesArray;
int pArrayIndex = toUBytesBegin;
do {
pArray[count++]=source.get(sourceArrayIndex++);
++sourceIndex;
--length;
if(count==2) {
c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(!UTF16.isSurrogate(c)) {
/* output the BMP code point */
target.put(c);
/* complete a partial UChar or pair from the last call */
if(toUnicodeStatus!=0) {
/*
* special case: single byte from a previous buffer,
* where the byte turned out not to belong to a trail surrogate
* and the preceding, unmatched lead surrogate was put into toUBytes[]
* for error handling
*/
toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
toULength=1;
toUnicodeStatus=0;
}
if((count=toULength)!=0) {
byte[] pArray=toUBytesArray;
int pArrayIndex = toUBytesBegin;
do {
pArray[count++]=source.get(sourceArrayIndex++);
++sourceIndex;
--length;
if(count==2) {
c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(!UTF16.isSurrogate(c)) {
/* output the BMP code point */
target.put(c);
if(offsets!=null) {
offsets.put(-1);
}
count=0;
c=0;
break;
} else if(UTF16.isLeadSurrogate(c)) {
/* continue collecting bytes for the trail surrogate */
c=0; /* avoid unnecessary surrogate handling below */
} else {
/* fall through to error handling for an unmatched trail surrogate */
break;
}
} else if(count==4) {
c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
trail=(char)(((pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(UTF16.isTrailSurrogate(trail)) {
/* output the surrogate pair */
target.put(c);
if(target.remaining()>=1) {
target.put(trail);
if(offsets!=null) {
offsets.put(-1);
offsets.put(-1);
}
count=0;
c=0;
break;
} else if(UTF16.isLeadSurrogate(c)) {
/* continue collecting bytes for the trail surrogate */
c=0; /* avoid unnecessary surrogate handling below */
} else {
/* fall through to error handling for an unmatched trail surrogate */
break;
}
} else if(count==4) {
c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
trail=(char)(((pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK));
if(UTF16.isTrailSurrogate(trail)) {
/* output the surrogate pair */
target.put(c);
if(target.remaining()>=1) {
target.put(trail);
if(offsets!=null) {
offsets.put(-1);
offsets.put(-1);
}
} else /* targetCapacity==1 */ {
charErrorBufferArray[charErrorBufferBegin+0]=trail;
charErrorBufferLength=1;
return CoderResult.OVERFLOW;
}
count=0;
c=0;
break;
} else {
/* unmatched lead surrogate, handle here for consistent toUBytes[] */
/* back out reading the code unit after it */
if((source.position()-sourceArrayIndex)>=2) {
sourceArrayIndex-=2;
} else {
/*
* if the trail unit's first byte was in a previous buffer, then
* we need to put it into a special place because toUBytes[] will be
* used for the lead unit's bytes
*/
toUnicodeStatus=0x100|pArray[pArrayIndex+2];
--sourceArrayIndex;
}
toULength=2;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
}
} while(length>0);
toULength=(byte)count;
}
/* copy an even number of bytes for complete UChars */
count=2*target.remaining();
if(count>length) {
count=length&~1;
}
if(c==0 && count>0) {
length-=count;
count>>=1;
//targetCapacity-=count;
if(offsets==null) {
do {
c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
} else {
break;
}
} while(--count>0);
} else {
do {
c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
offsets.put(sourceIndex);
sourceIndex+=2;
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=4;
} else {
break;
}
} while(--count>0);
}
if(count==0) {
/* done with the loop for complete UChars */
c=0;
} else {
/* keep c for surrogate handling, trail will be set there */
length+=2*(count-1); /* one more byte pair was consumed than count decremented */
}
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
toUBytesArray[toUBytesBegin+0]=(byte)(c>>>8);
toUBytesArray[toUBytesBegin+1]=(byte)c;
toULength=2;
if(UTF16.isLeadSurrogate(c)) {
if(length>=2) {
if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
sourceArrayIndex+=2;
length-=2;
target.put(c);
if(offsets!=null) {
offsets.put(sourceIndex);
}
} else /* targetCapacity==1 */ {
charErrorBufferArray[charErrorBufferBegin+0]=trail;
charErrorBufferLength=1;
toULength=0;
cr = CoderResult.OVERFLOW;
} else {
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
return CoderResult.OVERFLOW;
}
count=0;
c=0;
break;
} else {
/* see if the trail surrogate is in the next buffer */
/* unmatched lead surrogate, handle here for consistent toUBytes[] */
/* back out reading the code unit after it */
if((source.position()-sourceArrayIndex)>=2) {
sourceArrayIndex-=2;
} else {
/*
* if the trail unit's first byte was in a previous buffer, then
* we need to put it into a special place because toUBytes[] will be
* used for the lead unit's bytes
*/
toUnicodeStatus=0x100|pArray[pArrayIndex+2];
--sourceArrayIndex;
}
toULength=2;
cr = CoderResult.malformedForLength(sourceArrayIndex);
break;
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} while(length>0);
toULength=(byte)count;
}
/* copy an even number of bytes for complete UChars */
count=2*target.remaining();
if(count>length) {
count=length&~1;
}
if(c==0 && count>0) {
length-=count;
count>>=1;
//targetCapacity-=count;
if(offsets==null) {
do {
c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
} else {
break;
}
} while(--count>0);
} else {
do {
c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
sourceArrayIndex+=2;
if(!UTF16.isSurrogate(c)) {
target.put(c);
offsets.put(sourceIndex);
sourceIndex+=2;
} else if(UTF16.isLeadSurrogate(c) && count>=2 &&
UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
) {
sourceArrayIndex+=2;
--count;
target.put(c);
target.put(trail);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=4;
} else {
break;
}
} while(--count>0);
}
/* check for a remaining source byte */
if(!cr.isError()){
if(length>0) {
if(!target.hasRemaining()) {
if(count==0) {
/* done with the loop for complete UChars */
c=0;
} else {
/* keep c for surrogate handling, trail will be set there */
length+=2*(count-1); /* one more byte pair was consumed than count decremented */
}
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
toUBytesArray[toUBytesBegin+0]=(byte)(c>>>8);
toUBytesArray[toUBytesBegin+1]=(byte)c;
toULength=2;
if(UTF16.isLeadSurrogate(c)) {
if(length>=2) {
if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
sourceArrayIndex+=2;
length-=2;
target.put(c);
if(offsets!=null) {
offsets.put(sourceIndex);
}
charErrorBufferArray[charErrorBufferBegin+0]=trail;
charErrorBufferLength=1;
toULength=0;
cr = CoderResult.OVERFLOW;
} else {
/* it must be length==1 because otherwise the above would have copied more */
toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
/* unmatched lead surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} else {
/* see if the trail surrogate is in the next buffer */
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
}
/* check for a remaining source byte */
if(!cr.isError()){
if(length>0) {
if(!target.hasRemaining()) {
cr = CoderResult.OVERFLOW;
} else {
/* it must be length==1 because otherwise the above would have copied more */
toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
}
}
source.position(sourceArrayIndex);
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
}
source.position(sourceArrayIndex);
return cr;
}
@@ -285,154 +282,150 @@ class CharsetUTF16 extends CharsetICU {
char trail = 0;
int length = source.remaining();
int sourceArrayIndex = source.position();
try{
/* c!=0 indicates in several places outside the main loops that a surrogate was found */
if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex)) && target.remaining()>=4) {
/* the last buffer ended with a lead surrogate, output the surrogate pair */
++sourceArrayIndex;
--length;
target.put((byte)(c>>>8));
target.put((byte)c);
target.put((byte)(trail>>>8));
target.put((byte)trail);
if(offsets!=null && offsets.remaining()>=4) {
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
}
sourceIndex=1;
fromUChar32=c=0;
/* c!=0 indicates in several places outside the main loops that a surrogate was found */
if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex)) && target.remaining()>=4) {
/* the last buffer ended with a lead surrogate, output the surrogate pair */
++sourceArrayIndex;
--length;
target.put((byte)(c>>>8));
target.put((byte)c);
target.put((byte)(trail>>>8));
target.put((byte)trail);
if(offsets!=null && offsets.remaining()>=4) {
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
offsets.put(-1);
}
byte overflow[/*4*/] = new byte[4];
if(c==0) {
/* copy an even number of bytes for complete UChars */
int count=2*length;
int targetCapacity = target.remaining();
if(count>targetCapacity) {
count=targetCapacity&~1;
}
/* count is even */
targetCapacity-=count;
count>>=1;
length-=count;
if(offsets==null) {
while(count>0) {
c= source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)(c>>>8));
target.put((byte)c);
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)(c>>>8));
target.put((byte)c);
target.put((byte)(trail>>>8));
target.put((byte)trail);
} else {
break;
}
--count;
}
} else {
while(count>0) {
c=source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)(c>>>8));
target.put((byte)c);
offsets.put(sourceIndex);
offsets.put(sourceIndex++);
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)(c>>>8));
target.put((byte)c);
target.put((byte)(trail>>>8));
target.put((byte)trail);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=2;
} else {
break;
}
--count;
}
}
sourceIndex=1;
fromUChar32=c=0;
}
byte overflow[/*4*/] = new byte[4];
if(count==0) {
/* done with the loop for complete UChars */
if(length>0 && targetCapacity>0) {
/*
* there is more input and some target capacity -
* it must be targetCapacity==1 because otherwise
* the above would have copied more;
* prepare for overflow output
*/
if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
overflow[0]=(byte)(c>>>8);
overflow[1]=(byte)c;
length=2; /* 2 bytes to output */
c=0;
/* } else { keep c for surrogate handling, length will be set there */
}
if(c==0) {
/* copy an even number of bytes for complete UChars */
int count=2*length;
int targetCapacity = target.remaining();
if(count>targetCapacity) {
count=targetCapacity&~1;
}
/* count is even */
targetCapacity-=count;
count>>=1;
length-=count;
if(offsets==null) {
while(count>0) {
c= source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)(c>>>8));
target.put((byte)c);
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)(c>>>8));
target.put((byte)c);
target.put((byte)(trail>>>8));
target.put((byte)trail);
} else {
length=0;
c=0;
break;
}
} else {
/* keep c for surrogate handling, length will be set there */
targetCapacity+=2*count;
--count;
}
} else {
length=0; /* from here on, length counts the bytes in overflow[] */
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
length=0;
if(UTF16.isLeadSurrogate(c)) {
if(sourceArrayIndex<source.limit()) {
if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
++sourceArrayIndex;
overflow[0]=(byte)(c>>>8);
overflow[1]=(byte)c;
overflow[2]=(byte)(trail>>>8);
overflow[3]=(byte)trail;
length=4; /* 4 bytes to output */
c=0;
} else {
/* unmatched lead surrogate */
//pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
while(count>0) {
c=source.get(sourceArrayIndex++);
if(!UTF16.isSurrogate(c)) {
target.put((byte)(c>>>8));
target.put((byte)c);
offsets.put(sourceIndex);
offsets.put(sourceIndex++);
} else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
++sourceArrayIndex;
--count;
target.put((byte)(c>>>8));
target.put((byte)c);
target.put((byte)(trail>>>8));
target.put((byte)trail);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
offsets.put(sourceIndex);
sourceIndex+=2;
} else {
/* see if the trail surrogate is in the next buffer */
break;
}
--count;
}
}
if(count==0) {
/* done with the loop for complete UChars */
if(length>0 && targetCapacity>0) {
/*
* there is more input and some target capacity -
* it must be targetCapacity==1 because otherwise
* the above would have copied more;
* prepare for overflow output
*/
if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
overflow[0]=(byte)(c>>>8);
overflow[1]=(byte)c;
length=2; /* 2 bytes to output */
c=0;
/* } else { keep c for surrogate handling, length will be set there */
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
length=0;
c=0;
}
fromUChar32=c;
} else {
/* keep c for surrogate handling, length will be set there */
targetCapacity+=2*count;
}
source.position(sourceArrayIndex);
if(length>0) {
/* output length bytes with overflow (length>targetCapacity>0) */
cr = fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
} else {
length=0; /* from here on, length counts the bytes in overflow[] */
}
if(c!=0) {
/*
* c is a surrogate, and
* - source or target too short
* - or the surrogate is unmatched
*/
length=0;
if(UTF16.isLeadSurrogate(c)) {
if(sourceArrayIndex<source.limit()) {
if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
/* output the surrogate pair, will overflow (see conditions comment above) */
++sourceArrayIndex;
overflow[0]=(byte)(c>>>8);
overflow[1]=(byte)c;
overflow[2]=(byte)(trail>>>8);
overflow[3]=(byte)trail;
length=4; /* 4 bytes to output */
c=0;
} else {
/* unmatched lead surrogate */
//pErrorCode[0]=ErrorCode.U_ILLEGAL_CHAR_FOUND;
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
} else {
/* see if the trail surrogate is in the next buffer */
}
} else {
/* unmatched trail surrogate */
cr = CoderResult.malformedForLength(sourceArrayIndex);
}
}catch(BufferOverflowException ex){
cr = CoderResult.OVERFLOW;
fromUChar32=c;
}
source.position(sourceArrayIndex);
if(length>0) {
/* output length bytes with overflow (length>targetCapacity>0) */
cr = fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
}
return cr;
}