ICU-5784 API Added. Upon enabling the tests in TestConversion that required this API, a number of them failed. CharsetMBCS, CharsetEncoderICU and CharsetDecoderICU required changes to fix these issues. There are still a few tests that are failing in TestFromUnicode in TestConversion: 8, 57, 64, 65, 66.

X-SVN-Rev: 21977
This commit is contained in:
Andrew J Macheret 2007-07-13 23:21:08 +00:00
parent 85536a2691
commit 91b87f249d
5 changed files with 115 additions and 50 deletions

View File

@ -38,8 +38,11 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
char[] invalidCharBuffer = new char[128]; char[] invalidCharBuffer = new char[128];
int invalidCharLength; int invalidCharLength;
/* maximum number of indexed bytes */
private static final int EXT_MAX_BYTES = 0x1f;
/* store previous UChars/chars to continue partial matches */ /* store previous UChars/chars to continue partial matches */
byte[] preToUArray; byte[] preToUArray = new byte[EXT_MAX_BYTES];
int preToUBegin; int preToUBegin;
int preToULength; /* negative: replay */ int preToULength; /* negative: replay */
int preToUFirstLength; /* length of first character */ int preToUFirstLength; /* length of first character */
@ -48,30 +51,26 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
Object toUContext = null; Object toUContext = null;
private CharsetCallback.Decoder onUnmappableInput = CharsetCallback.TO_U_CALLBACK_STOP; private CharsetCallback.Decoder onUnmappableInput = CharsetCallback.TO_U_CALLBACK_STOP;
private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP; private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
CharsetCallback.Decoder toCharErrorBehaviour= new CharsetCallback.Decoder(){ CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
public CoderResult call(CharsetDecoderICU decoder, Object context, public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
ByteBuffer source, CharBuffer target, IntBuffer offsets, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
char[] buffer, int length, CoderResult cr) { if (cr.isUnmappable()) {
if(cr.isUnmappable()){ return onUnmappableInput.call(decoder, context, source, target, offsets, buffer,
return onUnmappableInput.call(decoder, context, length, cr);
source, target, offsets, } else if (cr.isMalformed()) {
buffer, length, cr); return onMalformedInput.call(decoder, context, source, target, offsets, buffer,
}else if(cr.isMalformed()){ length, cr);
return onMalformedInput.call(decoder, context, }
source, target, offsets, return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target,
buffer, length, cr); offsets, buffer, length, cr);
} }
return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, };
source, target, offsets,
buffer, length, cr);
}
};
/** /**
* Construct a CharsetDecorderICU based on the information provided from a * Construct a CharsetDecorderICU based on the information provided from a CharsetICU object.
* CharsetICU object. *
* @param cs The CharsetICU object containing information about how to * @param cs
* charset to decode. * The CharsetICU object containing information about how to charset to decode.
* @draft ICU 3.6 * @draft ICU 3.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
@ -94,7 +93,9 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
/** /**
* Sets the action to be taken if an illegal sequence is encountered * Sets the action to be taken if an illegal sequence is encountered
* @param newAction action to be taken *
* @param newAction
* action to be taken
* @exception IllegalArgumentException * @exception IllegalArgumentException
* @stable ICU 3.6 * @stable ICU 3.6
*/ */
@ -104,7 +105,9 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
/** /**
* Sets the action to be taken if an illegal sequence is encountered * Sets the action to be taken if an illegal sequence is encountered
* @param newAction action to be taken *
* @param newAction
* action to be taken
* @exception IllegalArgumentException * @exception IllegalArgumentException
* @stable ICU 3.6 * @stable ICU 3.6
*/ */
@ -171,6 +174,8 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
*/ */
protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){ protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
if(!in.hasRemaining()){ if(!in.hasRemaining()){
//TODO: do we want to reset the decoder state?
//toULength = 0;
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
} }
in.position(in.position()+toUCountPending()); in.position(in.position()+toUCountPending());
@ -280,8 +285,6 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
return toUnicodeWithCallback(source, target, offsets, flush); return toUnicodeWithCallback(source, target, offsets, flush);
} }
/* maximum number of indexed bytes */
private static final int EXT_MAX_BYTES = 0x1f;
private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) { private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
int limit; int limit;
int delta, offset; int delta, offset;

View File

@ -42,9 +42,12 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
boolean useSubChar1; boolean useSubChar1;
boolean useFallback; boolean useFallback;
/* maximum number of indexed UChars */
private static final int EXT_MAX_UCHARS = 19;
/* store previous UChars/chars to continue partial matches */ /* store previous UChars/chars to continue partial matches */
int preFromUFirstCP; /* >=0: partial match */ int preFromUFirstCP; /* >=0: partial match */
char[] preFromUArray; char[] preFromUArray = new char[EXT_MAX_UCHARS];
int preFromUBegin; int preFromUBegin;
int preFromULength; /* negative: replay */ int preFromULength; /* negative: replay */
@ -294,8 +297,6 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
return fromUnicodeWithCallback(source, target, offsets, flush); return fromUnicodeWithCallback(source, target, offsets, flush);
} }
/* maximum number of indexed UChars */
private static final int EXT_MAX_UCHARS = 19;
/** /**
* Implementation note for m:n conversions * Implementation note for m:n conversions

View File

@ -116,11 +116,11 @@ class CharsetMBCS extends CharsetICU {
* @draft ICU 3.6 * @draft ICU 3.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{ public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath, ClassLoader loader) throws InvalidFormatException{
super(icuCanonicalName, javaCanonicalName, aliases); super(icuCanonicalName, javaCanonicalName, aliases);
// now try to load the data // now try to load the data
LoadArguments args = new LoadArguments(1, icuCanonicalName); LoadArguments args = new LoadArguments(1, icuCanonicalName, classPath, loader);
sharedData = loadConverter(args); sharedData = loadConverter(args);
maxBytesPerChar = sharedData.staticData.maxBytesPerChar; maxBytesPerChar = sharedData.staticData.maxBytesPerChar;
@ -136,6 +136,15 @@ class CharsetMBCS extends CharsetICU {
// Todo: pass options // Todo: pass options
initializeConverter(0); initializeConverter(0);
} }
/**
 * Constructs an MBCS charset using the default data location: delegates to the
 * five-argument constructor with ICUResourceBundle.ICU_BUNDLE as the class path and
 * null as the class loader.
 * (The tags below are kept for pacifying the check tags tool.)
 *
 * @param icuCanonicalName
 *            passed through unchanged to the five-argument constructor
 * @param javaCanonicalName
 *            passed through unchanged to the five-argument constructor
 * @param aliases
 *            passed through unchanged to the five-argument constructor
 * @throws InvalidFormatException
 *             declared by the five-argument constructor this delegates to
 * @draft ICU 3.6
 * @provisional This API might change or be removed in a future release.
 */
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{
this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null);
}
class LoadArguments class LoadArguments
{ {
@ -144,11 +153,15 @@ class CharsetMBCS extends CharsetICU {
// long options; // long options;
// String pkg; // String pkg;
String name; String name;
String classPath;
ClassLoader loader;
LoadArguments(int nestedLoads, String name) LoadArguments(int nestedLoads, String name, String classPath, ClassLoader loader)
{ {
this.nestedLoads = nestedLoads; this.nestedLoads = nestedLoads;
this.name = name; this.name = name;
this.loader = loader;
this.classPath = classPath;
} }
} }
@ -158,7 +171,14 @@ class CharsetMBCS extends CharsetICU {
UConverterStaticData staticData = new UConverterStaticData(); UConverterStaticData staticData = new UConverterStaticData();
UConverterDataReader reader = null; UConverterDataReader reader = null;
try { try {
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/" + args.name + "." + UConverterSharedData.DATA_TYPE); String resourceName = args.classPath + "/" + args.name + "." + UConverterSharedData.DATA_TYPE;
InputStream i;
if (args.loader != null) {
i = ICUData.getRequiredStream(args.loader, resourceName);
} else {
i = ICUData.getRequiredStream(resourceName);
}
BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE); BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE);
reader = new UConverterDataReader(b); reader = new UConverterDataReader(b);
reader.readStaticData(staticData); reader.readStaticData(staticData);
@ -267,7 +287,7 @@ class CharsetMBCS extends CharsetICU {
/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */ /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
//agljport:fix args.size=sizeof(UConverterLoadArgs); //agljport:fix args.size=sizeof(UConverterLoadArgs);
LoadArguments args2 = new LoadArguments(2, baseName); LoadArguments args2 = new LoadArguments(2, baseName, ICUResourceBundle.ICU_BUNDLE, null);
baseSharedData=loadConverter(args2); baseSharedData=loadConverter(args2);
if( baseSharedData.staticData.conversionType!=UConverterType.MBCS || if( baseSharedData.staticData.conversionType!=UConverterType.MBCS ||
@ -2019,6 +2039,7 @@ class CharsetMBCS extends CharsetICU {
try{ try{
//TODO: remove this todo
if(preFromUFirstCP>=0) { if(preFromUFirstCP>=0) {
/* /*
* pass sourceIndex=-1 because we continue from an earlier buffer * pass sourceIndex=-1 because we continue from an earlier buffer
@ -2090,8 +2111,10 @@ class CharsetMBCS extends CharsetICU {
*/ */
boolean doloop = true; boolean doloop = true;
boolean doread = true; boolean doread = true;
if(c!=0 && target.hasRemaining()) { if (c != 0 && target.hasRemaining()) {
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength); if(UTF16.isSurrogate((char)c) && (unicodeMask&UConverterConstants.HAS_SURROGATES) == 0 && UTF16.isLeadSurrogate((char)c)) {
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
nextSourceIndex, prevSourceIndex, prevLength);
doloop = getTrail(source, target, unicodeMask, x, flush, cr); doloop = getTrail(source, target, unicodeMask, x, flush, cr);
doread = x.doread; doread = x.doread;
c = x.c; c = x.c;
@ -2100,6 +2123,9 @@ class CharsetMBCS extends CharsetICU {
nextSourceIndex = x.nextSourceIndex; nextSourceIndex = x.nextSourceIndex;
prevSourceIndex = x.prevSourceIndex; prevSourceIndex = x.prevSourceIndex;
prevLength = x.prevLength; prevLength = x.prevLength;
} else {
doread = false;
}
} }
if(doloop) { if(doloop) {
@ -2154,6 +2180,8 @@ class CharsetMBCS extends CharsetICU {
break; break;
} }
} }
} else {
doread = true;
} }
/* convert the Unicode code point in c into codepage bytes */ /* convert the Unicode code point in c into codepage bytes */
@ -2594,7 +2622,8 @@ class CharsetMBCS extends CharsetICU {
preFromULength = (byte) - preFromULength; preFromULength = (byte) - preFromULength;
/* set the error code for unassigned */ /* set the error code for unassigned */
cr = CoderResult.unmappableForLength(source.position()); //TODO: figure out what the unmappable length really should be
cr = CoderResult.unmappableForLength(1);
} }
return cr; return cr;
} }
@ -3516,15 +3545,18 @@ class CharsetMBCS extends CharsetICU {
if(UTF16.isTrailSurrogate(trail)) { if(UTF16.isTrailSurrogate(trail)) {
++x.sourceArrayIndex; ++x.sourceArrayIndex;
++x.nextSourceIndex; ++x.nextSourceIndex;
/* convert this supplementary code point */
x.c = UCharacter.getCodePoint((char)x.c, trail); x.c = UCharacter.getCodePoint((char)x.c, trail);
if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) { if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
fromUnicodeStatus = x.prevLength; /* save the old state */ fromUnicodeStatus = x.prevLength; /* save the old state */
/* callback(unassigned) */ /* callback(unassigned) */
x.doread = true;
return unassigned(source, target, null, x, flush, cr); return unassigned(source, target, null, x, flush, cr);
} else {
x.doread = false;
return true;
} }
/* convert this supplementary code point */
/* exit this condition tree */
} else { } else {
/* this is an unmatched lead code unit (1st surrogate) */ /* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */ /* callback(illegal) */
@ -3535,8 +3567,6 @@ class CharsetMBCS extends CharsetICU {
/* no more input */ /* no more input */
return false; return false;
} }
x.doread = false;
return true;
} }
// function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets // function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets
@ -3590,14 +3620,17 @@ class CharsetMBCS extends CharsetICU {
if(UTF16.isTrailSurrogate(trail)) { if(UTF16.isTrailSurrogate(trail)) {
++x.sourceArrayIndex; ++x.sourceArrayIndex;
++x.nextSourceIndex; ++x.nextSourceIndex;
/* convert this supplementary code point */
x.c = UCharacter.getCodePoint((char)x.c, trail); x.c = UCharacter.getCodePoint((char)x.c, trail);
if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) { if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
/* callback(unassigned) */ /* callback(unassigned) */
x.doread = true;
return unassignedDouble(source, target, x, flush, cr); return unassignedDouble(source, target, x, flush, cr);
} else {
x.doread = false;
return true;
} }
/* convert this supplementary code point */
/* exit this condition tree */
} else { } else {
/* this is an unmatched lead code unit (1st surrogate) */ /* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */ /* callback(illegal) */
@ -3608,8 +3641,6 @@ class CharsetMBCS extends CharsetICU {
/* no more input */ /* no more input */
return false; return false;
} }
x.doread = false;
return true;
} }
// function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets // function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets

View File

@ -16,6 +16,10 @@ import java.nio.charset.spi.CharsetProvider;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.MissingResourceException;
import com.ibm.icu.impl.InvalidFormatException;
import com.ibm.icu.util.UResourceBundle;
/** /**
@ -58,6 +62,32 @@ public final class CharsetProviderICU extends CharsetProvider{
} }
return null; return null;
} }
/**
 * Builds a CharsetMBCS for the given name, passing classPath on to the constructor
 * and null as the class loader.
 *
 * @param charsetName
 *            used as both the ICU canonical name and the Java canonical name
 * @param classPath
 *            class path argument handed to the CharsetMBCS constructor
 * @return the newly constructed charset, or null when the constructor throws an
 *         InvalidFormatException
 */
public final Charset charsetForName(String charsetName, String classPath) {
    try {
        // No aliases are registered for a charset created this way.
        return new CharsetMBCS(charsetName, charsetName, new String[0], classPath, null);
    } catch (InvalidFormatException invalidData) {
        // The failure is reported to the caller as "no charset" instead of propagating.
        return null;
    }
}
/**
 * Builds a CharsetMBCS for the given name, passing both classPath and testLoader on
 * to the constructor.
 *
 * @param charsetName
 *            used as both the ICU canonical name and the Java canonical name
 * @param classPath
 *            class path argument handed to the CharsetMBCS constructor
 * @param testLoader
 *            class loader argument handed to the CharsetMBCS constructor
 * @return the newly constructed charset, or null when the constructor throws an
 *         InvalidFormatException
 */
public Charset charsetForName(String charsetName, String classPath, ClassLoader testLoader) {
    // An alternative lookup through
    // UResourceBundle.getBundleInstance(classPath, charsetName, testLoader),
    // ignoring MissingResourceException, was sketched here but is not enabled.
    try {
        // No aliases are registered for a charset created this way.
        return new CharsetMBCS(charsetName, charsetName, new String[0], classPath, testLoader);
    } catch (InvalidFormatException invalidData) {
        // The failure is reported to the caller as "no charset" instead of propagating.
        return null;
    }
}
/** /**
* Gets the canonical name of the converter as defined by Java * Gets the canonical name of the converter as defined by Java
* @param enc converter name * @param enc converter name

View File

@ -79,9 +79,9 @@ public final class ICUData {
return getStream(loader,resourceName, false); return getStream(loader,resourceName, false);
} }
/*public static InputStream getRequiredStream(ClassLoader loader, String resourceName){ public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
return getStream(loader,resourceName, true); return getStream(loader, resourceName, true);
}*/ }
/* /*
* Convenience override that calls getStream(ICUData.class, resourceName, false); * Convenience override that calls getStream(ICUData.class, resourceName, false);
@ -103,7 +103,7 @@ public final class ICUData {
public static InputStream getStream(Class root, String resourceName) { public static InputStream getStream(Class root, String resourceName) {
return getStream(root, resourceName, false); return getStream(root, resourceName, false);
} }
/* /*
* Convenience method that calls getStream(root, resourceName, true). * Convenience method that calls getStream(root, resourceName, true).
*/ */