ICU-5784 API Added. Upon enabling the tests in TestConversion that required this API, a number of them failed. CharsetMBCS, CharsetEncoderICU and CharsetDecoderICU required changes to fix these issues. There are still a few tests that are failing in TestFromUnicode in TestConversion: 8, 57, 64, 65, 66.

X-SVN-Rev: 21977
This commit is contained in:
Andrew J Macheret 2007-07-13 23:21:08 +00:00
parent 85536a2691
commit 91b87f249d
5 changed files with 115 additions and 50 deletions

View File

@ -38,8 +38,11 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
char[] invalidCharBuffer = new char[128];
int invalidCharLength;
/* maximum number of indexed bytes */
private static final int EXT_MAX_BYTES = 0x1f;
/* store previous bytes to continue partial matches */
byte[] preToUArray;
byte[] preToUArray = new byte[EXT_MAX_BYTES];
int preToUBegin;
int preToULength; /* negative: replay */
int preToUFirstLength; /* length of first character */
@ -49,29 +52,25 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
private CharsetCallback.Decoder onUnmappableInput = CharsetCallback.TO_U_CALLBACK_STOP;
private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
public CoderResult call(CharsetDecoderICU decoder, Object context,
ByteBuffer source, CharBuffer target, IntBuffer offsets,
char[] buffer, int length, CoderResult cr) {
public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
if (cr.isUnmappable()) {
return onUnmappableInput.call(decoder, context,
source, target, offsets,
buffer, length, cr);
return onUnmappableInput.call(decoder, context, source, target, offsets, buffer,
length, cr);
} else if (cr.isMalformed()) {
return onMalformedInput.call(decoder, context,
source, target, offsets,
buffer, length, cr);
return onMalformedInput.call(decoder, context, source, target, offsets, buffer,
length, cr);
}
return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context,
source, target, offsets,
buffer, length, cr);
return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target,
offsets, buffer, length, cr);
}
};
/**
* Construct a CharsetDecoderICU based on the information provided from a
* CharsetICU object.
* @param cs The CharsetICU object containing information about the
* charset to decode.
* Construct a CharsetDecoderICU based on the information provided from a CharsetICU object.
*
* @param cs
* The CharsetICU object containing information about the charset to decode.
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
@ -94,7 +93,9 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
/**
* Sets the action to be taken if an illegal sequence is encountered
* @param newAction action to be taken
*
* @param newAction
* action to be taken
* @exception IllegalArgumentException
* @stable ICU 3.6
*/
@ -104,7 +105,9 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
/**
* Sets the action to be taken if an illegal sequence is encountered
* @param newAction action to be taken
*
* @param newAction
* action to be taken
* @exception IllegalArgumentException
* @stable ICU 3.6
*/
@ -171,6 +174,8 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
*/
protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
if(!in.hasRemaining()){
//TODO: do we want to reset the decoder state?
//toULength = 0;
return CoderResult.UNDERFLOW;
}
in.position(in.position()+toUCountPending());
@ -280,8 +285,6 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
return toUnicodeWithCallback(source, target, offsets, flush);
}
/* maximum number of indexed bytes */
private static final int EXT_MAX_BYTES = 0x1f;
private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
int limit;
int delta, offset;

View File

@ -42,9 +42,12 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
boolean useSubChar1;
boolean useFallback;
/* maximum number of indexed UChars */
private static final int EXT_MAX_UCHARS = 19;
/* store previous UChars/chars to continue partial matches */
int preFromUFirstCP; /* >=0: partial match */
char[] preFromUArray;
char[] preFromUArray = new char[EXT_MAX_UCHARS];
int preFromUBegin;
int preFromULength; /* negative: replay */
@ -294,8 +297,6 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
return fromUnicodeWithCallback(source, target, offsets, flush);
}
/* maximum number of indexed UChars */
private static final int EXT_MAX_UCHARS = 19;
/**
* Implementation note for m:n conversions

View File

@ -116,11 +116,11 @@ class CharsetMBCS extends CharsetICU {
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath, ClassLoader loader) throws InvalidFormatException{
super(icuCanonicalName, javaCanonicalName, aliases);
// now try to load the data
LoadArguments args = new LoadArguments(1, icuCanonicalName);
LoadArguments args = new LoadArguments(1, icuCanonicalName, classPath, loader);
sharedData = loadConverter(args);
maxBytesPerChar = sharedData.staticData.maxBytesPerChar;
@ -137,6 +137,15 @@ class CharsetMBCS extends CharsetICU {
initializeConverter(0);
}
/**
 * Convenience constructor that delegates to the five-argument constructor,
 * passing ICUResourceBundle.ICU_BUNDLE as the class path and null as the
 * class loader.
 * (The tags below are kept to pacify the check tags tool.)
 *
 * @param icuCanonicalName forwarded unchanged to the five-argument constructor
 * @param javaCanonicalName forwarded unchanged to the five-argument constructor
 * @param aliases forwarded unchanged to the five-argument constructor
 * @throws InvalidFormatException propagated from the five-argument constructor
 * @draft ICU 3.6
 * @provisional This API might change or be removed in a future release.
 */
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{
this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null);
}
class LoadArguments
{
int nestedLoads; /* count nested loadConverter() calls */
@ -144,11 +153,15 @@ class CharsetMBCS extends CharsetICU {
// long options;
// String pkg;
String name;
String classPath;
ClassLoader loader;
LoadArguments(int nestedLoads, String name)
LoadArguments(int nestedLoads, String name, String classPath, ClassLoader loader)
{
this.nestedLoads = nestedLoads;
this.name = name;
this.loader = loader;
this.classPath = classPath;
}
}
@ -158,7 +171,14 @@ class CharsetMBCS extends CharsetICU {
UConverterStaticData staticData = new UConverterStaticData();
UConverterDataReader reader = null;
try {
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/" + args.name + "." + UConverterSharedData.DATA_TYPE);
String resourceName = args.classPath + "/" + args.name + "." + UConverterSharedData.DATA_TYPE;
InputStream i;
if (args.loader != null) {
i = ICUData.getRequiredStream(args.loader, resourceName);
} else {
i = ICUData.getRequiredStream(resourceName);
}
BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE);
reader = new UConverterDataReader(b);
reader.readStaticData(staticData);
@ -267,7 +287,7 @@ class CharsetMBCS extends CharsetICU {
/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
//agljport:fix args.size=sizeof(UConverterLoadArgs);
LoadArguments args2 = new LoadArguments(2, baseName);
LoadArguments args2 = new LoadArguments(2, baseName, ICUResourceBundle.ICU_BUNDLE, null);
baseSharedData=loadConverter(args2);
if( baseSharedData.staticData.conversionType!=UConverterType.MBCS ||
@ -2019,6 +2039,7 @@ class CharsetMBCS extends CharsetICU {
try{
//TODO: remove this todo
if(preFromUFirstCP>=0) {
/*
* pass sourceIndex=-1 because we continue from an earlier buffer
@ -2091,7 +2112,9 @@ class CharsetMBCS extends CharsetICU {
boolean doloop = true;
boolean doread = true;
if (c != 0 && target.hasRemaining()) {
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength);
if(UTF16.isSurrogate((char)c) && (unicodeMask&UConverterConstants.HAS_SURROGATES) == 0 && UTF16.isLeadSurrogate((char)c)) {
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
nextSourceIndex, prevSourceIndex, prevLength);
doloop = getTrail(source, target, unicodeMask, x, flush, cr);
doread = x.doread;
c = x.c;
@ -2100,6 +2123,9 @@ class CharsetMBCS extends CharsetICU {
nextSourceIndex = x.nextSourceIndex;
prevSourceIndex = x.prevSourceIndex;
prevLength = x.prevLength;
} else {
doread = false;
}
}
if(doloop) {
@ -2154,6 +2180,8 @@ class CharsetMBCS extends CharsetICU {
break;
}
}
} else {
doread = true;
}
/* convert the Unicode code point in c into codepage bytes */
@ -2594,7 +2622,8 @@ class CharsetMBCS extends CharsetICU {
preFromULength = (byte) - preFromULength;
/* set the error code for unassigned */
cr = CoderResult.unmappableForLength(source.position());
//TODO: figure out what the unmappable length really should be
cr = CoderResult.unmappableForLength(1);
}
return cr;
}
@ -3516,15 +3545,18 @@ class CharsetMBCS extends CharsetICU {
if(UTF16.isTrailSurrogate(trail)) {
++x.sourceArrayIndex;
++x.nextSourceIndex;
/* convert this supplementary code point */
x.c = UCharacter.getCodePoint((char)x.c, trail);
if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
fromUnicodeStatus = x.prevLength; /* save the old state */
/* callback(unassigned) */
x.doread = true;
return unassigned(source, target, null, x, flush, cr);
} else {
x.doread = false;
return true;
}
/* convert this supplementary code point */
/* exit this condition tree */
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
@ -3535,8 +3567,6 @@ class CharsetMBCS extends CharsetICU {
/* no more input */
return false;
}
x.doread = false;
return true;
}
// function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets
@ -3590,14 +3620,17 @@ class CharsetMBCS extends CharsetICU {
if(UTF16.isTrailSurrogate(trail)) {
++x.sourceArrayIndex;
++x.nextSourceIndex;
/* convert this supplementary code point */
x.c = UCharacter.getCodePoint((char)x.c, trail);
if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
/* callback(unassigned) */
x.doread = true;
return unassignedDouble(source, target, x, flush, cr);
} else {
x.doread = false;
return true;
}
/* convert this supplementary code point */
/* exit this condition tree */
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
@ -3608,8 +3641,6 @@ class CharsetMBCS extends CharsetICU {
/* no more input */
return false;
}
x.doread = false;
return true;
}
// function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets

View File

@ -16,6 +16,10 @@ import java.nio.charset.spi.CharsetProvider;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.MissingResourceException;
import com.ibm.icu.impl.InvalidFormatException;
import com.ibm.icu.util.UResourceBundle;
/**
@ -58,6 +62,32 @@ public final class CharsetProviderICU extends CharsetProvider{
}
return null;
}
/**
 * Creates a CharsetMBCS for the given name, using the given class path and
 * no explicit class loader (null is passed to the CharsetMBCS constructor).
 * The name is used as both the ICU and the Java canonical name, and the
 * alias list is empty.
 *
 * @param charsetName name handed to CharsetMBCS as both canonical names
 * @param classPath class path handed to the CharsetMBCS constructor
 * @return the new charset, or null if the CharsetMBCS constructor throws
 *         InvalidFormatException
 */
public final Charset charsetForName(String charsetName, String classPath) {
    Charset result;
    try {
        result = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, null);
    } catch (InvalidFormatException e) {
        // construction failed: report "no such charset" to the caller as null
        result = null;
    }
    return result;
}
/**
 * Creates a CharsetMBCS for the given name, using the given class path and
 * class loader. The name is used as both the ICU and the Java canonical
 * name, and the alias list is empty.
 *
 * @param charsetName name handed to CharsetMBCS as both canonical names
 * @param classPath class path handed to the CharsetMBCS constructor
 * @param testLoader class loader handed to the CharsetMBCS constructor
 * @return the new charset, or null if the CharsetMBCS constructor throws
 *         InvalidFormatException
 */
public Charset charsetForName(String charsetName, String classPath, ClassLoader testLoader) {
    // NOTE: a lookup through UResourceBundle.getBundleInstance(classPath, charsetName,
    // testLoader), guarded by a MissingResourceException catch, was sketched here
    // but is not enabled.
    try {
        return new CharsetMBCS(charsetName, charsetName, new String[0], classPath, testLoader);
    } catch (InvalidFormatException e) {
        // construction failed: report "no such charset" to the caller as null
        return null;
    }
}
/**
* Gets the canonical name of the converter as defined by Java
* @param enc converter name

View File

@ -79,9 +79,9 @@ public final class ICUData {
return getStream(loader,resourceName, false);
}
/*public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
return getStream(loader, resourceName, true);
}*/
}
/*
* Convenience override that calls getStream(ICUData.class, resourceName, false);