ICU-5784 API Added. Upon enabling the tests in TestConversion that required this API, a number of them failed. CharsetMBCS, CharsetEncoderICU and CharsetDecoderICU required changes to fix these issues. There are still a few tests that are failing in TestFromUnicode in TestConversion: 8, 57, 64, 65, 66.
X-SVN-Rev: 21977
This commit is contained in:
parent
85536a2691
commit
91b87f249d
@ -38,8 +38,11 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
char[] invalidCharBuffer = new char[128];
|
||||
int invalidCharLength;
|
||||
|
||||
/* maximum number of indexed bytes */
|
||||
private static final int EXT_MAX_BYTES = 0x1f;
|
||||
|
||||
/* store previous UChars/chars to continue partial matches */
|
||||
byte[] preToUArray;
|
||||
byte[] preToUArray = new byte[EXT_MAX_BYTES];
|
||||
int preToUBegin;
|
||||
int preToULength; /* negative: replay */
|
||||
int preToUFirstLength; /* length of first character */
|
||||
@ -48,30 +51,26 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
Object toUContext = null;
|
||||
private CharsetCallback.Decoder onUnmappableInput = CharsetCallback.TO_U_CALLBACK_STOP;
|
||||
private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
|
||||
CharsetCallback.Decoder toCharErrorBehaviour= new CharsetCallback.Decoder(){
|
||||
public CoderResult call(CharsetDecoderICU decoder, Object context,
|
||||
ByteBuffer source, CharBuffer target, IntBuffer offsets,
|
||||
char[] buffer, int length, CoderResult cr) {
|
||||
if(cr.isUnmappable()){
|
||||
return onUnmappableInput.call(decoder, context,
|
||||
source, target, offsets,
|
||||
buffer, length, cr);
|
||||
}else if(cr.isMalformed()){
|
||||
return onMalformedInput.call(decoder, context,
|
||||
source, target, offsets,
|
||||
buffer, length, cr);
|
||||
}
|
||||
return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context,
|
||||
source, target, offsets,
|
||||
buffer, length, cr);
|
||||
}
|
||||
};
|
||||
CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
|
||||
public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
|
||||
CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
|
||||
if (cr.isUnmappable()) {
|
||||
return onUnmappableInput.call(decoder, context, source, target, offsets, buffer,
|
||||
length, cr);
|
||||
} else if (cr.isMalformed()) {
|
||||
return onMalformedInput.call(decoder, context, source, target, offsets, buffer,
|
||||
length, cr);
|
||||
}
|
||||
return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target,
|
||||
offsets, buffer, length, cr);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Construct a CharsetDecoderICU based on the information provided from a
|
||||
* CharsetICU object.
|
||||
* @param cs The CharsetICU object containing information about the
|
||||
* charset to decode.
|
||||
* Construct a CharsetDecoderICU based on the information provided from a CharsetICU object.
|
||||
*
|
||||
* @param cs
|
||||
* The CharsetICU object containing information about the charset to decode.
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@ -94,7 +93,9 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
|
||||
/**
|
||||
* Sets the action to be taken if an illegal sequence is encountered
|
||||
* @param newAction action to be taken
|
||||
*
|
||||
* @param newAction
|
||||
* action to be taken
|
||||
* @exception IllegalArgumentException
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
@ -104,7 +105,9 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
|
||||
/**
|
||||
* Sets the action to be taken if an illegal sequence is encountered
|
||||
* @param newAction action to be taken
|
||||
*
|
||||
* @param newAction
|
||||
* action to be taken
|
||||
* @exception IllegalArgumentException
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
@ -171,6 +174,8 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
*/
|
||||
protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
|
||||
if(!in.hasRemaining()){
|
||||
//TODO: do we want to reset the decoder state?
|
||||
//toULength = 0;
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
in.position(in.position()+toUCountPending());
|
||||
@ -280,8 +285,6 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
return toUnicodeWithCallback(source, target, offsets, flush);
|
||||
}
|
||||
|
||||
/* maximum number of indexed bytes */
|
||||
private static final int EXT_MAX_BYTES = 0x1f;
|
||||
private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
|
||||
int limit;
|
||||
int delta, offset;
|
||||
|
@ -42,9 +42,12 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
|
||||
boolean useSubChar1;
|
||||
boolean useFallback;
|
||||
|
||||
/* maximum number of indexed UChars */
|
||||
private static final int EXT_MAX_UCHARS = 19;
|
||||
|
||||
/* store previous UChars/chars to continue partial matches */
|
||||
int preFromUFirstCP; /* >=0: partial match */
|
||||
char[] preFromUArray;
|
||||
char[] preFromUArray = new char[EXT_MAX_UCHARS];
|
||||
int preFromUBegin;
|
||||
int preFromULength; /* negative: replay */
|
||||
|
||||
@ -294,8 +297,6 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
|
||||
return fromUnicodeWithCallback(source, target, offsets, flush);
|
||||
|
||||
}
|
||||
/* maximum number of indexed UChars */
|
||||
private static final int EXT_MAX_UCHARS = 19;
|
||||
|
||||
/**
|
||||
* Implementation note for m:n conversions
|
||||
|
@ -116,11 +116,11 @@ class CharsetMBCS extends CharsetICU {
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{
|
||||
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath, ClassLoader loader) throws InvalidFormatException{
|
||||
super(icuCanonicalName, javaCanonicalName, aliases);
|
||||
|
||||
// now try to load the data
|
||||
LoadArguments args = new LoadArguments(1, icuCanonicalName);
|
||||
LoadArguments args = new LoadArguments(1, icuCanonicalName, classPath, loader);
|
||||
sharedData = loadConverter(args);
|
||||
|
||||
maxBytesPerChar = sharedData.staticData.maxBytesPerChar;
|
||||
@ -136,6 +136,15 @@ class CharsetMBCS extends CharsetICU {
|
||||
// Todo: pass options
|
||||
initializeConverter(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tags for pacifying the check tags tool
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{
|
||||
this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null);
|
||||
}
|
||||
|
||||
class LoadArguments
|
||||
{
|
||||
@ -144,11 +153,15 @@ class CharsetMBCS extends CharsetICU {
|
||||
// long options;
|
||||
// String pkg;
|
||||
String name;
|
||||
String classPath;
|
||||
ClassLoader loader;
|
||||
|
||||
LoadArguments(int nestedLoads, String name)
|
||||
LoadArguments(int nestedLoads, String name, String classPath, ClassLoader loader)
|
||||
{
|
||||
this.nestedLoads = nestedLoads;
|
||||
this.name = name;
|
||||
this.loader = loader;
|
||||
this.classPath = classPath;
|
||||
}
|
||||
}
|
||||
|
||||
@ -158,7 +171,14 @@ class CharsetMBCS extends CharsetICU {
|
||||
UConverterStaticData staticData = new UConverterStaticData();
|
||||
UConverterDataReader reader = null;
|
||||
try {
|
||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/" + args.name + "." + UConverterSharedData.DATA_TYPE);
|
||||
String resourceName = args.classPath + "/" + args.name + "." + UConverterSharedData.DATA_TYPE;
|
||||
InputStream i;
|
||||
|
||||
if (args.loader != null) {
|
||||
i = ICUData.getRequiredStream(args.loader, resourceName);
|
||||
} else {
|
||||
i = ICUData.getRequiredStream(resourceName);
|
||||
}
|
||||
BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE);
|
||||
reader = new UConverterDataReader(b);
|
||||
reader.readStaticData(staticData);
|
||||
@ -267,7 +287,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
|
||||
/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
|
||||
//agljport:fix args.size=sizeof(UConverterLoadArgs);
|
||||
LoadArguments args2 = new LoadArguments(2, baseName);
|
||||
LoadArguments args2 = new LoadArguments(2, baseName, ICUResourceBundle.ICU_BUNDLE, null);
|
||||
baseSharedData=loadConverter(args2);
|
||||
|
||||
if( baseSharedData.staticData.conversionType!=UConverterType.MBCS ||
|
||||
@ -2019,6 +2039,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
|
||||
try{
|
||||
|
||||
//TODO: remove this todo
|
||||
if(preFromUFirstCP>=0) {
|
||||
/*
|
||||
* pass sourceIndex=-1 because we continue from an earlier buffer
|
||||
@ -2090,8 +2111,10 @@ class CharsetMBCS extends CharsetICU {
|
||||
*/
|
||||
boolean doloop = true;
|
||||
boolean doread = true;
|
||||
if(c!=0 && target.hasRemaining()) {
|
||||
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength);
|
||||
if (c != 0 && target.hasRemaining()) {
|
||||
if(UTF16.isSurrogate((char)c) && (unicodeMask&UConverterConstants.HAS_SURROGATES) == 0 && UTF16.isLeadSurrogate((char)c)) {
|
||||
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
|
||||
nextSourceIndex, prevSourceIndex, prevLength);
|
||||
doloop = getTrail(source, target, unicodeMask, x, flush, cr);
|
||||
doread = x.doread;
|
||||
c = x.c;
|
||||
@ -2100,6 +2123,9 @@ class CharsetMBCS extends CharsetICU {
|
||||
nextSourceIndex = x.nextSourceIndex;
|
||||
prevSourceIndex = x.prevSourceIndex;
|
||||
prevLength = x.prevLength;
|
||||
} else {
|
||||
doread = false;
|
||||
}
|
||||
}
|
||||
|
||||
if(doloop) {
|
||||
@ -2154,6 +2180,8 @@ class CharsetMBCS extends CharsetICU {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
doread = true;
|
||||
}
|
||||
|
||||
/* convert the Unicode code point in c into codepage bytes */
|
||||
@ -2594,7 +2622,8 @@ class CharsetMBCS extends CharsetICU {
|
||||
preFromULength = (byte) - preFromULength;
|
||||
|
||||
/* set the error code for unassigned */
|
||||
cr = CoderResult.unmappableForLength(source.position());
|
||||
//TODO: figure out what the unmappable length really should be
|
||||
cr = CoderResult.unmappableForLength(1);
|
||||
}
|
||||
return cr;
|
||||
}
|
||||
@ -3516,15 +3545,18 @@ class CharsetMBCS extends CharsetICU {
|
||||
if(UTF16.isTrailSurrogate(trail)) {
|
||||
++x.sourceArrayIndex;
|
||||
++x.nextSourceIndex;
|
||||
/* convert this supplementary code point */
|
||||
x.c = UCharacter.getCodePoint((char)x.c, trail);
|
||||
if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
fromUnicodeStatus = x.prevLength; /* save the old state */
|
||||
/* callback(unassigned) */
|
||||
x.doread = true;
|
||||
return unassigned(source, target, null, x, flush, cr);
|
||||
} else {
|
||||
x.doread = false;
|
||||
return true;
|
||||
}
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
@ -3535,8 +3567,6 @@ class CharsetMBCS extends CharsetICU {
|
||||
/* no more input */
|
||||
return false;
|
||||
}
|
||||
x.doread = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets
|
||||
@ -3590,14 +3620,17 @@ class CharsetMBCS extends CharsetICU {
|
||||
if(UTF16.isTrailSurrogate(trail)) {
|
||||
++x.sourceArrayIndex;
|
||||
++x.nextSourceIndex;
|
||||
/* convert this supplementary code point */
|
||||
x.c = UCharacter.getCodePoint((char)x.c, trail);
|
||||
if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
x.doread = true;
|
||||
return unassignedDouble(source, target, x, flush, cr);
|
||||
} else {
|
||||
x.doread = false;
|
||||
return true;
|
||||
}
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
@ -3608,8 +3641,6 @@ class CharsetMBCS extends CharsetICU {
|
||||
/* no more input */
|
||||
return false;
|
||||
}
|
||||
x.doread = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets
|
||||
|
@ -16,6 +16,10 @@ import java.nio.charset.spi.CharsetProvider;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
import com.ibm.icu.impl.InvalidFormatException;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
||||
|
||||
/**
|
||||
@ -58,6 +62,32 @@ public final class CharsetProviderICU extends CharsetProvider{
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public final Charset charsetForName(String charsetName, String classPath) {
|
||||
try {
|
||||
CharsetMBCS cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, null);
|
||||
return cs;
|
||||
} catch (InvalidFormatException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Charset charsetForName(String charsetName, String classPath, ClassLoader testLoader) {
|
||||
try {
|
||||
CharsetMBCS cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, testLoader);
|
||||
return cs;
|
||||
} catch (InvalidFormatException e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// UResourceBundle bundle = null;
|
||||
// try {
|
||||
// bundle = (UResourceBundle) UResourceBundle.getBundleInstance(classPath, charsetName,
|
||||
// testLoader);
|
||||
// } catch (MissingResourceException e) {
|
||||
// }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the canonical name of the converter as defined by Java
|
||||
* @param enc converter name
|
||||
|
@ -79,9 +79,9 @@ public final class ICUData {
|
||||
return getStream(loader,resourceName, false);
|
||||
}
|
||||
|
||||
/*public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
|
||||
return getStream(loader,resourceName, true);
|
||||
}*/
|
||||
public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
|
||||
return getStream(loader, resourceName, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convenience override that calls getStream(ICUData.class, resourceName, false);
|
||||
@ -103,7 +103,7 @@ public final class ICUData {
|
||||
public static InputStream getStream(Class root, String resourceName) {
|
||||
return getStream(root, resourceName, false);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Convenience method that calls getStream(root, resourceName, true).
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user