ICU-5407 remove UTF32.java and make the test depend on icu4j charsets
X-SVN-Rev: 20542
This commit is contained in:
parent
78af2c2195
commit
444fd811e9
@ -61,7 +61,9 @@ public abstract class CharsetICU extends Charset{
|
||||
short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
|
||||
byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
|
||||
byte reserved[/*19*/]; /* +81: 19 to round out the structure */
|
||||
|
||||
|
||||
boolean writeBOM = false; /* only used by UTF-16, UTF-32 */
|
||||
|
||||
/**
|
||||
*
|
||||
* @param icuCanonicalName
|
||||
@ -101,8 +103,8 @@ public abstract class CharsetICU extends Charset{
|
||||
algorithmicCharsets.put("HZ", "com.ibm.icu.charset.CharsetHZ" );
|
||||
algorithmicCharsets.put("imapmailboxname", "com.ibm.icu.charset.CharsetIMAP" );
|
||||
algorithmicCharsets.put("ISCII", "com.ibm.icu.charset.CharsetISCII" );
|
||||
algorithmicCharsets.put("iso2022", "com.ibm.icu.charset.CharsetISO2022" );*/
|
||||
/*algorithmicCharsets.put("lmbcs1", "com.ibm.icu.charset.CharsetLMBCS1" );
|
||||
algorithmicCharsets.put("iso2022", "com.ibm.icu.charset.CharsetISO2022" );
|
||||
algorithmicCharsets.put("lmbcs1", "com.ibm.icu.charset.CharsetLMBCS1" );
|
||||
algorithmicCharsets.put("lmbcs11", "com.ibm.icu.charset.CharsetLMBCS11" );
|
||||
algorithmicCharsets.put("lmbcs16", "com.ibm.icu.charset.CharsetLMBCS16" );
|
||||
algorithmicCharsets.put("lmbcs17", "com.ibm.icu.charset.CharsetLMBCS17" );
|
||||
@ -219,11 +221,13 @@ public abstract class CharsetICU extends Charset{
|
||||
*/
|
||||
public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
|
||||
CharsetProviderICU icuProvider = new CharsetProviderICU();
|
||||
Charset cs = icuProvider.charsetForName(charsetName);
|
||||
CharsetICU cs = (CharsetICU) icuProvider.charsetForName(charsetName);
|
||||
if (cs != null) {
|
||||
cs.writeBOM = true;
|
||||
return cs;
|
||||
}
|
||||
return Charset.forName(charsetName);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -265,7 +265,7 @@ class CharsetUTF16 extends CharsetICU {
|
||||
}
|
||||
char c;
|
||||
/* write the BOM if necessary */
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
|
||||
byte bom[]={ (byte)0xfe, (byte)0xff };
|
||||
cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
|
||||
if(cr.isError()){
|
||||
|
@ -269,7 +269,7 @@ class CharsetUTF16LE extends CharsetICU {
|
||||
}
|
||||
char c;
|
||||
/* write the BOM if necessary */
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
|
||||
byte bom[]={ (byte)0xff, (byte)0xfe };
|
||||
cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
|
||||
if(cr.isError()){
|
||||
|
@ -175,7 +175,7 @@ class CharsetUTF32 extends CharsetICU {
|
||||
}
|
||||
|
||||
/* write the BOM if necessary */
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
|
||||
byte[] bom={ 0, 0, (byte)0xfe, (byte)0xff };
|
||||
cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
|
||||
if(cr.isError()){
|
||||
|
@ -175,7 +175,7 @@ class CharsetUTF32LE extends CharsetICU {
|
||||
}
|
||||
|
||||
/* write the BOM if necessary */
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
|
||||
if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
|
||||
byte[] bom={ (byte)0xff, (byte)0xfe, 0, 0 };
|
||||
cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
|
||||
if(cr.isError()){
|
||||
@ -183,7 +183,7 @@ class CharsetUTF32LE extends CharsetICU {
|
||||
}
|
||||
fromUnicodeStatus=0;
|
||||
}
|
||||
|
||||
|
||||
int ch, ch2;
|
||||
int indexToWrite;
|
||||
byte temp[] = new byte[4];
|
||||
|
@ -47,7 +47,6 @@ public class TestCharset extends TestFmwk {
|
||||
(byte) 0x00,(byte) 0x0d,
|
||||
(byte) 0x00,(byte) 0x0a };
|
||||
static final byte[] expectedByteStr ={
|
||||
(byte) 0xFE,(byte) 0xFF,
|
||||
(byte) 0x00,(byte) 'a',
|
||||
(byte) 0x00,(byte) 'b',
|
||||
(byte) 0x00,(byte) 'c',
|
||||
@ -147,8 +146,8 @@ public class TestCharset extends TestFmwk {
|
||||
bs1.limit(bs1.position());
|
||||
bs1.position(0);
|
||||
ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
|
||||
newBS.put((byte)0xFE);
|
||||
newBS.put((byte)0xFF);
|
||||
//newBS.put((byte)0xFE);
|
||||
//newBS.put((byte)0xFF);
|
||||
newBS.put(bs1);
|
||||
bs1.position(0);
|
||||
smBufDecode(d1, "UTF-16", bs1, us);
|
||||
@ -157,8 +156,8 @@ public class TestCharset extends TestFmwk {
|
||||
bs2.limit(bs2.position());
|
||||
bs2.position(0);
|
||||
newBS.clear();
|
||||
newBS.put((byte)0xFF);
|
||||
newBS.put((byte)0xFE);
|
||||
//newBS.put((byte)0xFF);
|
||||
//newBS.put((byte)0xFE);
|
||||
newBS.put(bs2);
|
||||
bs2.position(0);
|
||||
smBufDecode(d2, "UTF16-LE", bs2, us);
|
||||
@ -216,29 +215,33 @@ public class TestCharset extends TestFmwk {
|
||||
if(us.length()==0){
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
|
||||
ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
|
||||
|
||||
/*
|
||||
newBS.put((byte)0x00);
|
||||
newBS.put((byte)0x00);
|
||||
newBS.put((byte)0xFE);
|
||||
newBS.put((byte)0xFF);
|
||||
newBS.put(bs1);
|
||||
*/
|
||||
newBS.put(bs1);
|
||||
bs1.position(0);
|
||||
smBufDecode(d1, "UTF-32", bs1, us);
|
||||
smBufEncode(e1, "UTF-32", us, newBS);
|
||||
|
||||
|
||||
newBS.clear();
|
||||
/*
|
||||
newBS.put((byte)0xFF);
|
||||
newBS.put((byte)0xFE);
|
||||
newBS.put((byte)0x00);
|
||||
newBS.put((byte)0x00);
|
||||
*/
|
||||
newBS.put(bs2);
|
||||
bs2.position(0);
|
||||
smBufDecode(d2, "UTF-32LE", bs2, us);
|
||||
smBufEncode(e2, "UTF-32LE", us, newBS);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -142,11 +142,11 @@ public class TestCharsetDetector extends TestFmwk
|
||||
CharsetDetector det = new CharsetDetector();
|
||||
byte[] bytes;
|
||||
|
||||
if (enc.startsWith("UTF-32")) {
|
||||
UTF32 utf32 = UTF32.getInstance(enc);
|
||||
//if (enc.startsWith("UTF-32")) {
|
||||
// UTF32 utf32 = UTF32.getInstance(enc);
|
||||
|
||||
bytes = utf32.toBytes(testString);
|
||||
} else {
|
||||
// bytes = utf32.toBytes(testString);
|
||||
//} else {
|
||||
String from = enc;
|
||||
|
||||
while (true) {
|
||||
@ -172,7 +172,7 @@ public class TestCharsetDetector extends TestFmwk
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
//}
|
||||
|
||||
det.setText(bytes);
|
||||
checkMatch(det, testString, enc, lang, id);
|
||||
|
@ -1,253 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2005-2006, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*/
|
||||
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* This class converts between an array of bytes in UTF-32 encoding (BE or LE) and
|
||||
* Java Strings.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public abstract class UTF32
|
||||
{
|
||||
/**
|
||||
* This method packs a 32-bit Unicode code point into the byte array. It is
|
||||
* implemented by subclasses that implement the BE and LE encodings.
|
||||
*
|
||||
* @param bytes the destination byte array
|
||||
* @param codePoint the 32-bit Unicode code point
|
||||
* @param out the destination index in <code>bytes</code>.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
abstract protected void pack(byte[] bytes, int codePoint, int out);
|
||||
|
||||
/**
|
||||
* This method unpacks bytes from the encoded byte array into a 32-bit
|
||||
* Unicode code point. It is implemented by subclasses that implement the BE and LE encodings.
|
||||
*
|
||||
* @param bytes the source byte array.
|
||||
* @param index the index of the first source byte.
|
||||
* @return the 32-bit Unicode code point.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
abstract protected int unpack(byte[] bytes, int index);
|
||||
|
||||
|
||||
/**
|
||||
* Convert a Java String into an array of UTF-32 encoded bytes. Calls
|
||||
* the <code>pack</code> method to do the encoding.
|
||||
*
|
||||
* @param utf16 the source Java String.
|
||||
* @return an array of UTF-32 encoded bytes.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public byte[] toBytes(String utf16)
|
||||
{
|
||||
int codePoints = UTF16.countCodePoint(utf16);
|
||||
byte[] bytes = new byte[codePoints * 4];
|
||||
int out = 0;
|
||||
|
||||
for (int cp = 0; cp < codePoints; out += 4) {
|
||||
int codePoint = UTF16.charAt(utf16, cp);
|
||||
|
||||
pack(bytes, codePoint, out);
|
||||
cp += UTF16.getCharCount(codePoint);
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method converts a sequence of UTF-32 encoded bytes into
|
||||
* a Java String. It calls the <code>unpack</code> method to implement
|
||||
* the encoding.
|
||||
*
|
||||
* @param bytes the source byte array.
|
||||
* @param offset the starting offset in the byte array.
|
||||
* @param count the number of bytes to process.
|
||||
* @return the Java String.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public String fromBytes(byte[] bytes, int offset, int count)
|
||||
{
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
int limit = offset + count;
|
||||
|
||||
for (int cp = offset; cp < limit; cp += 4) {
|
||||
int codePoint = unpack(bytes, cp);
|
||||
|
||||
UTF16.append(buffer, codePoint);
|
||||
}
|
||||
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenience method that converts an entire byte array
|
||||
* into a Java String.
|
||||
*
|
||||
* @param bytes the source byte array.
|
||||
* @return the Java String.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public String fromBytes(byte[] bytes)
|
||||
{
|
||||
return fromBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an instance that implements UTF-32BE encoding.
|
||||
*
|
||||
* @return the instance.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
static public UTF32 getBEInstance()
|
||||
{
|
||||
if (beInstance == null) {
|
||||
beInstance = new BE();
|
||||
}
|
||||
|
||||
return beInstance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an instance that implements the UTF-32LE encoding.
|
||||
*
|
||||
* @return the instance.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
static public UTF32 getLEInstance()
|
||||
{
|
||||
if (leInstance == null) {
|
||||
leInstance = new LE();
|
||||
}
|
||||
|
||||
return leInstance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an instance that implements either UTF-32BE or UTF-32LE,
|
||||
* depending on the encoding name supplied.
|
||||
*
|
||||
* @param encoding the encoding name - must be <code>"UTF-32BE"</code> or <code>"UTF-32LE"</code>.
|
||||
* @return the instance.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
static public UTF32 getInstance(String encoding)
|
||||
{
|
||||
if (encoding.equals("UTF-32BE")) {
|
||||
return getBEInstance();
|
||||
}
|
||||
|
||||
if (encoding.equals("UTF-32LE")) {
|
||||
return getLEInstance();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This subclass implements the UTF-32BE encoding via the
|
||||
* <code>pack</code> and <code>unpack</code> methods.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
static class BE extends UTF32
|
||||
{
|
||||
/**
|
||||
* This method packs a 32-bit Unicode code point into the byte array using
|
||||
* the UTF-32BE encoding.
|
||||
*
|
||||
* @param bytes the destination byte array
|
||||
* @param codePoint the 32-bit Unicode code point
|
||||
* @param out the destination index in <code>bytes</code>.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public void pack(byte[] bytes, int codePoint, int out)
|
||||
{
|
||||
bytes[out + 0] = (byte) ((codePoint >> 24) & 0xFF);
|
||||
bytes[out + 1] = (byte) ((codePoint >> 16) & 0xFF);
|
||||
bytes[out + 2] = (byte) ((codePoint >> 8) & 0xFF);
|
||||
bytes[out + 3] = (byte) ((codePoint >> 0) & 0xFF);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method unpacks bytes from the UTF-32BE encoded byte array into a 32-bit
|
||||
* Unicode code point.
|
||||
*
|
||||
* @param bytes the source byte array.
|
||||
* @param index the index of the first source byte.
|
||||
* @return the 32-bit Unicode code point.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public int unpack(byte[] bytes, int index)
|
||||
{
|
||||
return (bytes[index + 0] & 0xFF) << 24 | (bytes[index + 1] & 0xFF) << 16 |
|
||||
(bytes[index + 2] & 0xFF) << 8 | (bytes[index + 3] & 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This subclass implements the UTF-32LE encoding via the
|
||||
* <code>pack</code> and <code>unpack</code> methods.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
static class LE extends UTF32
|
||||
{
|
||||
/**
|
||||
* This method packs a 32-bit Unicode code point into the byte array using
|
||||
* the UTF-32LE encoding.
|
||||
*
|
||||
* @param bytes the destination byte array
|
||||
* @param codePoint the 32-bit Unicode code point
|
||||
* @param out the destination index in <code>bytes</code>.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public void pack(byte[] bytes, int codePoint, int out)
|
||||
{
|
||||
bytes[out + 3] = (byte) ((codePoint >> 24) & 0xFF);
|
||||
bytes[out + 2] = (byte) ((codePoint >> 16) & 0xFF);
|
||||
bytes[out + 1] = (byte) ((codePoint >> 8) & 0xFF);
|
||||
bytes[out + 0] = (byte) ((codePoint >> 0) & 0xFF);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method unpacks bytes from the UTF-32LE encoded byte array into a 32-bit
|
||||
* Unicode code point.
|
||||
*
|
||||
* @param bytes the source byte array.
|
||||
* @param index the index of the first source byte.
|
||||
* @return the 32-bit Unicode code point.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public int unpack(byte[] bytes, int index)
|
||||
{
|
||||
return (bytes[index + 3] & 0xFF) << 24 | (bytes[index + 2] & 0xFF) << 16 |
|
||||
(bytes[index + 1] & 0xFF) << 8 | (bytes[index + 0] & 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
private static UTF32 beInstance = null;
|
||||
private static UTF32 leInstance = null;
|
||||
}
|
@ -28,8 +28,6 @@ import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.TreeSet;
|
||||
import java.util.Iterator;
|
||||
|
Loading…
Reference in New Issue
Block a user