ICU-5407 remove UTF32.java and make the test depend on icu4j charsets

X-SVN-Rev: 20542
2006-10-20 22:54:22 +00:00 · 2006-10-20 22:54:22 +00:00 · 444fd811e9
commit 444fd811e9
parent 78af2c2195
9 changed files with 29 additions and 277 deletions
--- a/icu4j/src/com/ibm/icu/charset/CharsetICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetICU.java
@ -61,7 +61,9 @@ public abstract class CharsetICU extends Charset{
     short unicodeMask;            /* +79: 1  bit 0: has supplementary  bit 1: has single surrogates */
     byte subChar1;               /* +80: 1  single-byte substitution character for IBM MBCS (0 if none) */
     byte reserved[/*19*/];           /* +81: 19 to round out the structure */
-    
+     
+     boolean writeBOM = false; /* only used by UTF-16, UTF-32 */
+     
    /**
     * 
     * @param icuCanonicalName
@ -101,8 +103,8 @@ public abstract class CharsetICU extends Charset{
        algorithmicCharsets.put("HZ",                    "com.ibm.icu.charset.CharsetHZ" );
        algorithmicCharsets.put("imapmailboxname",       "com.ibm.icu.charset.CharsetIMAP" );
        algorithmicCharsets.put("ISCII",                 "com.ibm.icu.charset.CharsetISCII" );
-        algorithmicCharsets.put("iso2022",               "com.ibm.icu.charset.CharsetISO2022" );*/
-        /*algorithmicCharsets.put("lmbcs1",                "com.ibm.icu.charset.CharsetLMBCS1" );
+        algorithmicCharsets.put("iso2022",               "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("lmbcs1",                "com.ibm.icu.charset.CharsetLMBCS1" );
        algorithmicCharsets.put("lmbcs11",               "com.ibm.icu.charset.CharsetLMBCS11" );
        algorithmicCharsets.put("lmbcs16",               "com.ibm.icu.charset.CharsetLMBCS16" );
        algorithmicCharsets.put("lmbcs17",               "com.ibm.icu.charset.CharsetLMBCS17" );
@ -219,11 +221,13 @@ public abstract class CharsetICU extends Charset{
     */
    public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
        CharsetProviderICU icuProvider = new CharsetProviderICU();
-        Charset cs = icuProvider.charsetForName(charsetName);
+        CharsetICU cs = (CharsetICU) icuProvider.charsetForName(charsetName);
        if (cs != null) {
+            cs.writeBOM = true;
            return cs;
        }
        return Charset.forName(charsetName);
    }
+    
 }

--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF16.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF16.java
@ -265,7 +265,7 @@ class CharsetUTF16 extends CharsetICU {
            }
            char c;
            /* write the BOM if necessary */
-            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
                byte bom[]={ (byte)0xfe, (byte)0xff };
                cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
                if(cr.isError()){
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF16LE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF16LE.java
@ -269,7 +269,7 @@ class CharsetUTF16LE extends CharsetICU {
            }
            char c;
            /* write the BOM if necessary */
-            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
                byte bom[]={ (byte)0xff, (byte)0xfe };
                cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
                if(cr.isError()){
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32.java
@ -175,7 +175,7 @@ class CharsetUTF32 extends CharsetICU {
            }
            
            /* write the BOM if necessary */
-            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
                byte[] bom={ 0, 0, (byte)0xfe, (byte)0xff };
                cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
                if(cr.isError()){
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32LE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32LE.java
@ -175,7 +175,7 @@ class CharsetUTF32LE extends CharsetICU {
            }
            
            /* write the BOM if necessary */
-            if(fromUnicodeStatus==NEED_TO_WRITE_BOM) {
+            if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
                byte[] bom={ (byte)0xff, (byte)0xfe, 0, 0 };
                cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
                if(cr.isError()){
@ -183,7 +183,7 @@ class CharsetUTF32LE extends CharsetICU {
                }
                fromUnicodeStatus=0;
            }
-            
+             
            int ch, ch2;
            int indexToWrite;
            byte temp[] = new byte[4];
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@ -47,7 +47,6 @@ public class TestCharset extends TestFmwk {
            (byte) 0x00,(byte) 0x0d,
            (byte) 0x00,(byte) 0x0a };
    static final byte[] expectedByteStr ={
-        (byte) 0xFE,(byte) 0xFF,    
        (byte) 0x00,(byte) 'a',
        (byte) 0x00,(byte) 'b',
        (byte) 0x00,(byte) 'c',
@ -147,8 +146,8 @@ public class TestCharset extends TestFmwk {
            bs1.limit(bs1.position());
            bs1.position(0);
            ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
-            newBS.put((byte)0xFE);
-            newBS.put((byte)0xFF);
+            //newBS.put((byte)0xFE);
+            //newBS.put((byte)0xFF);
            newBS.put(bs1);    
            bs1.position(0);
            smBufDecode(d1, "UTF-16", bs1, us);
@ -157,8 +156,8 @@ public class TestCharset extends TestFmwk {
            bs2.limit(bs2.position());
            bs2.position(0);
            newBS.clear();
-            newBS.put((byte)0xFF);
-            newBS.put((byte)0xFE);
+            //newBS.put((byte)0xFF);
+            //newBS.put((byte)0xFE);
            newBS.put(bs2);     
            bs2.position(0);
            smBufDecode(d2, "UTF16-LE", bs2, us);
@ -216,29 +215,33 @@ public class TestCharset extends TestFmwk {
            if(us.length()==0){
                continue;
            }
-            
+             

            ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
-            
+            /*
            newBS.put((byte)0x00);
            newBS.put((byte)0x00);
            newBS.put((byte)0xFE);
            newBS.put((byte)0xFF);
-            newBS.put(bs1);    
+            */
+            newBS.put(bs1);
            bs1.position(0);
            smBufDecode(d1, "UTF-32", bs1, us);
            smBufEncode(e1, "UTF-32", us, newBS);
            
            
            newBS.clear();
+            /*
            newBS.put((byte)0xFF);
            newBS.put((byte)0xFE);
            newBS.put((byte)0x00);
            newBS.put((byte)0x00);
+            */
            newBS.put(bs2);    
            bs2.position(0);
            smBufDecode(d2, "UTF-32LE", bs2, us);
            smBufEncode(e2, "UTF-32LE", us, newBS);
+
        }
        
    }
--- a/icu4j/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
@ -142,11 +142,11 @@ public class TestCharsetDetector extends TestFmwk
            CharsetDetector det = new CharsetDetector();
            byte[] bytes;
            
-            if (enc.startsWith("UTF-32")) {
-                UTF32 utf32 = UTF32.getInstance(enc);
+            //if (enc.startsWith("UTF-32")) {
+            //    UTF32 utf32 = UTF32.getInstance(enc);
                
-                bytes = utf32.toBytes(testString);
-            } else {
+            //    bytes = utf32.toBytes(testString);
+            //} else {
                String from = enc;

                while (true) {
@ -172,7 +172,7 @@ public class TestCharsetDetector extends TestFmwk
                    
                    break;
                }
-            }
+            //}
        
            det.setText(bytes);
            checkMatch(det, testString, enc, lang, id);
--- a/icu4j/src/com/ibm/icu/impl/UTF32.java
+++ b/icu4j/src/com/ibm/icu/impl/UTF32.java
@ -1,253 +0,0 @@
-/*
- *******************************************************************************
- * Copyright (C) 2005-2006, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
- *******************************************************************************
- *
- */
-
-package com.ibm.icu.impl;
-
-import com.ibm.icu.text.UTF16;
-
-/**
- * This class converts between an array of bytes in UTF-32 encoding (BE or LE) and
- * Java Strings.
- * 
- * @internal
- */
-public abstract class UTF32
-{
-    /**
-     * This method packs a 32-bit Unicode code point into the byte array. It is
-     * implemented by subclasses that implement the BE and LE encodings.
-     * 
-     * @param bytes the destination byte array
-     * @param codePoint the 32-bit Unicode code point
-     * @param out the destination index in <code>bytes</code>.
-     * 
-     * @internal
-     */
-    abstract protected void pack(byte[] bytes, int codePoint, int out);
-    
-    /**
-     * This method unpacks bytes from the encoded byte array into a 32-bit
-     * Unicode code point. It is implmeented by subclasses that implmeent the BE and LE encodings.
-     * 
-     * @param bytes the source byte array.
-     * @param index the index of the first source byte.
-     * @return the 32-bit Unicode code point.
-     * 
-     * @internal
-     */
-    abstract protected int unpack(byte[] bytes, int index);
-    
-    
-    /**
-     * Convert a Java String into an array of UTF-32 encoded bytes. Calls
-     * the <code>pack</code> method to do the encoding.
-     * 
-     * @param utf16 the source Java String.
-     * @return an array of UTF-32 encoded bytes.
-     * 
-     * @internal
-     */
-    public byte[] toBytes(String utf16)
-    {
-        int codePoints = UTF16.countCodePoint(utf16);
-        byte[] bytes = new byte[codePoints * 4];
-        int out = 0;
-
-        for (int cp = 0; cp < codePoints; out += 4) {
-            int codePoint = UTF16.charAt(utf16, cp);
-            
-            pack(bytes, codePoint, out);
-            cp += UTF16.getCharCount(codePoint);
-        }
-        
-        return bytes;
-    }
-    
-    /**
-     * This method converts a sequence of UTF-32 encoded bytes into
-     * a Java String. It calls the <code>unpack</code> method to implement
-     * the encoding.
-     * 
-     * @param bytes the source byte array.
-     * @param offset the starting offset in the byte array.
-     * @param count the number of bytes to process.
-     * @return the Java String.
-     * 
-     * @internal
-     */
-    public String fromBytes(byte[] bytes, int offset, int count)
-    {
-        StringBuffer buffer = new StringBuffer();
-        int limit = offset + count;
-        
-        for (int cp = offset; cp < limit; cp += 4) {
-            int codePoint = unpack(bytes, cp);
-            
-            UTF16.append(buffer, codePoint);
-        }
-        
-        return buffer.toString();
-    }
-    
-    /**
-     * A convenience method that converts an entire byte array
-     * into a Java String.
-     * 
-     * @param bytes the source byte array.
-     * @return the Java String.
-     * 
-     * @internal
-     */
-    public String fromBytes(byte[] bytes)
-    {
-        return fromBytes(bytes, 0, bytes.length);
-    }
-    
-    /**
-     * Get an instance that implements UTF-32BE encoding.
-     * 
-     * @return the instance.
-     * 
-     * @internal
-     */
-    static public UTF32 getBEInstance()
-    {
-        if (beInstance == null) {
-            beInstance = new BE();
-        }
-        
-        return beInstance;
-    }
-    
-    /**
-     * Get an instance that implemnts the UTF-32LE encoding.
-     * 
-     * @return the instance.
-     * 
-     * @internal
-     */
-    static public UTF32 getLEInstance()
-    {
-        if (leInstance == null) {
-            leInstance = new LE();
-        }
-        
-        return leInstance;
-    }
-    
-    /**
-     * Get an instance that implements either UTF-32BE or UTF32-LE,
-     * depending on the encoding name suppled.
-     * 
-     * @param encoding the encoding name - must be <code>"UTF-32BE"</code> or <code>"UTF-32LE"</code>.
-     * @return the instance.
-     * 
-     * @internal
-     */
-    static public UTF32 getInstance(String encoding)
-    {
-        if (encoding.equals("UTF-32BE")) {
-            return getBEInstance();
-        }
-        
-        if (encoding.equals("UTF-32LE")) {
-            return getLEInstance();
-        }
-        
-        return null;
-    }
-    
-    /**
-     * This sublcass implements the UTF-32BE encoding via the
-     * <code>pack</code> and <code>unpack</code> methods.
-     * 
-     * @internal
-     */
-    static class BE extends UTF32
-    {
-        /**
-         * This method packs a 32-bit Unicode code point into the byte array using
-         * the UTF-32BE encoding.
-         * 
-         * @param bytes the destination byte array
-         * @param codePoint the 32-bit Unicode code point
-         * @param out the destination index in <code>bytes</code>.
-         * 
-         * @internal
-         */
-        public void pack(byte[] bytes, int codePoint, int out)
-        {
-            bytes[out + 0] = (byte) ((codePoint >> 24) & 0xFF);
-            bytes[out + 1] = (byte) ((codePoint >> 16) & 0xFF);
-            bytes[out + 2] = (byte) ((codePoint >>  8) & 0xFF);
-            bytes[out + 3] = (byte) ((codePoint >>  0) & 0xFF);
-        }
-        
-        /**
-         * This method unpacks bytes from the UTF-32BE encoded byte array into a 32-bit
-         * Unicode code point.
-         * 
-         * @param bytes the source byte array.
-         * @param index the index of the first source byte.
-         * @return the 32-bit Unicode code point.
-         * 
-         * @internal
-         */
-        public int unpack(byte[] bytes, int index)
-        {
-            return (bytes[index + 0] & 0xFF) << 24 | (bytes[index + 1] & 0xFF) << 16 |
-                   (bytes[index + 2] & 0xFF) <<  8 | (bytes[index + 3] & 0xFF);
-        }
-    }
-    
-    /**
-     * This sublcass implements the UTF-32LE encoding via the
-     * <code>pack</code> and <code>unpack</code> methods.
-     * 
-     * @internal
-     */
-    static class LE extends UTF32
-    {
-        /**
-         * This method packs a 32-bit Unicode code point into the byte array using
-         * the UTF-32LE encoding.
-         * 
-         * @param bytes the destination byte array
-         * @param codePoint the 32-bit Unicode code point
-         * @param out the destination index in <code>bytes</code>.
-         * 
-         * @internal
-         */
-        public void pack(byte[] bytes, int codePoint, int out)
-        {
-            bytes[out + 3] = (byte) ((codePoint >> 24) & 0xFF);
-            bytes[out + 2] = (byte) ((codePoint >> 16) & 0xFF);
-            bytes[out + 1] = (byte) ((codePoint >>  8) & 0xFF);
-            bytes[out + 0] = (byte) ((codePoint >>  0) & 0xFF);
-        }
-        
-        /**
-         * This method unpacks bytes from the UTF-32LE encoded byte array into a 32-bit
-         * Unicode code point.
-         * 
-         * @param bytes the source byte array.
-         * @param index the index of the first source byte.
-         * @return the 32-bit Unicode code point.
-         * 
-         * @internal
-         */
-        public int unpack(byte[] bytes, int index)
-        {
-            return (bytes[index + 3] & 0xFF) << 24 | (bytes[index + 2] & 0xFF) << 16 |
-                   (bytes[index + 1] & 0xFF) <<  8 | (bytes[index + 0] & 0xFF);
-        }
-    }
-    
-    private static UTF32 beInstance = null;
-    private static UTF32 leInstance = null;
-}
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@ -28,8 +28,6 @@ import com.ibm.icu.util.VersionInfo;

 import com.ibm.icu.text.BreakIterator;

-import java.util.Map;
-import java.util.HashMap;
 import java.util.MissingResourceException;
 import java.util.TreeSet;
 import java.util.Iterator;