ICU-7273 remove old NormalizerImpl.getDecomposition()

X-SVN-Rev: 27504
2010-02-05 22:36:34 +00:00 · 2010-02-05 22:36:34 +00:00 · f90578937a
commit f90578937a
parent 016cd3ca66
8 changed files with 69 additions and 154 deletions
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
@ -14,6 +14,8 @@ import java.util.Hashtable;
 import java.util.Vector;

 import com.ibm.icu.impl.IntTrieBuilder;
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
 import com.ibm.icu.impl.NormalizerImpl;
 import com.ibm.icu.impl.TrieBuilder;
 import com.ibm.icu.impl.TrieIterator;
@ -1326,7 +1328,7 @@ final class CollationParsedRuleBuilder {
        new WeightRange(), new WeightRange(), new WeightRange(),
        new WeightRange() };
    private WeightRange m_utilWeightRange_ = new WeightRange();
-    private char m_utilCharBuffer_[] = new char[256];
+    private Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstanceNoIOException().impl;
    private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
    private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
    // Flag indicating a combining marks table is required or not.
@ -3856,12 +3858,9 @@ final class CollationParsedRuleBuilder {
            // if the range is assigned - we might omit more categories later

            for (int u32 = start; u32 < limit; u32++) {
-                int noOfDec = NormalizerImpl.getDecomposition(u32, false,
-                        m_utilCharBuffer_, 0, 256);
-                if (noOfDec > 0) {
-                    // if we're positive, that means there is no decomposition
+                String decomp = nfcImpl.getDecomposition(u32);
+                if (decomp != null) {
                    String comp = UCharacter.toString(u32);
-                    String decomp = new String(m_utilCharBuffer_, 0, noOfDec);
                    if (!collator.equals(comp, decomp)) {
                        m_utilElement_.m_cPoints_ = decomp;
                        m_utilElement_.m_prefix_ = 0;
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/NormalizerImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/NormalizerImpl.java
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
- * Copyright (C) 1996-2009, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 1996-2010, International Business Machines Corporation and
+ * others. All Rights Reserved.
 *******************************************************************************
 */
 
@ -1037,95 +1037,6 @@ public final class NormalizerImpl {
    //------------------------------------------------------ 
    // make NFD & NFKD 
    //------------------------------------------------------
-    public static int getDecomposition(int c /*UTF-32*/ , 
-                                        boolean compat,
-                                           char[] dest,
-                                           int destStart, 
-                                           int destCapacity) {
-            
-        if( (UNSIGNED_INT_MASK & c)<=0x10ffff) {
-            long /*unsigned*/ norm32;
-            int qcMask;
-            int minNoMaybe;
-            int length;
-    
-            // initialize 
-            if(!compat) {
-                minNoMaybe = indexes[INDEX_MIN_NFD_NO_MAYBE];
-                qcMask = QC_NFD;
-            } else {
-                minNoMaybe = indexes[INDEX_MIN_NFKD_NO_MAYBE];
-                qcMask = QC_NFKD;
-            }
-    
-            if(c<minNoMaybe) {
-                // trivial case 
-                if(destCapacity>0) {
-                    dest[0]=(char)c;
-                }
-                return -1;
-            }
-    
-            /* data lookup */
-            norm32=getNorm32(c);
-            if((norm32&qcMask)==0) {
-                /* simple case: no decomposition */
-                if(c<=0xffff) {
-                    if(destCapacity>0) {
-                        dest[0]=(char)c;
-                    }
-                    return -1;
-                } else {
-                    if(destCapacity>=2) {
-                        dest[0]=UTF16.getLeadSurrogate(c);
-                        dest[1]=UTF16.getTrailSurrogate(c);
-                    }
-                    return -2;
-                }
-            } else if(isNorm32HangulOrJamo(norm32)) {
-                /* Hangul syllable: decompose algorithmically */
-                char c2;
-    
-                c-=HANGUL_BASE;
-    
-                c2=(char)(c%JAMO_T_COUNT);
-                c/=JAMO_T_COUNT;
-                if(c2>0) {
-                    if(destCapacity>=3) {
-                        dest[2]=(char)(JAMO_T_BASE+c2);
-                    }
-                    length=3;
-                } else {
-                    length=2;
-                }
-    
-                if(destCapacity>=2) {
-                    dest[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
-                    dest[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
-                }
-                return length;
-            } else {
-                /* c decomposes, get everything from the variable-length extra 
-                 * data 
-                 */
-                int p, limit;
-                DecomposeArgs args = new DecomposeArgs();
-                /* the index into extra data array*/                 
-                p=decompose(norm32, qcMask, args);
-                if(args.length<=destCapacity) {
-                    limit=p+args.length;
-                    do {
-                        dest[destStart++]=extraData[p++];
-                    } while(p<limit);
-                }
-                return args.length;
-            }
-        } else {
-            return 0;
-        }
-    }
-
-    
    public static int decompose(char[] src,int srcStart,int srcLimit,
                                char[] dest,int destStart,int destLimit,
                                 boolean compat,int[] outTrailCC,
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
@ -3731,7 +3731,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
            return String.valueOf((char)ch);
        }

-        StringBuffer result = new StringBuffer();
+        StringBuilder result = new StringBuilder();
        result.append(UTF16.getLeadSurrogate(ch));
        result.append(UTF16.getTrailSurrogate(ch));
        return result.toString();
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/ComposedCharIter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ComposedCharIter.java
@ -1,13 +1,18 @@
 /*
 *******************************************************************************
- * Copyright (C) 1996-2007, International Business Machines Corporation and    *
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
 package com.ibm.icu.text;
-import com.ibm.icu.impl.NormalizerImpl;
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;

 /**
+ * This class has been deprecated since ICU 2.2.
+ * One problem is that this class is not designed to return supplementary characters.
+ * Use the Normalizer2 and UCharacter classes instead.
+ * <p>
 * <tt>ComposedCharIter</tt> is an iterator class that returns all
 * of the precomposed characters defined in the Unicode standard, along
 * with their decomposed forms.  This is often useful when building
@ -50,7 +55,6 @@ import com.ibm.icu.impl.NormalizerImpl;
 */
 ///CLOVER:OFF
 public final class ComposedCharIter {
-    
    /**
     * Constant that indicates the iteration has completed.
     * {@link #next} returns this value when there are no more composed characters
@ -58,7 +62,7 @@ public final class ComposedCharIter {
     * @deprecated ICU 2.2
     */
    public static final  char DONE = (char) Normalizer.DONE;
-    
+
    /**
     * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
     * all Unicode characters with canonical decompositions, including Korean
@ -66,11 +70,9 @@ public final class ComposedCharIter {
     * @deprecated ICU 2.2
     */
    public ComposedCharIter() {
-        compat = false;
-        //options =0;
+        this(false, 0);
    }
-    
-    
+
    /**
     * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
     * <p>
@ -78,18 +80,17 @@ public final class ComposedCharIter {
     *                  <tt>true</tt> for both canonical and compatibility
     *                  decompositions.
     *
-     * @param options   Optional decomposition features.  Currently, the only
-     *                  supported option is {@link Normalizer#IGNORE_HANGUL}, which
-     *                  causes this <tt>ComposedCharIter</tt> not to iterate
-     *                  over the Hangul characters and their corresponding
-     *                  Jamo decompositions.
+     * @param options   Optional decomposition features. None are supported, so this is ignored.
     * @deprecated ICU 2.2
     */
    public ComposedCharIter(boolean compat, int options) {
-        this.compat = compat;
-        //this.options = options;
+        if(compat) {
+            n2impl = Norm2AllModes.getNFKCInstanceNoIOException().impl;
+        } else {
+            n2impl = Norm2AllModes.getNFCInstanceNoIOException().impl;
+        }
    }
-    
+
    /**
     * Determines whether there are any precomposed Unicode characters not yet returned
     * by {@link #next}.
@ -129,36 +130,35 @@ public final class ComposedCharIter {
    public String decomposition() {
        // the decomposition buffer contains the decomposition of 
        // current char so just return it
-        return new String(decompBuf,0, bufLen);
+        if(decompBuf != null) {
+            return decompBuf;
+        } else {
+            return "";
+        }
    }
-    
+
    private void findNextChar() {
        int c=curChar+1;
-        for(;;){
-           if(c < 0xFFFF){
-               bufLen = NormalizerImpl.getDecomposition(c,compat,
-                                                        decompBuf,0,
-                                                        decompBuf.length);
-               if(bufLen>0){
+        decompBuf = null;
+        for(;;) {
+            if(c < 0xFFFF) {
+                decompBuf = n2impl.getDecomposition(c);
+                if(decompBuf != null) {
                    // the curChar can be decomposed... so it is a composed char
                    // cache the result     
                    break;
-               }
-               c++;
-           }else{
-               c=Normalizer.DONE;
-               break;
-           }
+                }
+                c++;
+            } else {
+                c=Normalizer.DONE;
+                break;
+            }
        }
        nextChar=c;  
    }
-    
-    //private int options;
-    private boolean compat;
-    private char[] decompBuf = new char[100];
-    private int bufLen=0;
+
+    private final Normalizer2Impl n2impl;
+    private String decompBuf;
    private int curChar = 0;
    private int nextChar = Normalizer.DONE;
-    
-
 }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer2.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer2.java
@ -9,6 +9,7 @@ package com.ibm.icu.text;
 import java.io.InputStream;
 import java.io.IOException;

+import com.ibm.icu.impl.Norm2AllModes;
 import com.ibm.icu.text.Normalizer;

 /**
@ -63,7 +64,7 @@ public abstract class Normalizer2 {
     * @draft ICU 4.4
     * @provisional This API might change or be removed in a future release.
     */
-    enum Mode {
+    public enum Mode {
        /**
         * Decomposition followed by composition.
         * Same as standard NFC when using an "nfc" instance.
@ -132,7 +133,14 @@ public abstract class Normalizer2 {
     * @provisional This API might change or be removed in a future release.
     */
    public static Normalizer2 getInstance(InputStream data, String name, Mode mode) throws IOException {
-        return null;
+        Norm2AllModes all2Modes=Norm2AllModes.getInstance(data, name);
+        switch(mode) {
+        case COMPOSE: return all2Modes.comp;
+        case DECOMPOSE: return all2Modes.decomp;
+        case FCD: return all2Modes.fcd;
+        case COMPOSE_CONTIGUOUS: return all2Modes.fcc;
+        default: return null;  // will not occur
+        }
    }

    /**
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
@ -2670,7 +2670,7 @@ public final class UTF16 {
            return String.valueOf((char) ch);
        }

-        StringBuffer result = new StringBuffer();
+        StringBuilder result = new StringBuilder();
        result.append(getLeadSurrogate(ch));
        result.append(getTrailSurrogate(ch));
        return result.toString();
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 1996-2009, International Business Machines Corporation and    *
+* Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -8,6 +8,7 @@
 package com.ibm.icu.dev.test.lang;

 import java.io.BufferedReader;
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Locale;

@ -26,6 +27,7 @@ import com.ibm.icu.lang.UCharacterDirection;
 import com.ibm.icu.lang.UCharacterEnums;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
@ -2279,9 +2281,7 @@ public final class UCharacterTest extends TestFmwk
    }

   /* various tests for consistency of UCD data and API behavior */
-   public void TestConsistency() {
-       char[] buffer16 = new char[300];
-       char[] buffer   = new char[300];
+   public void TestConsistency() throws IOException {
       UnicodeSet set1, set2, set3, set4;

       USerializedSet sset;
@ -2366,6 +2366,7 @@ public final class UCharacterTest extends TestFmwk
        * In general, the set for the middle such character should be a subset
        * of the set for the first.
        */
+       Normalizer2 norm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
       set1=new UnicodeSet();
       set2=new UnicodeSet();
       sset = new USerializedSet();
@ -2374,7 +2375,8 @@ public final class UCharacterTest extends TestFmwk

       /* enumerate all characters that are plausible to be latin letters */
       for(start=0xa0; start<0x2000; ++start) {
-           if(NormalizerImpl.getDecomposition(start, false, buffer16,0,buffer16.length) > 1 && buffer[0]==0x0049) {
+           String decomp=norm2.normalize(UTF16.valueOf(start));
+           if(decomp.length() > 1 && decomp.charAt(0)==0x49) {
               set2.add(start);
           }
       }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestDeprecatedNormalizerAPI.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestDeprecatedNormalizerAPI.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 1996-2007, International Business Machines Corporation and    *
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -146,10 +146,10 @@ public class TestDeprecatedNormalizerAPI extends TestFmwk

        ComposedCharIter iter = new ComposedCharIter(false, options);
        while (iter.hasNext()) {
-            char ch = iter.next();
+            final char ch = iter.next();

-            String chStr = new StringBuffer().append(ch).toString();
-            String decomp = Normalizer.decompose(chStr, compat);
+            String chStr = String.valueOf(ch);
+            String decomp = iter.decomposition();
            String comp = Normalizer.compose(decomp, compat);

            if (NormalizerImpl.isFullCompositionExclusion(ch)) {
@ -158,19 +158,14 @@ public class TestDeprecatedNormalizerAPI extends TestFmwk
            }

            // Avoid disparaged characters
-            if (getDecomposition(ch,compat).length() == 4) continue;
+            if (decomp.length() == 4) continue;

            if (!comp.equals(chStr)) {
                errln("ERROR: Round trip invalid: " + hex(chStr) + " --> " + hex(decomp)
                    + " --> " + hex(comp));

-                errln("  char decomp is '" + getDecomposition(ch,compat) + "'");
+                errln("  char decomp is '" + decomp + "'");
            }
        }
    }
-    private String getDecomposition(char ch, boolean compat){
-        char[] dest = new char[10];   
-        int length = NormalizerImpl.getDecomposition(ch,compat,dest,0,dest.length);   
-        return new String(dest,0,length);
-    }
 }