ICU-7273 finish Normalizer2Impl port to Java (normalizeAndAppend() and boundary tests), and port changes to top-level Normalizer.compare()

X-SVN-Rev: 27485
2010-02-03 05:53:06 +00:00 · 2010-02-03 05:53:06 +00:00 · f9a9d47489
commit f9a9d47489
parent 7a8d49ed32
3 changed files with 234 additions and 234 deletions
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
@ -148,7 +148,7 @@ public final class Norm2AllModes {
        @Override
        protected void normalizeAndAppend(
                CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
-            impl.decomposeAndAppend(src, 0, src.length(), doNormalize, buffer);
+            impl.decomposeAndAppend(src, doNormalize, buffer);
        }
        @Override
        public int spanQuickCheckYes(CharSequence s) {
@ -179,7 +179,7 @@ public final class Norm2AllModes {
        @Override
        protected void normalizeAndAppend(
                CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
-            impl.composeAndAppend(src, 0, src.length(), doNormalize, onlyContiguous, buffer);
+            impl.composeAndAppend(src, doNormalize, onlyContiguous, buffer);
        }

        @Override
@ -234,7 +234,7 @@ public final class Norm2AllModes {
        @Override
        protected void normalizeAndAppend(
                CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
-            impl.makeFCDAndAppend(src, 0, src.length(), doNormalize, buffer);
+            impl.makeFCDAndAppend(src, doNormalize, buffer);
        }
        @Override
        public int spanQuickCheckYes(CharSequence s) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
@ -711,10 +711,31 @@ public final class Normalizer2Impl {
        }
        return src;
    }
-    public void decomposeAndAppend(CharSequence s, int src, int limit,
-                                   boolean doDecompose,
-                                   ReorderingBuffer buffer) {
-        throw new UnsupportedOperationException();  // TODO
+    public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
+        int limit=s.length();
+        if(limit==0) {
+            return;
+        }
+        if(doDecompose) {
+            decompose(s, 0, limit, buffer);
+            return;
+        }
+        // Just merge the strings at the boundary.
+        int c=Character.codePointAt(s, 0);
+        int src=0;
+        int firstCC, prevCC, cc;
+        firstCC=prevCC=cc=getCC(getNorm16(c));
+        while(cc!=0) {
+            prevCC=cc;
+            src+=Character.charCount(c);
+            if(src>=limit) {
+                break;
+            }
+            c=Character.codePointAt(s, src);
+            cc=getCC(getNorm16(c));
+        };
+        buffer.append(s, 0, src, firstCC, prevCC);
+        buffer.append(s, src, limit);
    }
    // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
    // doCompose: normalize
@ -1062,11 +1083,30 @@ public final class Normalizer2Impl {
            return prevBoundary<<1;  // "no"
        }
    }
-    public void composeAndAppend(CharSequence s, int src, int limit,
+    public void composeAndAppend(CharSequence s,
                                 boolean doCompose,
                                 boolean onlyContiguous,
                                 ReorderingBuffer buffer) {
-        throw new UnsupportedOperationException();  // TODO
+        int src=0, limit=s.length();
+        if(!buffer.isEmpty()) {
+            int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
+            if(0!=firstStarterInSrc) {
+                int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
+                                                               buffer.length());
+                StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
+                                                       firstStarterInSrc+16);
+                middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
+                buffer.removeSuffix(buffer.length()-lastStarterInDest);
+                middle.append(s, 0, firstStarterInSrc);
+                compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
+                src=firstStarterInSrc;
+            }
+        }
+        if(doCompose) {
+            compose(s, src, limit, onlyContiguous, true, buffer);
+        } else {
+            buffer.append(s, src, limit);
+        }
    }
    public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
        // Note: In this function we use buffer->appendZeroCC() because we track
@ -1195,14 +1235,65 @@ public final class Normalizer2Impl {
        }
        return src;
    }
-    public void makeFCDAndAppend(CharSequence s, int src, int limit,
-                                 boolean doMakeFCD,
-                                 ReorderingBuffer buffer) {
-        throw new UnsupportedOperationException();  // TODO
+    public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
+        int src=0, limit=s.length();
+        if(!buffer.isEmpty()) {
+            int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit);
+            if(0!=firstBoundaryInSrc) {
+                int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(),
+                                                               buffer.length());
+                StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+
+                                                       firstBoundaryInSrc+16);
+                middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
+                buffer.removeSuffix(buffer.length()-lastBoundaryInDest);
+                middle.append(s, 0, firstBoundaryInSrc);
+                makeFCD(middle, 0, middle.length(), buffer);
+                src=firstBoundaryInSrc;
+            }
+        }
+        if(doMakeFCD) {
+            makeFCD(s, src, limit, buffer);
+        } else {
+            buffer.append(s, src, limit);
+        }
    }

+    // Note: hasDecompBoundary() could be implemented as aliases to
+    // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
+    // at the cost of building the FCD trie for a decomposition normalizer.
    public boolean hasDecompBoundary(int c, boolean before) {
-        throw new UnsupportedOperationException();  // TODO
+        for(;;) {
+            if(c<minDecompNoCP) {
+                return true;
+            }
+            int norm16=getNorm16(c);
+            if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
+                return true;
+            } else if(norm16>MIN_NORMAL_MAYBE_YES) {
+                return false;  // ccc!=0
+            } else if(isDecompNoAlgorithmic(norm16)) {
+                c=mapAlgorithmic(c, norm16);
+            } else {
+                // c decomposes, get everything from the variable-length extra data
+                int firstUnit=extraData.charAt(norm16++);
+                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
+                    return false;
+                }
+                if(!before) {
+                    // decomp after-boundary: same as hasFCDBoundaryAfter(),
+                    // fcd16<=1 || trailCC==0
+                    if(firstUnit>0x1ff) {
+                        return false;  // trailCC>1
+                    }
+                    if(firstUnit<=0xff) {
+                        return true;  // trailCC==0
+                    }
+                    // if(trailCC==1) test leadCC==0, same as checking for before-boundary
+                }
+                // true if leadCC==0 (hasFCDBoundaryBefore())
+                return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16)&0xff00)==0;
+            }
+        }
    }
    public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }

@ -1210,7 +1301,33 @@ public final class Normalizer2Impl {
        return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
    }
    public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous, boolean testInert) {
-        throw new UnsupportedOperationException();  // TODO
+        for(;;) {
+            int norm16=getNorm16(c);
+            if(isInert(norm16)) {
+                return true;
+            } else if(norm16<=minYesNo) {
+                // Hangul LVT (==minYesNo) has a boundary after it.
+                // Hangul LV and non-inert yesYes characters combine forward.
+                return isHangul(norm16) && !Hangul.isHangulWithoutJamoT((char)c);
+            } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
+                return false;
+            } else if(isDecompNoAlgorithmic(norm16)) {
+                c=mapAlgorithmic(c, norm16);
+            } else {
+                // c decomposes, get everything from the variable-length extra data.
+                // If testInert, then c must be a yesNo character which has lccc=0,
+                // otherwise it could be a noNo.
+                int firstUnit=extraData.charAt(norm16);
+                // true if
+                //      c is not deleted, and
+                //      it and its decomposition do not combine forward, and it has a starter, and
+                //      if FCC then trailCC<=1
+                return
+                    (firstUnit&MAPPING_LENGTH_MASK)!=0 &&
+                    (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 &&
+                    (!onlyContiguous || firstUnit<=0x1ff);
+            }
+        }
    }

    public boolean hasFCDBoundaryBefore(int c) { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
@ -1322,7 +1439,6 @@ public final class Normalizer2Impl {
    private void decomposeShort(CharSequence s, int src, int limit,
                                ReorderingBuffer buffer) {
        while(src<limit) {
-            // TODO: use trie string iterator?? C++ uses UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
            int c=Character.codePointAt(s, src);
            src+=Character.charCount(c);
            decompose(c, getNorm16(c), buffer);
@ -1462,7 +1578,6 @@ public final class Normalizer2Impl {
            c=sb.codePointAt(p);
            p+=Character.charCount(c);
            norm16=getNorm16(c);
-            // TODO: use trie string iterator?? C++ uses UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
            cc=getCCFromYesOrMaybe(norm16);
            if( // this character combines backward and
                isMaybe(norm16) &&
@ -1612,8 +1727,17 @@ public final class Normalizer2Impl {
            }
        }
    }
-    private int findPreviousCompBoundary(CharSequence s, int start, int p) {
-        throw new UnsupportedOperationException();  // TODO
+    private int findPreviousCompBoundary(CharSequence s, int p) {
+        while(p>0) {
+            int c=Character.codePointBefore(s, p);
+            p-=Character.charCount(c);
+            if(hasCompBoundaryBefore(c)) {
+                break;
+            }
+            // We could also test hasCompBoundaryAfter() and return the index just past c,
+            // but that's probably not worth the extra cost.
+        }
+        return p;
    }
    private int findNextCompBoundary(CharSequence s, int p, int limit) {
        while(p<limit) {
@ -1627,8 +1751,15 @@ public final class Normalizer2Impl {
        return p;
    }

-    private int findPreviousFCDBoundary(CharSequence s, int start, int p) {
-        throw new UnsupportedOperationException();  // TODO
+    private int findPreviousFCDBoundary(CharSequence s, int p) {
+        while(p>0) {
+            int c=Character.codePointBefore(s, p);
+            p-=Character.charCount(c);
+            if(fcdTrie.get(c)<=0xff) {
+                break;
+            }
+        }
+        return p;
    }
    private int findNextFCDBoundary(CharSequence s, int p, int limit) {
        while(p<limit) {
@ -1642,23 +1773,22 @@ public final class Normalizer2Impl {
        return p;
    }

-    VersionInfo dataVersion;
+    @SuppressWarnings("unused")
+    private VersionInfo dataVersion;

    // Code point thresholds for quick check codes.
-    int minDecompNoCP;
-    int minCompNoMaybeCP;
+    private int minDecompNoCP;
+    private int minCompNoMaybeCP;

    // Norm16 value thresholds for quick check combinations and types of extra data.
-    int minYesNo;
-    int minNoNo;
-    int limitNoNo;
-    int minMaybeYes;
+    private int minYesNo;
+    private int minNoNo;
+    private int limitNoNo;
+    private int minMaybeYes;

-    Trie2_16 normTrie;
-    String maybeYesCompositions;
-    String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
+    private Trie2_16 normTrie;
+    private String maybeYesCompositions;
+    private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters

-    Trie2_16 fcdTrie;
+    private Trie2_16 fcdTrie;
 }
-
-// TODO: Copy parts of normalizer2impl.h starting with Normalizer2Factory??
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer.java
@ -154,11 +154,11 @@ public final class Normalizer implements Cloneable {
     * @stable ICU 2.8
     */
    public static class Mode {
-        protected Mode(Normalizer2 n2) {
+        private Mode(Normalizer2 n2) {
            normalizer2 = n2;
            uni32Normalizer2 = new FilteredNormalizer2(n2, UNI32_SET);
        }
-        protected final Normalizer2 getNormalizer2(int options) {
+        private final Normalizer2 getNormalizer2(int options) {
            return (options&UNICODE_3_2) != 0 ? uni32Normalizer2 : normalizer2;
        }

@ -191,14 +191,6 @@ public final class Normalizer implements Cloneable {
                                );
        }
        
-        /**
-         * This method is used for method dispatch
-         * @stable ICU 2.6
-         */
-        protected String normalize(String src, int options) {
-            return src;
-        }
-
        /**
         * This method is used for method dispatch
         * @stable ICU 2.8
@ -231,18 +223,6 @@ public final class Normalizer implements Cloneable {
            return null;
        }

-        /**
-         * This method is used for method dispatch
-         * @stable ICU 2.6
-         */
-        protected QuickCheckResult quickCheck(char[] src,int start, int limit, 
-                                              boolean allowMaybe,UnicodeSet nx) {
-            if(allowMaybe) {
-                return MAYBE;
-            }
-            return NO;
-        }
-
        /**
         * This method is used for method dispatch
         * @stable ICU 2.8
@ -281,10 +261,6 @@ public final class Normalizer implements Cloneable {
                                            false, trailCC,nx);
        }
        
-        protected String normalize( String src, int options) {
-            return decompose(src,false);
-        }
-
        protected int getMinC() {
            return NormalizerImpl.MIN_WITH_LEAD_CC;
        }
@ -301,21 +277,6 @@ public final class Normalizer implements Cloneable {
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD);
        }

-        protected QuickCheckResult quickCheck(char[] src,int start, 
-                                              int limit,boolean allowMaybe,
-                                              UnicodeSet nx) {
-            return NormalizerImpl.quickCheck(
-                                             src, start,limit,
-                                             NormalizerImpl.getFromIndexesArr(
-                                                                              NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE
-                                                                              ),
-                                             NormalizerImpl.QC_NFD,
-                                             0,
-                                             allowMaybe,
-                                             nx
-                                             );
-        }
-
        protected boolean isNFSkippable(int c) {
            return NormalizerImpl.isNFSkippable(c,this,
                                                (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD)
@ -343,10 +304,6 @@ public final class Normalizer implements Cloneable {
                                            true, trailCC, nx);
        }

-        protected String normalize( String src, int options) {
-            return decompose(src,true);
-        }
-
        protected int getMinC() {
            return NormalizerImpl.MIN_WITH_LEAD_CC;
        }
@ -363,21 +320,6 @@ public final class Normalizer implements Cloneable {
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD);
        }

-        protected QuickCheckResult quickCheck(char[] src,int start, 
-                                              int limit,boolean allowMaybe,
-                                              UnicodeSet nx) {
-            return NormalizerImpl.quickCheck(
-                                             src,start,limit,
-                                             NormalizerImpl.getFromIndexesArr(
-                                                                              NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE
-                                                                              ),
-                                             NormalizerImpl.QC_NFKD,
-                                             NormalizerImpl.OPTIONS_COMPAT,
-                                             allowMaybe,
-                                             nx
-                                             );
-        }
-
        protected boolean isNFSkippable(int c) {
            return NormalizerImpl.isNFSkippable(c, this,
                                                (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD)
@ -403,10 +345,6 @@ public final class Normalizer implements Cloneable {
                                           0, nx);
        }
  
-        protected String normalize( String src, int options) {
-            return compose(src, false, options);
-        }
-       
        protected int getMinC() {
            return NormalizerImpl.getFromIndexesArr(
                                                    NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
@ -421,20 +359,6 @@ public final class Normalizer implements Cloneable {
        protected int getMask() {
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFC);
        }
-        protected QuickCheckResult quickCheck(char[] src,int start, 
-                                              int limit,boolean allowMaybe,
-                                              UnicodeSet nx) {
-            return NormalizerImpl.quickCheck(
-                                             src,start,limit,
-                                             NormalizerImpl.getFromIndexesArr(
-                                                                              NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
-                                                                              ),
-                                             NormalizerImpl.QC_NFC,
-                                             0,
-                                             allowMaybe,
-                                             nx
-                                             );
-        }
        protected boolean isNFSkippable(int c) {
            return NormalizerImpl.isNFSkippable(c,this,
                                                ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
@ -468,9 +392,6 @@ public final class Normalizer implements Cloneable {
                                          NormalizerImpl.OPTIONS_COMPAT, nx);
        }

-        protected String normalize( String src, int options) {
-            return compose(src, true, options);
-        }
        protected int getMinC() {
            return NormalizerImpl.getFromIndexesArr(
                                                    NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
@ -485,20 +406,6 @@ public final class Normalizer implements Cloneable {
        protected int getMask() {
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKC);
        }
-        protected QuickCheckResult quickCheck(char[] src,int start, 
-                                              int limit,boolean allowMaybe,
-                                              UnicodeSet nx) {
-            return NormalizerImpl.quickCheck(
-                                             src,start,limit,
-                                             NormalizerImpl.getFromIndexesArr(
-                                                                              NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
-                                                                              ),
-                                             NormalizerImpl.QC_NFKC,
-                                             NormalizerImpl.OPTIONS_COMPAT,
-                                             allowMaybe,
-                                             nx
-                                             );
-        }
        protected boolean isNFSkippable(int c) {
            return NormalizerImpl.isNFSkippable(c, this,
                                                ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
@ -525,9 +432,6 @@ public final class Normalizer implements Cloneable {
            return NormalizerImpl.makeFCD(src, srcStart,srcLimit,
                                          dest, destStart,destLimit, nx);
        }
-        protected String normalize( String src, int options) {
-            return makeFCD(src, options);
-        }
        protected int getMinC() {
            return NormalizerImpl.MIN_WITH_LEAD_CC;
        }
@ -540,11 +444,6 @@ public final class Normalizer implements Cloneable {
        protected int getMask() {
            return NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD;
        }
-        protected QuickCheckResult quickCheck(char[] src,int start, 
-                                              int limit,boolean allowMaybe,
-                                              UnicodeSet nx) {
-            return NormalizerImpl.checkFCD(src,start,limit,nx) ? YES : NO;
-        }
        protected boolean isNFSkippable(int c) {
            /* FCD: skippable if lead cc==0 and trail cc<=1 */
            return (NormalizerImpl.getFCD16(c)>1);
@ -1005,10 +904,6 @@ public final class Normalizer implements Cloneable {
        return app.length();
    }

-    private static String makeFCD(String src,int options) {
-        return Norm2AllModes.getFCDNormalizer2NoIOException().normalize(src);
-    }
-    
    /**
     * Normalizes a <tt>String</tt> using the given normalization operation.
     * <p>
@ -1329,8 +1224,14 @@ public final class Normalizer implements Cloneable {
    public static int compare(char[] s1, int s1Start, int s1Limit,
                              char[] s2, int s2Start, int s2Limit,
                              int options) {
-        return internalCompare(s1, s1Start, s1Limit, 
-                               s2, s2Start, s2Limit, 
+        if( s1==null || s1Start<0 || s1Limit<0 || 
+            s2==null || s2Start<0 || s2Limit<0 ||
+            s1Limit<s1Start || s2Limit<s2Start
+        ) {
+            throw new IllegalArgumentException();
+        }
+        return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start), 
+                               CharBuffer.wrap(s2, s2Start, s2Limit-s2Start), 
                               options);
    } 

@ -1338,7 +1239,21 @@ public final class Normalizer implements Cloneable {
     * Compare two strings for canonical equivalence.
     * Further options include case-insensitive comparison and
     * code point order (as opposed to code unit order).
-     * Convenience method.
+     *
+     * Canonical equivalence between two strings is defined as their normalized
+     * forms (NFD or NFC) being identical.
+     * This function compares strings incrementally instead of normalizing
+     * (and optionally case-folding) both strings entirely,
+     * improving performance significantly.
+     *
+     * Bulk normalization is only necessary if the strings do not fulfill the 
+     * FCD conditions. Only in this case, and only if the strings are relatively 
+     * long, is memory allocated temporarily.
+     * For FCD strings and short non-FCD strings there is no memory allocation.
+     *
+     * Semantically, this is equivalent to
+     *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
+     * where code point order and foldCase are both optional.
     *
     * @param s1 First source string.
     * @param s2 Second source string.
@ -1368,10 +1283,7 @@ public final class Normalizer implements Cloneable {
     * @stable ICU 2.8
     */
    public static int compare(String s1, String s2, int options) {
-         
-        return compare(s1.toCharArray(),0,s1.length(),
-                       s2.toCharArray(),0,s2.length(),
-                       options);
+        return internalCompare(s1, s2, options);
    }

    /**
@ -1408,7 +1320,7 @@ public final class Normalizer implements Cloneable {
     * @stable ICU 2.8
     */
    public static int compare(char[] s1, char[] s2, int options) {
-        return compare(s1,0,s1.length,s2,0,s2.length,options);
+        return internalCompare(CharBuffer.wrap(s1), CharBuffer.wrap(s2), options);
    }

    /**
@ -1419,10 +1331,8 @@ public final class Normalizer implements Cloneable {
     * @param options    A bit set of options
     * @stable ICU 2.8
     */
-    // TODO: actually do the optimization when the guts of Normalizer are 
-    // upgraded --has just dumb implementation for now
-    public static int compare(int char32a, int char32b,int options) {
-        return compare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options);
+    public static int compare(int char32a, int char32b, int options) {
+        return internalCompare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options);
    }

    /**
@ -1433,10 +1343,8 @@ public final class Normalizer implements Cloneable {
     * @param options   A bit set of options
     * @stable ICU 2.8
     */
-    // TODO: actually do the optimization when the guts of Normalizer are 
-    // upgraded --has just dumb implementation for now
    public static int compare(int char32a, String str2, int options) {
-        return compare(UTF16.valueOf(char32a), str2, options);
+        return internalCompare(UTF16.valueOf(char32a), str2, options);
    }

    /**
@ -2618,27 +2526,10 @@ public final class Normalizer implements Cloneable {
    }    

        
-    private static int internalCompare(char[] s1, int s1Start,int s1Limit,
-                                       char[] s2, int s2Start,int s2Limit,
-                                       int options) {
-                                  
-        char[] fcd1  = new char[300];
-        char[] fcd2  = new char[300];
-        
-        Normalizer.Mode mode;
-        int result;
-        
-        if(    s1==null || s1Start<0 || s1Limit<0 || 
-               s2==null || s2Start<0 || s2Limit<0 ||
-               s1Limit<s1Start || s2Limit<s2Start
-               ) {
-                
-            throw new IllegalArgumentException();
-        }
-
-        UnicodeSet nx=NormalizerImpl.getNX(options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT);
+    // TODO: Consider proposing this function as public API.
+    private static int internalCompare(CharSequence s1, CharSequence s2, int options) {
+        int normOptions=options>>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
        options|= NormalizerImpl.COMPARE_EQUIV;
-        result=0;

        /*
         * UAX #21 Case Mappings, as fixed for Unicode version 4
@ -2661,20 +2552,18 @@ public final class Normalizer implements Cloneable {
         * are first decomposed or not, so an FCD check - a check only for
         * canonical order - is not sufficient.
         */
-        if((options& Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) >0 ) {
-            mode=Normalizer.NFD;
-            options&=~ Normalizer.INPUT_IS_FCD;
-        } else {
-            mode=Normalizer.FCD;
-        }
-        if((options& Normalizer.INPUT_IS_FCD)==0) {
-            char[] dest;
-            int fcdLen1, fcdLen2;
-            boolean isFCD1, isFCD2;
+        if((options&INPUT_IS_FCD)==0 || (options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
+            Normalizer2 n2;
+            if((options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
+                n2=NFD.getNormalizer2(normOptions);
+            } else {
+                n2=FCD.getNormalizer2(normOptions);
+            }

            // check if s1 and/or s2 fulfill the FCD conditions
-            isFCD1= Normalizer.YES==mode.quickCheck(s1, s1Start, s1Limit, true, nx);
-            isFCD2= Normalizer.YES==mode.quickCheck(s2, s2Start, s2Limit, true, nx);
+            int spanQCYes1=n2.spanQuickCheckYes(s1);
+            int spanQCYes2=n2.spanQuickCheckYes(s2);
+
            /*
             * ICU 2.4 had a further optimization:
             * If both strings were not in FCD, then they were both NFD'ed,
@ -2684,47 +2573,28 @@ public final class Normalizer implements Cloneable {
             * Therefore, ICU 2.6 removes that optimization.
             */

-            if(!isFCD1) {
-                fcdLen1=mode.normalize(s1, 0, s1.length,
-                                       fcd1, 0, fcd1.length,
-                                       nx);
-                                       
-                if(fcdLen1>fcd1.length) {
-                    dest=new char[fcdLen1];
-                    fcdLen1=mode.normalize( s1, 0, s1.length,
-                                            dest, 0, dest.length,
-                                            nx);
-                    s1=dest;
-                } else {
-                    s1=fcd1;
-                }
-                s1Limit=fcdLen1;
-                s1Start=0;
+            if(spanQCYes1<s1.length()) {
+                StringBuilder fcd1=new StringBuilder(s1.length()+16).append(s1, 0, spanQCYes1);
+                s1=n2.normalizeSecondAndAppend(fcd1, s1.subSequence(spanQCYes1, s1.length()));
            }
-
-            if(!isFCD2) {
-                fcdLen2=mode.normalize(s2,s2Start,s2Limit,
-                                       fcd2,0,fcd2.length,
-                                       nx);
-                
-                if(fcdLen2>fcd2.length) {
-                    dest=new char[fcdLen2];
-                    fcdLen2=mode.normalize( s2,s2Start,s2Limit,
-                                            dest,0,dest.length,
-                                            nx);
-                    s2=dest;
-                } else {
-                    s2=fcd2;
-                }
-                s2Limit=fcdLen2;
-                s2Start=0;
+            if(spanQCYes2<s2.length()) {
+                StringBuilder fcd2=new StringBuilder(s2.length()+16).append(s2, 0, spanQCYes2);
+                s2=n2.normalizeSecondAndAppend(fcd2, s2.subSequence(spanQCYes2, s2.length()));
            }
-                
        }

-        result=NormalizerImpl.cmpEquivFold(s1, s1Start, s1Limit, 
-                                           s2, s2Start, s2Limit, options);
-        return result;
+        // TODO: Temporarily hideously slow. Convert internals to work on CharSequence.
+        int length1=s1.length();
+        char[] s1Array=new char[length1];
+        for(int i=0; i<length1; ++i) {
+            s1Array[i]=s1.charAt(i);
+        }
+        int length2=s2.length();
+        char[] s2Array=new char[length2];
+        for(int i=0; i<length2; ++i) {
+            s2Array[i]=s2.charAt(i);
+        }
+        return NormalizerImpl.cmpEquivFold(s1Array, 0, length1, s2Array, 0, length2, options);
    }    

    /**