ICU-7746 port UTS #46 to Java

X-SVN-Rev: 28307
2010-07-14 20:11:29 +00:00 · 2010-07-14 20:11:29 +00:00 · a3d88afb60
commit a3d88afb60
parent c2228ba03e
9 changed files with 2305 additions and 466 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -88,6 +88,8 @@ icu4j/main/classes/core/.classpath -text
 icu4j/main/classes/core/.project -text
 icu4j/main/classes/core/.settings/org.eclipse.jdt.core.prefs -text
 icu4j/main/classes/core/manifest.stub -text
+icu4j/main/classes/core/src/com/ibm/icu/impl/IDNA2003.java -text
+icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java -text
 icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java -text
 icu4j/main/classes/currdata/.externalToolBuilders/copy-data-currdata.launch -text
 icu4j/main/classes/currdata/.settings/org.eclipse.jdt.core.prefs -text
@ -142,6 +144,7 @@ icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/BidiTest.txt -text
 icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/confusables.txt -text
 icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/confusablesWholeScript.txt -text
 icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/BiDiConformanceTest.java -text
+icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/UTS46Test.java -text
 icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text
 icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text
 icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/IDNA2003.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/IDNA2003.java
@ -0,0 +1,437 @@
+/*
+*******************************************************************************
+* Copyright (C) 2003-2010, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import com.ibm.icu.impl.Punycode;
+import com.ibm.icu.text.IDNA;
+import com.ibm.icu.text.StringPrep;
+import com.ibm.icu.text.StringPrepParseException;
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ * IDNA2003 implementation code, moved out of com.ibm.icu.text.IDNA.java
+ * while extending that class to support IDNA2008/UTS #46 as well.
+ * @author Ram Viswanadha
+ */
+public final class IDNA2003 {
+    /* IDNA ACE Prefix is "xn--" */
+    private static char[] ACE_PREFIX                = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
+    //private static final int ACE_PREFIX_LENGTH      = ACE_PREFIX.length;
+
+    private static final int MAX_LABEL_LENGTH       = 63;
+    private static final int HYPHEN                 = 0x002D;
+    private static final int CAPITAL_A              = 0x0041;
+    private static final int CAPITAL_Z              = 0x005A;
+    private static final int LOWER_CASE_DELTA       = 0x0020;
+    private static final int FULL_STOP              = 0x002E;
+    private static final int MAX_DOMAIN_NAME_LENGTH = 255;
+
+    // The NamePrep profile object
+    private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
+    
+    private static boolean startsWithPrefix(StringBuffer src){
+        boolean startsWithPrefix = true;
+
+        if(src.length() < ACE_PREFIX.length){
+            return false;
+        }
+        for(int i=0; i<ACE_PREFIX.length;i++){
+            if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
+                startsWithPrefix = false;
+            }
+        }
+        return startsWithPrefix;
+    }
+
+    private static char toASCIILower(char ch){
+        if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
+            return (char)(ch + LOWER_CASE_DELTA);
+        }
+        return ch;
+    }
+
+    /**
+     * Returns a new buffer holding src with every ASCII capital letter lowercased.
+     *
+     * @param src the text to copy
+     * @return a fresh StringBuffer of the same length as src
+     */
+    private static StringBuffer toASCIILower(CharSequence src){
+        StringBuffer lowered = new StringBuffer();
+        int index = 0;
+        while(index < src.length()){
+            lowered.append(toASCIILower(src.charAt(index)));
+            index++;
+        }
+        return lowered;
+    }
+
+    private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
+        char c1,c2;
+        int rc;
+        for(int i =0;/* no condition */;i++) {
+            /* If we reach the ends of both strings then they match */
+            if(i == s1.length()) {
+                return 0;
+            }
+
+            c1 = s1.charAt(i);
+            c2 = s2.charAt(i);
+        
+            /* Case-insensitive comparison */
+            if(c1!=c2) {
+                rc=toASCIILower(c1)-toASCIILower(c2);
+                if(rc!=0) {
+                    return rc;
+                }
+            }
+        }
+    }
+   
+    /**
+     * Finds the first label separator in src[start..limit).
+     *
+     * @param src   the chars to scan
+     * @param start index of the first char to examine
+     * @param limit index just past the last char to examine
+     * @return the index of the first separator, or the index at which scanning
+     *         stopped (limit, or start if start is already at or past limit)
+     */
+    private static int getSeparatorIndex(char[] src,int start, int limit){
+        int index = start;
+        while(index < limit && !isLabelSeparator(src[index])){
+            index++;
+        }
+        return index;
+    }
+    
+    /*
+    private static int getSeparatorIndex(UCharacterIterator iter){
+        int currentIndex = iter.getIndex();
+        int separatorIndex = 0;
+        int ch;
+        while((ch=iter.next())!= UCharacterIterator.DONE){
+            if(isLabelSeparator(ch)){
+                separatorIndex = iter.getIndex();
+                iter.setIndex(currentIndex);
+                return separatorIndex;
+            }
+        }
+        // reset index
+        iter.setIndex(currentIndex);
+        // we have not found the separator just return the length
+       
+    }
+    */
+    
+
+    private static boolean isLDHChar(int ch){
+        // high runner case
+        if(ch>0x007A){
+            return false;
+        }
+        //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
+        if( (ch==0x002D) || 
+            (0x0030 <= ch && ch <= 0x0039) ||
+            (0x0041 <= ch && ch <= 0x005A) ||
+            (0x0061 <= ch && ch <= 0x007A)
+          ){
+            return true;
+        }
+        return false;
+    }
+    
+    /**
+     * Tests whether the given code point is one of the four label separators
+     * recognized by this IDNA2003 implementation.
+     *
+     * @param ch the code point to test
+     * @return true if ch is a label separator
+     */
+    private static boolean isLabelSeparator(int ch){
+        return ch == 0x002e     // FULL STOP
+            || ch == 0x3002     // IDEOGRAPHIC FULL STOP
+            || ch == 0xFF0E     // FULLWIDTH FULL STOP
+            || ch == 0xFF61;    // HALFWIDTH IDEOGRAPHIC FULL STOP
+    }
+
+    public static StringBuffer convertToASCII(UCharacterIterator src, int options)
+            throws StringPrepParseException{
+        
+        boolean[] caseFlags = null;
+    
+        // the source contains all ascii codepoints
+        boolean srcIsASCII  = true;
+        // assume the source contains all LDH codepoints
+        boolean srcIsLDH = true; 
+
+        //get the options
+        boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0);
+        int ch;
+        // step 1
+        while((ch = src.next())!= UCharacterIterator.DONE){
+            if(ch> 0x7f){
+                srcIsASCII = false;
+            }
+        }
+        int failPos = -1;
+        src.setToStart();
+        StringBuffer processOut = null;
+        // step 2 is performed only if the source contains non ASCII
+        if(!srcIsASCII){
+            // step 2
+            processOut = namePrep.prepare(src, options);
+        }else{
+            processOut = new StringBuffer(src.getText());
+        }
+        int poLen = processOut.length();
+        
+        if(poLen==0){
+            throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
+        }
+        StringBuffer dest = new StringBuffer();
+        
+        // reset the variable to verify if output of prepare is ASCII or not
+        srcIsASCII = true;
+        
+        // step 3 & 4
+        for(int j=0;j<poLen;j++ ){
+            ch=processOut.charAt(j);
+            if(ch > 0x7F){
+                srcIsASCII = false;
+            }else if(isLDHChar(ch)==false){
+                // here we do not assemble surrogates
+                // since we know that LDH code points
+                // are in the ASCII range only
+                srcIsLDH = false;
+                failPos = j;
+            }
+        }
+    
+        if(useSTD3ASCIIRules == true){
+            // verify 3a and 3b
+            if( srcIsLDH == false /* source contains some non-LDH characters */
+                || processOut.charAt(0) ==  HYPHEN 
+                || processOut.charAt(processOut.length()-1) == HYPHEN){
+
+                /* populate the parseError struct */
+                if(srcIsLDH==false){
+                     throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
+                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,
+                                              processOut.toString(),
+                                             (failPos>0) ? (failPos-1) : failPos);
+                }else if(processOut.charAt(0) == HYPHEN){
+                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
+                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
+     
+                }else{
+                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
+                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,
+                                              processOut.toString(),
+                                              (poLen>0) ? poLen-1 : poLen);
+
+                }
+            }
+        }
+        if(srcIsASCII){
+            dest =  processOut;
+        }else{
+            // step 5 : verify the sequence does not begin with ACE prefix
+            if(!startsWithPrefix(processOut)){
+
+                //step 6: encode the sequence with punycode
+                caseFlags = new boolean[poLen];
+
+                StringBuilder punyout = Punycode.encode(processOut,caseFlags);
+
+                // convert all codepoints to lower case ASCII
+                StringBuffer lowerOut = toASCIILower(punyout);
+
+                //Step 7: prepend the ACE prefix
+                dest.append(ACE_PREFIX,0,ACE_PREFIX.length);
+                //Step 6: copy the contents in b2 into dest
+                dest.append(lowerOut);
+            }else{
+
+                throw new StringPrepParseException("The input does not start with the ACE Prefix.",
+                                         StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
+            }
+        }
+        if(dest.length() > MAX_LABEL_LENGTH){
+            throw new StringPrepParseException("The labels in the input are too long. Length > 63.", 
+                                     StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
+        }
+        return dest;
+    }
+
+    /**
+     * Converts a whole domain name to ASCII by splitting it at label separators,
+     * converting each label with convertToASCII(), and joining the results
+     * with U+002E FULL STOP.
+     * <p>
+     * An empty label at the very end of the input (after a trailing separator,
+     * or an entirely empty input) is skipped rather than converted.
+     *
+     * @param src     the domain name to convert
+     * @param options IDNA option bits, passed through to convertToASCII()
+     * @return the converted domain name
+     * @throws StringPrepParseException if a label fails to convert, or if the
+     *         result is longer than MAX_DOMAIN_NAME_LENGTH (DOMAIN_NAME_TOO_LONG_ERROR)
+     */
+    public static StringBuffer convertIDNToASCII(String src,int options)
+            throws StringPrepParseException{
+
+        final char[] chars = src.toCharArray();
+        final int length = chars.length;
+        final StringBuffer out = new StringBuffer();
+
+        int labelStart = 0;
+        int labelEnd = getSeparatorIndex(chars, 0, length);
+        while(true){
+            String label = new String(chars, labelStart, labelEnd-labelStart);
+            boolean atEnd = (labelEnd == length);
+            // skip only the empty "root" label at the end of the input
+            if(label.length() != 0 || !atEnd){
+                out.append(convertToASCII(UCharacterIterator.getInstance(label), options));
+            }
+            if(atEnd){
+                break;
+            }
+            // whatever separator was found, the output always uses a full stop
+            out.append((char)FULL_STOP);
+            labelStart = labelEnd + 1;
+            labelEnd = getSeparatorIndex(chars, labelStart, length);
+        }
+
+        if(out.length() > MAX_DOMAIN_NAME_LENGTH){
+            throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
+        }
+        return out;
+    }
+
+    /**
+     * Converts a single label to its Unicode form, following the numbered
+     * ToUnicode steps marked in the comments below.
+     * <p>
+     * If NamePrep fails, the label has no ACE prefix, Punycode decoding fails,
+     * or the round-trip check fails, the original text of src is returned.
+     * <p>
+     * TODO: The RFC states that
+     * "ToUnicode never fails. If any step fails, then the original input
+     * is returned immediately in that step."
+     * The convertToASCII() call in step 6 is not guarded, so its exception
+     * still propagates to the caller.
+     *
+     * @param src     iterator over the label to convert
+     * @param options IDNA option bits, passed to NamePrep and convertToASCII()
+     * @return the decoded label, or a copy of the original text of src
+     * @throws StringPrepParseException if convertToASCII() fails in step 6
+     */
+    public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
+            throws StringPrepParseException{
+
+        // case flags are not used here; Punycode.decode() is handed null
+        final boolean[] caseFlags = null;
+        final int startIndex = src.getIndex();
+
+        // step 1: find out if all the codepoints in src are ASCII
+        boolean allASCII = true;
+        for(int cp = src.next(); cp != UCharacterIterator.DONE; cp = src.next()){
+            if(cp > 0x7F){
+                allASCII = false;
+            }
+        }
+
+        StringBuffer prepared;
+        if(allASCII){
+            // nothing to prepare, work on a copy of the source text
+            prepared = new StringBuffer(src.getText());
+        }else{
+            try {
+                // step 2: process the string
+                src.setIndex(startIndex);
+                prepared = namePrep.prepare(src, options);
+            } catch (StringPrepParseException ex) {
+                return new StringBuffer(src.getText());
+            }
+        }
+
+        // step 3: verify ACE Prefix
+        if(!startsWithPrefix(prepared)){
+            return new StringBuffer(src.getText());
+        }
+
+        // step 4: Remove the ACE Prefix
+        final String encoded = prepared.substring(ACE_PREFIX.length, prepared.length());
+
+        // step 5: Decode using punycode
+        StringBuffer decoded;
+        try {
+            decoded = new StringBuffer(Punycode.decode(encoded, caseFlags));
+        } catch (StringPrepParseException e) {
+            return new StringBuffer(src.getText());
+        }
+
+        // step 6: Apply toASCII
+        final StringBuffer roundTrip = convertToASCII(UCharacterIterator.getInstance(decoded), options);
+
+        // step 7: verify that re-encoding reproduces the prepared input
+        if(compareCaseInsensitiveASCII(prepared, roundTrip) != 0){
+            return new StringBuffer(src.getText());
+        }
+
+        // step 8: return output of step 5
+        return decoded;
+    }
+
+    /**
+     * Converts a whole domain name to Unicode by splitting it at label
+     * separators and converting each label with convertToUnicode().
+     * The original separator characters are copied to the output unchanged.
+     *
+     * @param src     the domain name to convert
+     * @param options IDNA option bits, passed through to convertToUnicode()
+     * @return the converted domain name
+     * @throws StringPrepParseException if an empty label is followed by a
+     *         separator (ZERO_LENGTH_LABEL), if a label fails to convert, or if
+     *         the result is longer than MAX_DOMAIN_NAME_LENGTH
+     *         (DOMAIN_NAME_TOO_LONG_ERROR)
+     */
+    public static StringBuffer convertIDNToUnicode(String src, int options)
+            throws StringPrepParseException{
+
+        final char[] chars = src.toCharArray();
+        final int length = chars.length;
+        final StringBuffer out = new StringBuffer();
+
+        int labelStart = 0;
+        int labelEnd = getSeparatorIndex(chars, 0, length);
+        while(true){
+            String label = new String(chars, labelStart, labelEnd-labelStart);
+            boolean atEnd = (labelEnd == length);
+            if(!atEnd && label.length() == 0){
+                throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
+            }
+            out.append(convertToUnicode(UCharacterIterator.getInstance(label), options));
+            if(atEnd){
+                break;
+            }
+            // Unlike the ToASCII operation we don't normalize the label separators
+            out.append(chars[labelEnd]);
+            labelStart = labelEnd + 1;
+            labelEnd = getSeparatorIndex(chars, labelStart, length);
+        }
+
+        if(out.length() > MAX_DOMAIN_NAME_LENGTH){
+            throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
+        }
+        return out;
+    }
+
+    /**
+     * Compares two domain names by converting both to ASCII with
+     * convertIDNToASCII() and comparing the results ignoring ASCII case.
+     *
+     * @param s1      the first domain name
+     * @param s2      the second domain name
+     * @param options IDNA option bits, passed through to convertIDNToASCII()
+     * @return the result of compareCaseInsensitiveASCII() on the two ASCII forms
+     * @throws StringPrepParseException if either name fails to convert
+     */
+    public static int compare(String s1, String s2, int options) throws StringPrepParseException{
+        // s1 is converted first, so its errors are reported before those of s2
+        final StringBuffer ascii1 = convertIDNToASCII(s1, options);
+        final StringBuffer ascii2 = convertIDNToASCII(s2, options);
+        final int order = compareCaseInsensitiveASCII(ascii1, ascii2);
+        return order;
+    }
+}
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
@ -8,7 +8,6 @@ package com.ibm.icu.impl;

 import java.io.IOException;
 import java.io.InputStream;
-import java.util.MissingResourceException;

 import com.ibm.icu.text.Normalizer;
 import com.ibm.icu.text.Normalizer2;
@ -328,13 +327,12 @@ public final class Norm2AllModes {
    private static CacheBase<String, Norm2AllModes, InputStream> cache =
        new SoftCache<String, Norm2AllModes, InputStream>() {
            protected Norm2AllModes createInstance(String key, InputStream data) {
+                Normalizer2Impl impl;
                if(data==null) {
-                    throw new MissingResourceException(
-                            "No Normalizer2 data name \""+key+"\" cached, and InputStream is null",
-                            "Normalizer2",
-                            key);
+                    impl=new Normalizer2Impl().load(ICUResourceBundle.ICU_BUNDLE+"/"+key+".nrm");
+                } else {
+                    impl=new Normalizer2Impl().load(data);
                }
-                Normalizer2Impl impl=new Normalizer2Impl().load(data);
                return new Norm2AllModes(impl);
            }
        };
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
@ -340,6 +340,9 @@ public final class Normalizer2Impl {
         * @draft ICU 4.6
         */
        public static boolean equal(CharSequence s1,  CharSequence s2) {
+            if(s1==s2) {
+                return true;
+            }
            int length=s1.length();
            if(length!=s2.length()) {
                return false;
@ -368,6 +371,9 @@ public final class Normalizer2Impl {
            if((limit1-start1)!=(limit2-start2)) {
                return false;
            }
+            if(s1==s2 && start1==start2) {
+                return true;
+            }
            while(start1<limit1) {
                if(s1.charAt(start1++)!=s2.charAt(start2++)) {
                    return false;
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Punycode.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Punycode.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 2003-2009, International Business Machines Corporation and    *
+ * Copyright (C) 2003-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -132,7 +132,7 @@ public final class Punycode {
     * @param caseFlags The boolean array of case flags.
     * @return An array of ASCII code points.
     */
-    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws StringPrepParseException{
+    public static StringBuilder encode(CharSequence src, boolean[] caseFlags) throws StringPrepParseException{
        
        int[] cpBuffer = new int[MAX_CP_COUNT];
        int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
@ -140,7 +140,7 @@ public final class Punycode {
        int srcLength = src.length();
        int destCapacity = MAX_CP_COUNT;
        char[] dest = new char[destCapacity];
-        StringBuffer result = new StringBuffer();
+        StringBuilder result = new StringBuilder();
        /*
         * Handle the basic code points and
         * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
@ -290,12 +290,12 @@ public final class Punycode {
     * 
     * @param src The source of the string buffer being passed.
     * @param caseFlags The array of boolean case flags.
-     * @return StringBuffer string.
+     * @return StringBuilder string.
     */
-    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags) 
+    public static StringBuilder decode(CharSequence src, boolean[] caseFlags) 
                               throws StringPrepParseException{
        int srcLength = src.length();
-        StringBuffer result = new StringBuffer();
+        StringBuilder result = new StringBuilder();
        int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
                destCPCount, firstSupplementaryIndex, cpLength;
        char b;
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java
@ -0,0 +1,739 @@
+/*
+*******************************************************************************
+* Copyright (C) 2010, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import java.util.EnumSet;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UCharacterCategory;
+import com.ibm.icu.lang.UCharacterDirection;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.text.IDNA;
+import com.ibm.icu.text.Normalizer2;
+import com.ibm.icu.text.StringPrepParseException;
+
+// Note about tests for IDNA.Error.DOMAIN_NAME_TOO_LONG:
+//
+// The domain name length limit is 255 octets in an internal DNS representation
+// where the last ("root") label is the empty label
+// represented by length byte 0 alone.
+// In a conventional string, this translates to 253 characters, or 254
+// if there is a trailing dot for the root label.
+
+/**
+ * UTS #46 (IDNA2008) implementation.
+ * @author Markus Scherer
+ * @since 2010jul09
+ */
+public final class UTS46 extends IDNA {
+    /**
+     * Creates a UTS #46 processor with the given option bits.
+     *
+     * @param options bit set of IDNA options; stored unchanged and tested by the
+     *                processing code against USE_STD3_RULES,
+     *                NONTRANSITIONAL_TO_ASCII and NONTRANSITIONAL_TO_UNICODE
+     */
+    public UTS46(int options) {
+        this.options=options;
+    }
+
+    /**
+     * Converts a single label to ASCII.
+     * Delegates to process() in label mode with ASCII output requested.
+     */
+    @Override
+    public StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info) {
+        final boolean isLabel=true;
+        final boolean toASCII=true;
+        return process(label, isLabel, toASCII, dest, info);
+    }
+
+    /**
+     * Converts a single label to Unicode.
+     * Delegates to process() in label mode with Unicode output requested.
+     */
+    @Override
+    public StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info) {
+        final boolean isLabel=true;
+        final boolean toASCII=false;
+        return process(label, isLabel, toASCII, dest, info);
+    }
+
+    @Override
+    public StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info) {
+        process(name, false, true, dest, info);
+        if( dest.length()>=254 && !info.getErrors().contains(Error.DOMAIN_NAME_TOO_LONG) &&
+            isASCIIString(dest) &&
+            (dest.length()>254 || dest.charAt(253)!='.')
+        ) {
+            addError(info, Error.DOMAIN_NAME_TOO_LONG);
+        }
+        return dest;
+    }
+
+    /**
+     * Converts a whole domain name to Unicode.
+     * Delegates to process() in name mode with Unicode output requested.
+     */
+    @Override
+    public StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info) {
+        final boolean isLabel=false;
+        final boolean toASCII=false;
+        return process(name, isLabel, toASCII, dest, info);
+    }
+
+    private static final Normalizer2 uts46Norm2=
+        Normalizer2.getInstance(null, "uts46", Normalizer2.Mode.COMPOSE);  // uts46.nrm
+    final int options;
+
+    // Severe errors which usually result in a U+FFFD replacement character in the result string.
+    private static final EnumSet<Error> severeErrors=EnumSet.of(
+        Error.LEADING_COMBINING_MARK,
+        Error.DISALLOWED,
+        Error.PUNYCODE,
+        Error.LABEL_HAS_DOT,
+        Error.INVALID_ACE_LABEL);
+
+    /**
+     * Tests whether every char of the sequence is in the ASCII range.
+     *
+     * @param dest the text to check
+     * @return false if any char is greater than U+007F, otherwise true
+     *         (including for an empty sequence)
+     */
+    private static boolean
+    isASCIIString(CharSequence dest) {
+        int remaining=dest.length();
+        int index=0;
+        while(remaining-->0) {
+            if(dest.charAt(index++)>0x7f) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // UTS #46 data for ASCII characters.
+    // The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
+    // and passes through all other ASCII characters.
+    // If USE_STD3_RULES is set, then non-LDH characters are disallowed
+    // using this data.
+    // The ASCII fastpath also uses this data.
+    // Values: -1=disallowed  0==valid  1==mapped (lowercase)
+    private static final byte asciiData[]={
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        // 002D..002E; valid  #  HYPHEN-MINUS..FULL STOP
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,
+        // 0030..0039; valid  #  DIGIT ZERO..DIGIT NINE
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,
+        // 0041..005A; mapped  #  LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+        -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1,
+        // 0061..007A; valid  #  LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+        -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1
+    };
+
+    /**
+     * Common entry point for the four public conversion methods.
+     * <p>
+     * First runs an ASCII fastpath that copies src into dest, lowercasing
+     * uppercase ASCII letters and recording hyphen, empty-label and length
+     * errors per label. If the whole input is handled this way, dest is
+     * returned directly. Otherwise the loop stops at the first character it
+     * cannot handle and the remainder is handed to processUnicode(), after
+     * which a BiDi error may be added.
+     *
+     * @param src     the input label or domain name; must not be the same object as dest
+     * @param isLabel true if src is a single label, false if it is a whole domain name
+     * @param toASCII true to request ASCII output and the associated length and
+     *                empty-label checks, false for Unicode output
+     * @param dest    the output buffer; its previous contents are discarded
+     * @param info    receives the errors; it is reset at the start
+     * @return dest
+     * @throws IllegalArgumentException if dest and src are the same object
+     */
+    private StringBuilder
+    process(CharSequence src,
+            boolean isLabel, boolean toASCII,
+            StringBuilder dest,
+            Info info) {
+        // uts46Norm2.normalize() would do all of this error checking and setup,
+        // but with the ASCII fastpath we do not always call it, and do not
+        // call it first.
+        if(dest==src) {
+            throw new IllegalArgumentException();
+        }
+        // Arguments are fine, reset output values.
+        dest.delete(0, 0x7fffffff);
+        resetInfo(info);
+        int srcLength=src.length();
+        if(srcLength==0) {
+            // Empty input: an error only when ASCII output was requested.
+            if(toASCII) {
+                addError(info, Error.EMPTY_LABEL);
+            }
+            return dest;
+        }
+        // ASCII fastpath
+        boolean disallowNonLDHDot=(options&USE_STD3_RULES)!=0;
+        // Index in src of the first char of the label currently being scanned.
+        int labelStart=0;
+        // Declared outside the loop: after a break, i is where processUnicode() resumes.
+        int i;
+        for(i=0;; ++i) {
+            if(i==srcLength) {
+                // The whole input was handled by the fastpath; finish the last label.
+                if(toASCII) {
+                    if((i-labelStart)>63) {
+                        addLabelError(info, Error.LABEL_TOO_LONG);
+                    }
+                    // There is a trailing dot if labelStart==i.
+                    if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
+                        addError(info, Error.DOMAIN_NAME_TOO_LONG);
+                    }
+                }
+                promoteAndResetLabelErrors(info);
+                return dest;
+            }
+            char c=src.charAt(i);
+            if(c>0x7f) {
+                // Non-ASCII: leave the fastpath; c has not been copied to dest.
+                break;
+            }
+            // asciiData values: -1=disallowed  0=valid  1=mapped (lowercase)
+            int cData=asciiData[c];
+            if(cData>0) {
+                dest.append((char)(c+0x20));  // Lowercase an uppercase ASCII letter.
+            } else if(cData<0 && disallowNonLDHDot) {
+                break;  // Replacing with U+FFFD can be complicated for toASCII.
+            } else {
+                // Valid character, or a non-LDH character while USE_STD3_RULES is off.
+                dest.append(c);
+                if(c=='-') {  // hyphen
+                    if(i==(labelStart+3) && src.charAt(i-1)=='-') {
+                        // "??--..." is Punycode or forbidden.
+                        ++i;  // '-' was copied to dest already
+                        break;
+                    }
+                    if(i==labelStart) {
+                        // label starts with "-"
+                        addLabelError(info, Error.LEADING_HYPHEN);
+                    }
+                    if((i+1)==srcLength || src.charAt(i+1)=='.') {
+                        // label ends with "-"
+                        addLabelError(info, Error.TRAILING_HYPHEN);
+                    }
+                } else if(c=='.') {  // dot
+                    if(isLabel) {
+                        // Replacing with U+FFFD can be complicated for toASCII.
+                        ++i;  // '.' was copied to dest already
+                        break;
+                    }
+                    if(toASCII) {
+                        // Permit an empty label at the end but not elsewhere.
+                        if(i==labelStart && i<(srcLength-1)) {
+                            addLabelError(info, Error.EMPTY_LABEL);
+                        } else if((i-labelStart)>63) {
+                            addLabelError(info, Error.LABEL_TOO_LONG);
+                        }
+                    }
+                    promoteAndResetLabelErrors(info);
+                    // The next label begins right after this dot.
+                    labelStart=i+1;
+                }
+            }
+        }
+        // Reached only via break: src[i..] still needs full processing, and
+        // dest holds the fastpath output for src[0..i).
+        promoteAndResetLabelErrors(info);
+        processUnicode(src, labelStart, i, isLabel, toASCII, dest, info);
+        // Report a BiDi error unless a severe error was already recorded. When
+        // labelStart>0, the labels completed by the fastpath are checked too.
+        if( isBiDi(info) && !hasCertainErrors(info, severeErrors) &&
+            (!isOkBiDi(info) || (labelStart>0 && !isASCIIOkBiDi(dest, labelStart)))
+        ) {
+            addError(info, Error.BIDI);
+        }
+        return dest;
+    }
+
+    /**
+     * Handles the part of the input that the ASCII fastpath in process()
+     * could not: runs the UTS #46 normalizer over src from mappingStart
+     * onward, then walks dest label by label, calling processLabel() on each
+     * label and optionally mapping deviation characters.
+     *
+     * @param src          the original input
+     * @param labelStart   index of the start of the first label that still needs
+     *                     processing; used as an index into dest
+     * @param mappingStart index in src of the first char not yet copied to dest
+     * @param isLabel      true if the input is a single label, so dots do not split labels
+     * @param toASCII      true for ASCII output; selects which NONTRANSITIONAL_*
+     *                     option governs deviation mapping and is passed to processLabel()
+     * @param dest         the output buffer, modified in place
+     * @param info         receives errors and the transitional-different flag
+     * @return dest
+     */
+    private StringBuilder
+    processUnicode(CharSequence src,
+                   int labelStart, int mappingStart,
+                   boolean isLabel, boolean toASCII,
+                   StringBuilder dest,
+                   Info info) {
+        if(mappingStart==0) {
+            // Nothing was consumed by the fastpath: normalize all of src into dest.
+            uts46Norm2.normalize(src, dest);
+        } else {
+            // Normalize the rest of src and append it to what dest already holds.
+            uts46Norm2.normalizeSecondAndAppend(dest, src.subSequence(mappingStart, src.length()));
+        }
+        // Deviation characters are mapped unless the matching NONTRANSITIONAL_* option is set.
+        boolean doMapDevChars=
+            toASCII ? (options&NONTRANSITIONAL_TO_ASCII)==0 :
+                      (options&NONTRANSITIONAL_TO_UNICODE)==0;
+        int destLength=dest.length();
+        // labelLimit scans forward from labelStart to find the end of the current label.
+        int labelLimit=labelStart;
+        while(labelLimit<destLength) {
+            char c=dest.charAt(labelLimit);
+            if(c=='.' && !isLabel) {
+                // End of a label within a domain name: process dest[labelStart..labelLimit).
+                int labelLength=labelLimit-labelStart;
+                int newLength=processLabel(dest, labelStart, labelLength,
+                                                toASCII, info);
+                promoteAndResetLabelErrors(info);
+                // processLabel() may have changed the label's length inside dest.
+                destLength+=newLength-labelLength;
+                // Skip the processed label and the dot after it.
+                labelLimit=labelStart+=newLength+1;
+            } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) {
+                // c is one of U+00DF, U+03C2, U+200C or U+200D (the deviation characters).
+                setTransitionalDifferent(info);
+                if(doMapDevChars) {
+                    destLength=mapDevChars(dest, labelStart, labelLimit);
+                    // Do not increment labelLimit in case c was removed.
+                    // All deviation characters have been mapped, no need to check for them again.
+                    doMapDevChars=false;
+                } else {
+                    ++labelLimit;
+                }
+            } else {
+                ++labelLimit;
+            }
+        }
+        // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok)
+        // but not an empty label elsewhere nor a completely empty domain name.
+        // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0.
+        if(0==labelStart || labelStart<labelLimit) {
+            processLabel(dest, labelStart, labelLimit-labelStart, toASCII, info);
+            promoteAndResetLabelErrors(info);
+        }
+        return dest;
+    }
+
+    // Applies the transitional mappings to the deviation characters in dest
+    // starting at mappingStart: sharp s -> "ss", final sigma -> sigma,
+    // ZWNJ and ZWJ are removed. If anything changed, dest from labelStart on
+    // is normalized again.
+    // Returns the new dest.length().
+    private int
+    mapDevChars(StringBuilder dest, int labelStart, int mappingStart) {
+        int end=dest.length();
+        boolean changed=false;
+        int pos=mappingStart;
+        while(pos<end) {
+            char ch=dest.charAt(pos);
+            if(ch==0xdf) {
+                // Map sharp s to ss: overwrite in place, then insert the second 's'.
+                dest.setCharAt(pos, 's');
+                dest.insert(pos+1, 's');
+                pos+=2;
+                ++end;
+                changed=true;
+            } else if(ch==0x3c2) {
+                // Map final sigma to nonfinal sigma.
+                dest.setCharAt(pos, '\u03c3');
+                ++pos;
+                changed=true;
+            } else if(ch==0x200c || ch==0x200d) {
+                // Ignore/remove ZWNJ and ZWJ; pos now addresses the next character.
+                dest.deleteCharAt(pos);
+                --end;
+                changed=true;
+            } else {
+                ++pos;
+            }
+        }
+        if(!changed) {
+            return end;
+        }
+        // Mapping deviation characters might have resulted in an un-NFC string.
+        // We could use either the NFC or the UTS #46 normalizer.
+        // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
+        String normalized=uts46Norm2.normalize(dest.subSequence(labelStart, dest.length()));
+        dest.replace(labelStart, 0x7fffffff, normalized);
+        return dest.length();
+    }
+
+    // Writes the (possibly modified) label back into dest.
+    // When label is dest itself, the label was edited in place: nothing is copied,
+    // and labelLength is the new label length (which may differ from label.length()).
+    // Otherwise labelLength==label.length(), and the range
+    // [destLabelStart, destLabelStart+destLabelLength) of dest is replaced by label.
+    // Returns labelLength (= the new label length).
+    private static int
+    replaceLabel(StringBuilder dest, int destLabelStart, int destLabelLength,
+                 CharSequence label, int labelLength) {
+        if(label==dest) {
+            return labelLength;
+        }
+        // delete+insert moves characters inside the StringBuilder;
+        // dest.replace(start, limit, label.toString()) would create a String instead.
+        int destLabelLimit=destLabelStart+destLabelLength;
+        dest.delete(destLabelStart, destLabelLimit);
+        dest.insert(destLabelStart, label);
+        return labelLength;
+    }
+
+    // Validates the label dest[labelStart, labelStart+labelLength) and rewrites it in dest
+    // where needed.
+    // - A label starting with "xn--" is Punycode-decoded first. If decoding fails, or the
+    //   decoded string is not normalized according to uts46Norm2, the label is handed
+    //   to markBadACELabel().
+    // - Hyphen restrictions, dots, STD3 (if USE_STD3_RULES), U+FFFD, a leading combining mark,
+    //   and (optionally) the BiDi and CONTEXTJ rules are checked; problems are recorded
+    //   via addLabelError(info, ...).
+    // - For toASCII, a non-Punycode label with non-ASCII characters is replaced
+    //   by "xn--" plus its Punycode encoding, and label lengths over 63 are flagged.
+    // Returns the new label length in dest.
+    private int
+    processLabel(StringBuilder dest,
+                 int labelStart, int labelLength,
+                 boolean toASCII,
+                 Info info) {
+        StringBuilder fromPunycode;
+        StringBuilder labelString;
+        // Remember where the label lives in dest; labelStart/labelLength are
+        // redirected to the decoded string for a Punycode label.
+        int destLabelStart=labelStart;
+        int destLabelLength=labelLength;
+        boolean wasPunycode;
+        if( labelLength>=4 &&
+            dest.charAt(labelStart)=='x' && dest.charAt(labelStart+1)=='n' &&
+            dest.charAt(labelStart+2)=='-' && dest.charAt(labelStart+3)=='-'
+        ) {
+            // Label starts with "xn--", try to un-Punycode it.
+            wasPunycode=true;
+            try {
+                fromPunycode=Punycode.decode(dest.subSequence(labelStart+4, labelStart+labelLength), null);
+            } catch (StringPrepParseException e) {
+                addLabelError(info, Error.PUNYCODE);
+                return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
+            }
+            // Check for NFC, and for characters that are not
+            // valid or deviation characters according to the normalizer.
+            // If there is something wrong, then the string will change.
+            // Note that the normalizer passes through non-LDH ASCII and deviation characters.
+            // Deviation characters are ok in Punycode even in transitional processing.
+            // In the code further below, if we find non-LDH ASCII and we have USE_STD3_RULES
+            // then we will set Error.INVALID_ACE_LABEL there too.
+            boolean isValid=uts46Norm2.isNormalized(fromPunycode);
+            if(!isValid) {
+                addLabelError(info, Error.INVALID_ACE_LABEL);
+                return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
+            }
+            labelString=fromPunycode;
+            labelStart=0;
+            labelLength=fromPunycode.length();
+        } else {
+            wasPunycode=false;
+            labelString=dest;
+        }
+        // Validity check
+        if(labelLength==0) {
+            if(toASCII) {
+                addLabelError(info, Error.EMPTY_LABEL);
+            }
+            return replaceLabel(dest, destLabelStart, destLabelLength, labelString, labelLength);
+        }
+        // labelLength>0
+        if(labelLength>=4 && labelString.charAt(labelStart+2)=='-' && labelString.charAt(labelStart+3)=='-') {
+            // label starts with "??--"
+            addLabelError(info, Error.HYPHEN_3_4);
+        }
+        if(labelString.charAt(labelStart)=='-') {
+            // label starts with "-"
+            addLabelError(info, Error.LEADING_HYPHEN);
+        }
+        if(labelString.charAt(labelStart+labelLength-1)=='-') {
+            // label ends with "-"
+            addLabelError(info, Error.TRAILING_HYPHEN);
+        }
+        // If the label was not a Punycode label, then it was the result of
+        // mapping, normalization and label segmentation.
+        // If the label was in Punycode, then we mapped it again above
+        // and checked its validity.
+        // Now we handle the STD3 restriction to LDH characters (if set)
+        // and we look for U+FFFD which indicates disallowed characters
+        // in a non-Punycode label or U+FFFD itself in a Punycode label.
+        // We also check for dots which can come from the input to a single-label function.
+        // labelString is either dest or the freshly decoded string, so it may be modified in place.
+        int i=labelStart;
+        int limit=labelStart+labelLength;
+        // Bitwise OR of all non-ASCII code units seen; used below as a cheap filter
+        // for "has non-ASCII" and "might contain ZWNJ/ZWJ".
+        char oredChars=0;
+        // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
+        boolean disallowNonLDHDot=(options&USE_STD3_RULES)!=0;
+        do {
+            char c=labelString.charAt(i);
+            if(c<=0x7f) {
+                if(c=='.') {
+                    addLabelError(info, Error.LABEL_HAS_DOT);
+                    labelString.setCharAt(i, '\ufffd');
+                } else if(disallowNonLDHDot && asciiData[c]<0) {
+                    addLabelError(info, Error.DISALLOWED);
+                    labelString.setCharAt(i, '\ufffd');
+                }
+            } else {
+                oredChars|=c;
+                if(c==0xfffd) {
+                    // Do not advance i here: the ++i below already steps past U+FFFD.
+                    // An additional increment would skip validation of the next character.
+                    addLabelError(info, Error.DISALLOWED);
+                }
+            }
+            ++i;
+        } while(i<limit);
+        // Check for a leading combining mark after other validity checks
+        // so that we don't report IDNA.Error.DISALLOWED for the U+FFFD from here.
+        int c;
+        // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
+        c=labelString.codePointAt(labelStart);
+        if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) {
+            addLabelError(info, Error.LEADING_COMBINING_MARK);
+            labelString.setCharAt(labelStart, '\ufffd');
+            if(c>0xffff) {
+                // Remove c's trail surrogate.
+                labelString.deleteCharAt(labelStart+1);
+                --labelLength;
+                if(labelString==dest) {
+                    // The label was edited in place, so its extent in dest shrank too.
+                    --destLabelLength;
+                }
+            }
+        }
+        if(!hasCertainLabelErrors(info, severeErrors)) {
+            // Do contextual checks only if we do not have U+FFFD from a severe error
+            // because U+FFFD can make these checks fail.
+            if((options&CHECK_BIDI)!=0 && (!isBiDi(info) || isOkBiDi(info))) {
+                checkLabelBiDi(labelString, labelStart, labelLength, info);
+            }
+            // (oredChars&0x200c)==0x200c is a quick pre-check: it can only be true
+            // if some character has the bits of U+200C set, as ZWNJ and ZWJ do.
+            if( (options&CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c &&
+                !isLabelOkContextJ(labelString, labelStart, labelLength)
+            ) {
+                addLabelError(info, Error.CONTEXTJ);
+            }
+            if(toASCII) {
+                if(wasPunycode) {
+                    // Leave a Punycode label unchanged if it has no severe errors.
+                    if(destLabelLength>63) {
+                        addLabelError(info, Error.LABEL_TOO_LONG);
+                    }
+                    return destLabelLength;
+                } else if(oredChars>=0x80) {
+                    // Contains non-ASCII characters.
+                    StringBuilder punycode;
+                    try {
+                        punycode=Punycode.encode(labelString.subSequence(labelStart, labelStart+labelLength), null);
+                    } catch (StringPrepParseException e) {
+                        throw new RuntimeException(e);  // unexpected
+                    }
+                    punycode.insert(0, "xn--");
+                    if(punycode.length()>63) {
+                        addLabelError(info, Error.LABEL_TOO_LONG);
+                    }
+                    return replaceLabel(dest, destLabelStart, destLabelLength,
+                                        punycode, punycode.length());
+                } else {
+                    // all-ASCII label
+                    if(labelLength>63) {
+                        addLabelError(info, Error.LABEL_TOO_LONG);
+                    }
+                }
+            }
+        } else {
+            // If a Punycode label has severe errors,
+            // then leave it but make sure it does not look valid.
+            if(wasPunycode) {
+                addLabelError(info, Error.INVALID_ACE_LABEL);
+                return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info);
+            }
+        }
+        return replaceLabel(dest, destLabelStart, destLabelLength, labelString, labelLength);
+    }
+    // Makes sure that a bad "xn--" label in dest does not look like a valid ACE label.
+    // Scans the characters after the "xn--" prefix: dots are flagged (Error.LABEL_HAS_DOT)
+    // and replaced with U+FFFD; with USE_STD3_RULES, non-LDH ASCII is replaced with U+FFFD too.
+    // If the rest of the label consists only of LDH characters, U+FFFD is appended to it.
+    // Otherwise, for toASCII, a still-all-ASCII label longer than 63 is flagged as too long.
+    // Returns the new label length.
+    private int
+    markBadACELabel(StringBuilder dest,
+                    int labelStart, int labelLength,
+                    boolean toASCII, Info info) {
+        final boolean std3=(options&USE_STD3_RULES)!=0;
+        final int labelLimit=labelStart+labelLength;
+        boolean allASCII=true;
+        boolean allLDH=true;
+        int pos=labelStart+4;  // After the initial "xn--".
+        do {
+            char ch=dest.charAt(pos);
+            if(ch>0x7f) {
+                allASCII=false;
+                allLDH=false;
+            } else if(ch=='.') {
+                addLabelError(info, Error.LABEL_HAS_DOT);
+                dest.setCharAt(pos, '\ufffd');
+                allASCII=false;
+                allLDH=false;
+            } else if(asciiData[ch]<0) {
+                // Non-LDH ASCII: only replaced when STD3 rules are enforced.
+                allLDH=false;
+                if(std3) {
+                    dest.setCharAt(pos, '\ufffd');
+                    allASCII=false;
+                }
+            }
+            ++pos;
+        } while(pos<labelLimit);
+        if(allLDH) {
+            // The label would otherwise still look like a well-formed ACE label.
+            dest.insert(labelLimit, '\ufffd');
+            return labelLength+1;
+        }
+        if(toASCII && allASCII && labelLength>63) {
+            addLabelError(info, Error.LABEL_TOO_LONG);
+        }
+        return labelLength;
+    }
+
+    // Bit sets over UCharacterDirection values (built with U_MASK()), each named after
+    // the BiDi classes it contains. Used by checkLabelBiDi().
+    private static final int L_MASK=U_MASK(UCharacterDirection.LEFT_TO_RIGHT);
+    private static final int R_AL_MASK=
+        U_MASK(UCharacterDirection.RIGHT_TO_LEFT_ARABIC)|
+        U_MASK(UCharacterDirection.RIGHT_TO_LEFT);
+    // Classes allowed for the first character of a label.
+    private static final int L_R_AL_MASK=R_AL_MASK|L_MASK;
+
+    // Classes whose presence makes a label an RTL label.
+    private static final int R_AL_AN_MASK=U_MASK(UCharacterDirection.ARABIC_NUMBER)|R_AL_MASK;
+
+    private static final int EN_AN_MASK=
+        U_MASK(UCharacterDirection.ARABIC_NUMBER)|
+        U_MASK(UCharacterDirection.EUROPEAN_NUMBER);
+    // Classes allowed for the last non-NSM character of an RTL label.
+    private static final int R_AL_EN_AN_MASK=EN_AN_MASK|R_AL_MASK;
+    // Classes allowed for the last non-NSM character of an LTR label.
+    private static final int L_EN_MASK=U_MASK(UCharacterDirection.EUROPEAN_NUMBER)|L_MASK;
+
+    private static final int ES_CS_ET_ON_BN_NSM_MASK=
+        U_MASK(UCharacterDirection.DIR_NON_SPACING_MARK)|
+        U_MASK(UCharacterDirection.BOUNDARY_NEUTRAL)|
+        U_MASK(UCharacterDirection.OTHER_NEUTRAL)|
+        U_MASK(UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR)|
+        U_MASK(UCharacterDirection.COMMON_NUMBER_SEPARATOR)|
+        U_MASK(UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR);
+    // Classes allowed anywhere in an LTR label.
+    private static final int L_EN_ES_CS_ET_ON_BN_NSM_MASK=ES_CS_ET_ON_BN_NSM_MASK|L_EN_MASK;
+    // Classes allowed anywhere in an RTL label.
+    private static final int R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=ES_CS_ET_ON_BN_NSM_MASK|R_AL_EN_AN_MASK;
+
+    // Scans one label and records two things in info: whether the label contains
+    // RTL characters (setBiDi) and whether it violates the BiDi Rule (setNotOkBiDi).
+    // In a BiDi domain name every label must pass the BiDi Rule, but a domain name
+    // may turn out to be a BiDi domain name (has an RTL label) only after
+    // several earlier labels have been processed, so both facts are always collected.
+    private void
+    checkLabelBiDi(CharSequence label, int labelStart, int labelLength, Info info) {
+        // IDNA2008 BiDi rule
+        // Directionality of the first character.
+        int head=labelStart;
+        int cp=Character.codePointAt(label, head);
+        head+=Character.charCount(cp);
+        final int firstMask=U_MASK(UCharacter.getDirection(cp));
+        final boolean isLTR=(firstMask&L_MASK)!=0;
+        // 1. The first character must be a character with BIDI property L, R
+        // or AL.  If it has the R or AL property, it is an RTL label; if it
+        // has the L property, it is an LTR label.
+        if((firstMask&~L_R_AL_MASK)!=0) {
+            setNotOkBiDi(info);
+        }
+        // Directionality of the last non-NSM character, scanning backward.
+        // If nothing but NSMs follows the first character, the first character counts as the last.
+        int tail=labelStart+labelLength;
+        int lastMask=firstMask;
+        while(head<tail) {
+            cp=Character.codePointBefore(label, tail);
+            tail-=Character.charCount(cp);
+            int dir=UCharacter.getDirection(cp);
+            if(dir!=UCharacterDirection.DIR_NON_SPACING_MARK) {
+                lastMask=U_MASK(dir);
+                break;
+            }
+        }
+        // 3. In an RTL label, the end of the label must be a character with
+        // BIDI property R, AL, EN or AN, followed by zero or more
+        // characters with BIDI property NSM.
+        // 6. In an LTR label, the end of the label must be a character with
+        // BIDI property L or EN, followed by zero or more characters with
+        // BIDI property NSM.
+        int allowedAtEnd=isLTR ? L_EN_MASK : R_AL_EN_AN_MASK;
+        if((lastMask&~allowedAtEnd)!=0) {
+            setNotOkBiDi(info);
+        }
+        // Collect the directionalities of the characters in between.
+        int middleMask=0;
+        while(head<tail) {
+            cp=Character.codePointAt(label, head);
+            head+=Character.charCount(cp);
+            middleMask|=U_MASK(UCharacter.getDirection(cp));
+        }
+        if(isLTR) {
+            // 5. In an LTR label, only characters with the BIDI properties L, EN,
+            // ES, CS, ET, ON, BN and NSM are allowed.
+            if((middleMask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
+                setNotOkBiDi(info);
+            }
+        } else {
+            // 2. In an RTL label, only characters with the BIDI properties R, AL,
+            // AN, EN, ES, CS, ET, ON, BN and NSM are allowed.
+            if((middleMask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
+                setNotOkBiDi(info);
+            }
+            // 4. In an RTL label, if an EN is present, no AN may be present, and
+            // vice versa.
+            if((middleMask&EN_AN_MASK)==EN_AN_MASK) {
+                setNotOkBiDi(info);
+            }
+        }
+        // An RTL label is a label that contains at least one character of type
+        // R, AL or AN. [...]
+        // A "BIDI domain name" is a domain name that contains at least one RTL
+        // label. [...]
+        // The following rule, consisting of six conditions, applies to labels
+        // in BIDI domain names.
+        int allMasks=firstMask|middleMask|lastMask;
+        if((allMasks&R_AL_AN_MASK)!=0) {
+            setBiDi(info);
+        }
+    }
+
+    // BiDi check specialized for the ASCII prefix of a BiDi domain name.
+    // The ASCII prefix is all-LTR.
+
+    // IDNA2008 BiDi rule, parts relevant to ASCII labels:
+    // 1. The first character must be a character with BIDI property L [...]
+    // 5. In an LTR label, only characters with the BIDI properties L, EN,
+    // ES, CS, ET, ON, BN and NSM are allowed.
+    // 6. In an LTR label, the end of the label must be a character with
+    // BIDI property L or EN [...]
+
+    // UTF-16 version, called for mapped ASCII prefix.
+    // Cannot contain uppercase A-Z.
+    // s[length-1] must be the trailing dot.
+    // Returns false as soon as one label in s[0, length) fails one of the rules above.
+    private static boolean
+    isASCIIOkBiDi(CharSequence s, int length) {
+        int firstIndex=0;  // index of the first character of the current label
+        for(int i=0; i<length; ++i) {
+            char c=s.charAt(i);
+            if(c=='.') {  // dot: the label ends here
+                if(i>firstIndex) {
+                    char last=s.charAt(i-1);
+                    boolean isLowercaseLetter='a'<=last && last<='z';
+                    boolean isDigit='0'<=last && last<='9';
+                    if(!isLowercaseLetter && !isDigit) {
+                        // Last character in the label is not an L or EN.
+                        return false;
+                    }
+                }
+                firstIndex=i+1;
+                continue;
+            }
+            if(i==firstIndex) {
+                if(c<'a' || 'z'<c) {
+                    // First character in the label is not an L.
+                    return false;
+                }
+            } else if((0x1c<=c && c<=0x20) || (9<=c && c<=0xd)) {
+                // Intermediate character in the label is a B, S or WS.
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Checks the IDNA2008 CONTEXTJ rules for every ZWNJ (U+200C) and ZWJ (U+200D)
+    // in label[labelStart, labelStart+labelLength).
+    // Returns false as soon as one of them is not in a permitted context.
+    private boolean
+    isLabelOkContextJ(CharSequence label, int labelStart, int labelLength) {
+        // [IDNA2008-Tables]
+        // 200C..200D  ; CONTEXTJ    # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+        final int labelLimit=labelStart+labelLength;
+        for(int i=labelStart; i<labelLimit; ++i) {
+            char joiner=label.charAt(i);
+            if(joiner!=0x200c && joiner!=0x200d) {
+                continue;
+            }
+            // Both rule sets start with:
+            //  False;
+            //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
+            if(i==labelStart) {
+                // There is no character before the joiner.
+                return false;
+            }
+            int before=Character.codePointBefore(label, i);
+            if(UCharacter.getCombiningClass(before)==9) {
+                continue;
+            }
+            if(joiner==0x200d) {
+                // Appendix A.2. ZERO WIDTH JOINER (U+200D) has no further rule.
+                return false;
+            }
+            // Appendix A.1. ZERO WIDTH NON-JOINER additionally allows:
+            //  If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
+            //     (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
+            // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
+            int j=i-Character.charCount(before);
+            /* UJoiningType */ int type=UCharacter.getIntPropertyValue(before, UProperty.JOINING_TYPE);
+            while(type==UCharacter.JoiningType.TRANSPARENT) {
+                if(j==0) {
+                    return false;
+                }
+                before=Character.codePointBefore(label, j);
+                j-=Character.charCount(before);
+                type=UCharacter.getIntPropertyValue(before, UProperty.JOINING_TYPE);
+            }
+            if(type!=UCharacter.JoiningType.LEFT_JOINING && type!=UCharacter.JoiningType.DUAL_JOINING) {
+                return false;
+            }
+            // check postcontext (Joining_Type:T)*(Joining_Type:{R,D})
+            j=i+1;
+            do {
+                if(j==labelLimit) {
+                    return false;
+                }
+                int after=Character.codePointAt(label, j);
+                j+=Character.charCount(after);
+                type=UCharacter.getIntPropertyValue(after, UProperty.JOINING_TYPE);
+            } while(type==UCharacter.JoiningType.TRANSPARENT);  // just skip transparent characters
+            if(type!=UCharacter.JoiningType.RIGHT_JOINING && type!=UCharacter.JoiningType.DUAL_JOINING) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // TODO: make public(?) -- in C, these are public in uchar.h
+    // Returns an int with only bit x set.
+    private static int U_MASK(int x) {
+        return 1<<x;
+    }
+    // Returns an int with only the bit for c's UCharacter.getType() value set.
+    private static int U_GET_GC_MASK(int c) {
+        return (1<<UCharacter.getType(c));
+    }
+    // Bit set of the mark categories (Mn, Me, Mc), for use with U_GET_GC_MASK().
+    // This is a constant, so it is final like the other mask constants in this class.
+    private static final int U_GC_M_MASK=
+        U_MASK(UCharacterCategory.NON_SPACING_MARK)|
+        U_MASK(UCharacterCategory.ENCLOSING_MARK)|
+        U_MASK(UCharacterCategory.COMBINING_SPACING_MARK);
+}
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/IDNA.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/IDNA.java
@ -1,17 +1,35 @@
 /*
 *******************************************************************************
- * Copyright (C) 2003-2009, International Business Machines Corporation and    *
+ * Copyright (C) 2003-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

 package com.ibm.icu.text;

-import com.ibm.icu.impl.Punycode;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Set;
+
+import com.ibm.icu.impl.IDNA2003;
+import com.ibm.icu.impl.UTS46;

 /**
- *
- * IDNA API implements the IDNA protocol as defined in the <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
+ * Abstract base class for IDNA processing.
+ * See http://www.unicode.org/reports/tr46/
+ * and http://www.ietf.org/rfc/rfc3490.txt
+ * <p>
+ * The IDNA class is not intended for public subclassing.
+ * <p>
+ * The non-static methods implement UTS #46 and IDNA2008.
+ * IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
+ * <p>
+ * The static methods implement IDNA2003.
+ * <p>
+ * IDNA2003 API Overview:
+ * <p>
+ * The static IDNA API methods implement the IDNA protocol as defined in the
+ * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
 * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels 
 * containing non-ASCII code points are required to be processed by
 * ToASCII operation before passing it to resolver libraries. Domain names
@ -30,177 +48,369 @@ import com.ibm.icu.impl.Punycode;
 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
 * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
 * 
- * @author Ram Viswanadha
+ * @author Ram Viswanadha, Markus Scherer
 * @stable ICU 2.8
 */
-public final class IDNA {
-
-    /* IDNA ACE Prefix is "xn--" */
-    private static char[] ACE_PREFIX                = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
-    //private static final int ACE_PREFIX_LENGTH      = ACE_PREFIX.length;
-
-    private static final int MAX_LABEL_LENGTH       = 63;
-    private static final int HYPHEN                 = 0x002D;
-    private static final int CAPITAL_A              = 0x0041;
-    private static final int CAPITAL_Z              = 0x005A;
-    private static final int LOWER_CASE_DELTA       = 0x0020;
-    private static final int FULL_STOP              = 0x002E;
-    private static final int MAX_DOMAIN_NAME_LENGTH = 255;
+public abstract class IDNA {
    /** 
-     * Option to prohibit processing of unassigned codepoints in the input and
-     * do not check if the input conforms to STD-3 ASCII rules.
-     * 
-     * @see  #convertToASCII #convertToUnicode
+     * Default options value: None of the other options are set.
     * @stable ICU 2.8
     */
-    public static final int DEFAULT             = 0x0000;
+    public static final int DEFAULT = 0;
    /** 
-     * Option to allow processing of unassigned codepoints in the input
-     * 
-     * @see  #convertToASCII #convertToUnicode
+     * Option to allow unassigned code points in domain names and labels.
+     * This option is ignored by the UTS46 implementation.
+     * (UTS #46 disallows unassigned code points.)
     * @stable ICU 2.8
     */
-    public static final int ALLOW_UNASSIGNED    = 0x0001;
+    public static final int ALLOW_UNASSIGNED = 1;
    /** 
-     * Option to check if input conforms to STD-3 ASCII rules
-     * 
-     * @see #convertToASCII #convertToUnicode
+     * Option to check whether the input conforms to the STD3 ASCII rules,
+     * for example the restriction of labels to LDH characters
+     * (ASCII Letters, Digits and Hyphen-Minus).
     * @stable ICU 2.8
     */
-    public static final int USE_STD3_RULES      = 0x0002;
-    
-    // static final singleton object that is initialized
-    // at class initialization time, hence guaranteed to
-    // be initialized and thread safe
-    private static final IDNA singleton  = new IDNA();
-    
-    // The NamePrep profile object
-    private StringPrep namePrep;
-    
-    /* private constructor to prevent construction of the object */
-    private IDNA(){
-        namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
-    }
-    
-    private static boolean startsWithPrefix(StringBuffer src){
-        boolean startsWithPrefix = true;
-
-        if(src.length() < ACE_PREFIX.length){
-            return false;
-        }
-        for(int i=0; i<ACE_PREFIX.length;i++){
-            if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
-                startsWithPrefix = false;
-            }
-        }
-        return startsWithPrefix;
-    }
-
-    private static char toASCIILower(char ch){
-        if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
-            return (char)(ch + LOWER_CASE_DELTA);
-        }
-        return ch;
-    }
-
-    private static StringBuffer toASCIILower(StringBuffer src){
-        StringBuffer dest = new StringBuffer();
-        for(int i=0; i<src.length();i++){
-            dest.append(toASCIILower(src.charAt(i)));
-        }
-        return dest;
-    }
-
-    private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
-        char c1,c2;
-        int rc;
-        for(int i =0;/* no condition */;i++) {
-            /* If we reach the ends of both strings then they match */
-            if(i == s1.length()) {
-                return 0;
-            }
-
-            c1 = s1.charAt(i);
-            c2 = s2.charAt(i);
-        
-            /* Case-insensitive comparison */
-            if(c1!=c2) {
-                rc=toASCIILower(c1)-toASCIILower(c2);
-                if(rc!=0) {
-                    return rc;
-                }
-            }
-        }
-    }
-   
-    private static int getSeparatorIndex(char[] src,int start, int limit){
-        for(; start<limit;start++){
-            if(isLabelSeparator(src[start])){
-                return start;
-            }
-        }
-        // we have not found the separator just return length
-        return start;
-    }
-    
-    /*
-    private static int getSeparatorIndex(UCharacterIterator iter){
-        int currentIndex = iter.getIndex();
-        int separatorIndex = 0;
-        int ch;
-        while((ch=iter.next())!= UCharacterIterator.DONE){
-            if(isLabelSeparator(ch)){
-                separatorIndex = iter.getIndex();
-                iter.setIndex(currentIndex);
-                return separatorIndex;
-            }
-        }
-        // reset index
-        iter.setIndex(currentIndex);
-        // we have not found the separator just return the length
-       
-    }
-    */
-    
-
-    private static boolean isLDHChar(int ch){
-        // high runner case
-        if(ch>0x007A){
-            return false;
-        }
-        //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
-        if( (ch==0x002D) || 
-            (0x0030 <= ch && ch <= 0x0039) ||
-            (0x0041 <= ch && ch <= 0x005A) ||
-            (0x0061 <= ch && ch <= 0x007A)
-          ){
-            return true;
-        }
-        return false;
-    }
-    
+    public static final int USE_STD3_RULES = 2;
    /**
-     * Ascertain if the given code point is a label separator as 
-     * defined by the IDNA RFC
-     * 
-     * @param ch The code point to be ascertained
-     * @return true if the char is a label separator
-     * @stable ICU 2.8
+     * IDNA option to check whether the input conforms to the BiDi rules.
+     * This option is ignored by the IDNA2003 implementation.
+     * (IDNA2003 always performs a BiDi check.)
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
     */
-    private static boolean isLabelSeparator(int ch){
-        switch(ch){
-            case 0x002e:
-            case 0x3002:
-            case 0xFF0E:
-            case 0xFF61:
-                return true;
-            default:
-                return false;           
+    public static final int CHECK_BIDI = 4;
+    /**
+     * IDNA option to check whether the input conforms to the CONTEXTJ rules.
+     * This option is ignored by the IDNA2003 implementation.
+     * (The CONTEXTJ check is new in IDNA2008.)
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final int CHECK_CONTEXTJ = 8;
+    /**
+     * IDNA option for nontransitional processing in ToASCII().
+     * By default, ToASCII() uses transitional processing.
+     * This option is ignored by the IDNA2003 implementation.
+     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
+    /**
+     * IDNA option for nontransitional processing in ToUnicode().
+     * By default, ToUnicode() uses transitional processing.
+     * This option is ignored by the IDNA2003 implementation.
+     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
+
+    /**
+     * Returns an IDNA instance which implements UTS #46.
+     * Returns an unmodifiable instance, owned by the caller.
+     * Cache it and reuse it for multiple operations.
+     * The instance is thread-safe, that is, it can be used concurrently.
+     * <p>
+     * UTS #46 defines Unicode IDNA Compatibility Processing,
+     * updated to the latest version of Unicode and compatible with both
+     * IDNA2003 and IDNA2008.
+     * <p>
+     * The worker functions use transitional processing, including deviation mappings,
+     * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
+     * is used in which case the deviation characters are passed through without change.
+     * <p>
+     * Disallowed characters are mapped to U+FFFD.
+     * <p>
+     * Operations with the UTS #46 instance do not support the
+     * ALLOW_UNASSIGNED option.
+     * <p>
+     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
+     * When the USE_STD3_RULES option is used, ASCII characters other than
+     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
+     *
+     * @param options Bit set to modify the processing and error checking.
+     * @return the UTS #46 IDNA instance, if successful
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static IDNA getUTS46Instance(int options) {
+        return new UTS46(options);
+    }
+
+    /**
+     * Converts a single domain name label into its ASCII form for DNS lookup.
+     * If any processing step fails, then info.hasErrors() will be true and
+     * the result might not be an ASCII string.
+     * The label might be modified according to the types of errors.
+     * Labels with severe errors will be left in (or turned into) their Unicode form.
+     *
+     * @param label Input domain name label
+     * @param dest Destination string object
+     * @param info Output container of IDNA processing details.
+     * @return dest
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);
+
+    /**
+     * Converts a single domain name label into its Unicode form for human-readable display.
+     * If any processing step fails, then info.hasErrors() will be true.
+     * The label might be modified according to the types of errors.
+     *
+     * @param label Input domain name label
+     * @param dest Destination string object
+     * @param info Output container of IDNA processing details.
+     * @return dest
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);
+
+    /**
+     * Converts a whole domain name into its ASCII form for DNS lookup.
+     * If any processing step fails, then info.hasErrors() will be true and
+     * the result might not be an ASCII string.
+     * The domain name might be modified according to the types of errors.
+     * Labels with severe errors will be left in (or turned into) their Unicode form.
+     *
+     * @param name Input domain name
+     * @param dest Destination string object
+     * @param info Output container of IDNA processing details.
+     * @return dest
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);
+
+    /**
+     * Converts a whole domain name into its Unicode form for human-readable display.
+     * If any processing step fails, then info.hasErrors() will be true.
+     * The domain name might be modified according to the types of errors.
+     *
+     * @param name Input domain name
+     * @param dest Destination string object
+     * @param info Output container of IDNA processing details.
+     * @return dest
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info);
+
+    /**
+     * Output container for IDNA processing errors.
+     * The Info class is not suitable for subclassing.
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final class Info {
+        /**
+         * Constructor.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Info() {
+            errors=EnumSet.noneOf(Error.class);
+            labelErrors=EnumSet.noneOf(Error.class);
+            isTransDiff=false;
+            isBiDi=false;
+            isOkBiDi=true;
+        }
+        /**
+         * Were there IDNA processing errors?
+         * @return true if there were processing errors
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean hasErrors() { return !errors.isEmpty(); }
+        /**
+         * Returns a set indicating IDNA processing errors.
+         * @return set of processing errors (modifiable, and not null)
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Set<Error> getErrors() { return errors; }
+        /**
+         * Returns true if transitional and nontransitional processing produce different results.
+         * This is the case when the input label or domain name contains
+         * one or more deviation characters outside a Punycode label (see UTS #46).
+         * <ul>
+         * <li>With nontransitional processing, such characters are
+         * copied to the destination string.
+         * <li>With transitional processing, such characters are
+         * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
+         * </ul>
+         * @return true if transitional and nontransitional processing produce different results
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean isTransitionalDifferent() { return isTransDiff; }
+
+        private void reset() {
+            errors.clear();
+            labelErrors.clear();
+            isTransDiff=false;
+            isBiDi=false;
+            isOkBiDi=true;
+        }
+
+        private EnumSet<Error> errors, labelErrors;
+        private boolean isTransDiff;
+        private boolean isBiDi;
+        private boolean isOkBiDi;
+    }
+
+    // The following protected methods give IDNA subclasses access to the private Info fields.
+    // The Info object also provides intermediate state that is publicly invisible,
+    // avoiding the allocation of another worker object.
+    protected static void resetInfo(Info info) {
+        info.reset();
+    }
+    protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) {
+        return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors);
+    }
+    protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) {
+        return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors);
+    }
+    protected static void addLabelError(Info info, Error error) {
+        info.labelErrors.add(error);
+    }
+    protected static void promoteAndResetLabelErrors(Info info) {
+        if(!info.labelErrors.isEmpty()) {
+            info.errors.addAll(info.labelErrors);
+            info.labelErrors.clear();
        }
    }
-       
+    protected static void addError(Info info, Error error) {
+        info.errors.add(error);
+    }
+    protected static void setTransitionalDifferent(Info info) {
+        info.isTransDiff=true;
+    }
+    protected static void setBiDi(Info info) {
+        info.isBiDi=true;
+    }
+    protected static boolean isBiDi(Info info) {
+        return info.isBiDi;
+    }
+    protected static void setNotOkBiDi(Info info) {
+        info.isOkBiDi=false;
+    }
+    protected static boolean isOkBiDi(Info info) {
+        return info.isOkBiDi;
+    }
+
    /**
-     * This function implements the ToASCII operation as defined in the IDNA RFC.
+     * IDNA error bit set values.
+     * When a domain name or label fails a processing step or does not meet the
+     * validity criteria, then one or more of these error bits are set.
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static enum Error {
+        /**
+         * A non-final domain name label (or the whole domain name) is empty.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        EMPTY_LABEL,
+        /**
+         * A domain name label is longer than 63 bytes.
+         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
+         * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        LABEL_TOO_LONG,
+        /**
+         * A domain name is longer than 255 bytes in its storage form.
+         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
+         * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        DOMAIN_NAME_TOO_LONG,
+        /**
+         * A label starts with a hyphen-minus ('-').
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        LEADING_HYPHEN,
+        /**
+         * A label ends with a hyphen-minus ('-').
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        TRAILING_HYPHEN,
+        /**
+         * A label contains hyphen-minus ('-') in the third and fourth positions.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        HYPHEN_3_4,
+        /**
+         * A label starts with a combining mark.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        LEADING_COMBINING_MARK,
+        /**
+         * A label or domain name contains disallowed characters.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        DISALLOWED,
+        /**
+         * A label starts with "xn--" but does not contain valid Punycode.
+         * That is, an xn-- label failed Punycode decoding.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        PUNYCODE,
+        /**
+         * A label contains a dot (full stop).
+         * This can occur in an input string for a single-label function.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        LABEL_HAS_DOT,
+        /**
+         * An ACE label does not contain a valid label string.
+         * The label was successfully ACE (Punycode) decoded but the resulting
+         * string had severe validation errors. For example,
+         * it might contain characters that are not allowed in ACE labels,
+         * or it might not be normalized.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        INVALID_ACE_LABEL,
+        /**
+         * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        BIDI,
+        /**
+         * A label does not meet the IDNA CONTEXTJ requirements.
+         * @draft ICU 4.6
+         * @provisional This API might change or be removed in a future release.
+         */
+        CONTEXTJ
+    }
+
+    /**
+     * Sole constructor. (For invocation by subclass constructors, typically implicit.)
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    protected IDNA() {
+    }
+
+    /* IDNA2003 API ------------------------------------------------------------- */
+
+    /**
+     * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * ASCII names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; e.g." "www.example.com" is composed of 3 labels 
@ -231,7 +441,7 @@ public final class IDNA {
    }
    
    /**
-     * This function implements the ToASCII operation as defined in the IDNA RFC.
+     * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * ASCII names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; e.g." "www.example.com" is composed of 3 labels 
@ -261,7 +471,7 @@ public final class IDNA {
    }
    
    /**
-     * This function implements the ToASCII operation as defined in the IDNA RFC.
+     * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * ASCII names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; e.g." "www.example.com" is composed of 3 labels 
@ -286,115 +496,11 @@ public final class IDNA {
     */
    public static StringBuffer convertToASCII(UCharacterIterator src, int options)
                throws StringPrepParseException{
-        
-        boolean[] caseFlags = null;
-    
-        // the source contains all ascii codepoints
-        boolean srcIsASCII  = true;
-        // assume the source contains all LDH codepoints
-        boolean srcIsLDH = true; 
-
-        //get the options
-        boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
-        int ch;
-        // step 1
-        while((ch = src.next())!= UCharacterIterator.DONE){
-            if(ch> 0x7f){
-                srcIsASCII = false;
-            }
-        }
-        int failPos = -1;
-        src.setToStart();
-        StringBuffer processOut = null;
-        // step 2 is performed only if the source contains non ASCII
-        if(!srcIsASCII){
-            // step 2
-            processOut = singleton.namePrep.prepare(src, options);
-        }else{
-            processOut = new StringBuffer(src.getText());
-        }
-        int poLen = processOut.length();
-        
-        if(poLen==0){
-            throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
-        }
-        StringBuffer dest = new StringBuffer();
-        
-        // reset the variable to verify if output of prepare is ASCII or not
-        srcIsASCII = true;
-        
-        // step 3 & 4
-        for(int j=0;j<poLen;j++ ){
-            ch=processOut.charAt(j);
-            if(ch > 0x7F){
-                srcIsASCII = false;
-            }else if(isLDHChar(ch)==false){
-                // here we do not assemble surrogates
-                // since we know that LDH code points
-                // are in the ASCII range only
-                srcIsLDH = false;
-                failPos = j;
-            }
-        }
-    
-        if(useSTD3ASCIIRules == true){
-            // verify 3a and 3b
-            if( srcIsLDH == false /* source contains some non-LDH characters */
-                || processOut.charAt(0) ==  HYPHEN 
-                || processOut.charAt(processOut.length()-1) == HYPHEN){
-
-                /* populate the parseError struct */
-                if(srcIsLDH==false){
-                     throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
-                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,
-                                              processOut.toString(),
-                                             (failPos>0) ? (failPos-1) : failPos);
-                }else if(processOut.charAt(0) == HYPHEN){
-                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
-                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
-     
-                }else{
-                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
-                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,
-                                              processOut.toString(),
-                                              (poLen>0) ? poLen-1 : poLen);
-
-                }
-            }
-        }
-        if(srcIsASCII){
-            dest =  processOut;
-        }else{
-            // step 5 : verify the sequence does not begin with ACE prefix
-            if(!startsWithPrefix(processOut)){
-
-                //step 6: encode the sequence with punycode
-                caseFlags = new boolean[poLen];
-
-                StringBuffer punyout = Punycode.encode(processOut,caseFlags);
-
-                // convert all codepoints to lower case ASCII
-                StringBuffer lowerOut = toASCIILower(punyout);
-
-                //Step 7: prepend the ACE prefix
-                dest.append(ACE_PREFIX,0,ACE_PREFIX.length);
-                //Step 6: copy the contents in b2 into dest
-                dest.append(lowerOut);
-            }else{
-
-                throw new StringPrepParseException("The input does not start with the ACE Prefix.",
-                                         StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
-            }
-        }
-        if(dest.length() > MAX_LABEL_LENGTH){
-            throw new StringPrepParseException("The labels in the input are too long. Length > 63.", 
-                                     StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
-        }
-        return dest;
+        return IDNA2003.convertToASCII(src, options);
    }
-        
+
    /**
-     * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+     * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g: "www.example.com". 
     * It is important to note that this operation can fail. If it fails, then the input 
     * domain name cannot be used as an Internationalized Domain Name and the application
@ -428,7 +534,7 @@ public final class IDNA {
    }
    
    /**
-     * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+     * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g: "www.example.com". 
     * It is important to note that this operation can fail. If it fails, then the input 
     * domain name cannot be used as an Internationalized Domain Name and the application
@ -462,7 +568,7 @@ public final class IDNA {
    }
    
    /**
-     * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+     * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g: "www.example.com". 
     * It is important to note that this operation can fail. If it fails, then the input 
     * domain name cannot be used as an Internationalized Domain Name and the application
@ -492,37 +598,12 @@ public final class IDNA {
     */
    public static StringBuffer convertIDNToASCII(String src,int options)
            throws StringPrepParseException{
-
-        char[] srcArr = src.toCharArray();
-        StringBuffer result = new StringBuffer();
-        int sepIndex=0;
-        int oldSepIndex=0;
-        for(;;){
-            sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
-            String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
-            //make sure this is not a root label separator.
-            if(!(label.length()==0 && sepIndex==srcArr.length)){
-                UCharacterIterator iter = UCharacterIterator.getInstance(label);
-                result.append(convertToASCII(iter,options));
-            }
-            if(sepIndex==srcArr.length){
-                break;
-            }
-            
-            // increment the sepIndex to skip past the separator
-            sepIndex++;
-            oldSepIndex = sepIndex;
-            result.append((char)FULL_STOP);
-        }
-        if(result.length() > MAX_DOMAIN_NAME_LENGTH){
-            throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
-        }
-        return result;
+        return IDNA2003.convertIDNToASCII(src, options);
    }

    
    /**
-     * This function implements the ToUnicode operation as defined in the IDNA RFC.
+     * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * Unicode names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; for e.g." "www.example.com" is composed of 3 labels 
@ -552,7 +633,7 @@ public final class IDNA {
    }
    
    /**
-     * This function implements the ToUnicode operation as defined in the IDNA RFC.
+     * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * Unicode names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; for e.g." "www.example.com" is composed of 3 labels 
@ -582,7 +663,7 @@ public final class IDNA {
    }
       
    /**
-     * Function that implements the ToUnicode operation as defined in the IDNA RFC.
+     * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * Unicode names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; for e.g." "www.example.com" is composed of 3 labels 
@ -607,116 +688,11 @@ public final class IDNA {
     */
    public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
           throws StringPrepParseException{
-        
-        boolean[] caseFlags = null;
-                
-        // the source contains all ascii codepoints
-        boolean srcIsASCII  = true;
-        // assume the source contains all LDH codepoints
-        //boolean srcIsLDH = true; 
-        
-        //get the options
-        //boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
-        
-        //int failPos = -1;
-        int ch;
-        int saveIndex = src.getIndex();
-        // step 1: find out if all the codepoints in src are ASCII  
-        while((ch=src.next())!= UCharacterIterator.DONE){
-            if(ch>0x7F){
-                srcIsASCII = false;
-            }/*else if((srcIsLDH = isLDHChar(ch))==false){
-                failPos = src.getIndex();
-            }*/
-        }
-        StringBuffer processOut;
-        
-        if(srcIsASCII == false){
-            try {
-                // step 2: process the string
-                src.setIndex(saveIndex);
-                processOut = singleton.namePrep.prepare(src,options);
-            } catch (StringPrepParseException ex) {
-                return new StringBuffer(src.getText());
-            }
-
-        }else{
-            //just point to source
-            processOut = new StringBuffer(src.getText());
-        }
-        // TODO:
-        // The RFC states that 
-        // <quote>
-        // ToUnicode never fails. If any step fails, then the original input
-        // is returned immediately in that step.
-        // </quote>
-        
-        //step 3: verify ACE Prefix
-        if(startsWithPrefix(processOut)){
-            StringBuffer decodeOut = null;
-
-            //step 4: Remove the ACE Prefix
-            String temp = processOut.substring(ACE_PREFIX.length,processOut.length());
-
-            //step 5: Decode using punycode
-            try {
-                decodeOut = Punycode.decode(new StringBuffer(temp),caseFlags);
-            } catch (StringPrepParseException e) {
-                decodeOut = null;
-            }
-        
-            //step 6:Apply toASCII
-            if (decodeOut != null) {
-                StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
-    
-                //step 7: verify
-                if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
-//                    throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed",
-//                                             StringPrepParseException.VERIFICATION_ERROR); 
-                    decodeOut = null;
-                }
-            }
-
-            //step 8: return output of step 5
-             if (decodeOut != null) {
-                 return decodeOut;
-             }
-        }
-            
-//        }else{
-//            // verify that STD3 ASCII rules are satisfied
-//            if(useSTD3ASCIIRules == true){
-//                if( srcIsLDH == false /* source contains some non-LDH characters */
-//                    || processOut.charAt(0) ==  HYPHEN 
-//                    || processOut.charAt(processOut.length()-1) == HYPHEN){
-//    
-//                    if(srcIsLDH==false){
-//                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
-//                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
-//                                                 (failPos>0) ? (failPos-1) : failPos);
-//                    }else if(processOut.charAt(0) == HYPHEN){
-//                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
-//                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
-//                                                 processOut.toString(),0);
-//         
-//                    }else{
-//                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
-//                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
-//                                                 processOut.toString(),
-//                                                 processOut.length());
-//    
-//                    }
-//                }
-//            }
-//            // just return the source
-//            return new StringBuffer(src.getText());
-//        }  
-        
-        return new StringBuffer(src.getText());
+        return IDNA2003.convertToUnicode(src, options);
    }
    
    /**
-     * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+     * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g: "www.example.com". 
     *
     * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
@ -747,7 +723,7 @@ public final class IDNA {
    }
    
    /**
-     * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+     * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g: "www.example.com". 
     *
     * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
@ -778,7 +754,7 @@ public final class IDNA {
    }
    
    /**
-     * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+     * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g: "www.example.com". 
     *
     * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
@ -804,37 +780,12 @@ public final class IDNA {
     * @stable ICU 2.8
     */
    public static StringBuffer convertIDNToUnicode(String src, int options)
-        throws StringPrepParseException{
-            
-        char[] srcArr = src.toCharArray();
-        StringBuffer result = new StringBuffer();
-        int sepIndex=0;
-        int oldSepIndex=0;
-        for(;;){
-            sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
-            String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
-            if(label.length()==0 && sepIndex!=srcArr.length ){
-                throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
-            }
-            UCharacterIterator iter = UCharacterIterator.getInstance(label);
-            result.append(convertToUnicode(iter,options));
-            if(sepIndex==srcArr.length){
-                break;
-            }
-            // Unlike the ToASCII operation we don't normalize the label separators
-            result.append(srcArr[sepIndex]);
-            // increment the sepIndex to skip past the separator
-            sepIndex++;
-            oldSepIndex =sepIndex;
-        }
-        if(result.length() > MAX_DOMAIN_NAME_LENGTH){
-            throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
-        }
-        return result;
+            throws StringPrepParseException{
+        return IDNA2003.convertIDNToUnicode(src, options);
    }
    
    /**
-     * Compare two IDN strings for equivalence.
+     * IDNA2003: Compare two IDN strings for equivalence.
     * This function splits the domain names into labels and compares them.
     * According to IDN RFC, whenever two labels are compared, they are 
     * considered equal if and only if their ASCII forms (obtained by 
@ -860,19 +811,16 @@ public final class IDNA {
     * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
     * @stable ICU 2.8
     */
-    //  TODO: optimize
    public static int compare(StringBuffer s1, StringBuffer s2, int options)
        throws StringPrepParseException{
        if(s1==null || s2 == null){
            throw new IllegalArgumentException("One of the source buffers is null");
        }
-        StringBuffer s1Out = convertIDNToASCII(s1.toString(),options);
-        StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
-        return compareCaseInsensitiveASCII(s1Out,s2Out);
+        // Null inputs are rejected above; the comparison itself is delegated to
+        // IDNA2003.compare() on the String contents of the two buffers.
+        return IDNA2003.compare(s1.toString(), s2.toString(), options);
    }
    
    /**
-     * Compare two IDN strings for equivalence.
+     * IDNA2003: Compare two IDN strings for equivalence.
     * This function splits the domain names into labels and compares them.
     * According to IDN RFC, whenever two labels are compared, they are 
     * considered equal if and only if their ASCII forms (obtained by 
@ -898,18 +846,14 @@ public final class IDNA {
     * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
     * @stable ICU 2.8
     */
-    //  TODO: optimize
-    public static int compare(String s1, String s2, int options)
-        throws StringPrepParseException{
+    public static int compare(String s1, String s2, int options) throws StringPrepParseException{
        if(s1==null || s2 == null){
            throw new IllegalArgumentException("One of the source buffers is null");
        }
-        StringBuffer s1Out = convertIDNToASCII(s1, options);
-        StringBuffer s2Out = convertIDNToASCII(s2, options);
-        return compareCaseInsensitiveASCII(s1Out,s2Out);
+        // Null inputs are rejected above; the comparison itself is delegated to
+        // IDNA2003.compare() with the arguments passed through unchanged.
+        return IDNA2003.compare(s1, s2, options);
    }
    /**
-     * Compare two IDN strings for equivalence.
+     * IDNA2003: Compare two IDN strings for equivalence.
     * This function splits the domain names into labels and compares them.
     * According to IDN RFC, whenever two labels are compared, they are 
     * considered equal if and only if their ASCII forms (obtained by 
@ -935,14 +879,11 @@ public final class IDNA {
     * @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2
     * @stable ICU 2.8
     */
-    //  TODO: optimize
    public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
        throws StringPrepParseException{
        if(s1==null || s2 == null){
            throw new IllegalArgumentException("One of the source buffers is null");
        }
-        StringBuffer s1Out = convertIDNToASCII(s1.getText(), options);
-        StringBuffer s2Out = convertIDNToASCII(s2.getText(), options);
-        return compareCaseInsensitiveASCII(s1Out,s2Out);
+        // Null inputs are rejected above; the comparison itself is delegated to
+        // IDNA2003.compare() on the text obtained from each iterator via getText().
+        return IDNA2003.compare(s1.getText(), s2.getText(), options);
    }
 }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestAll.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestAll.java
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
- * Copyright (C) 1996-2008, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 1996-2010, International Business Machines Corporation and
+ * others. All Rights Reserved.
 *******************************************************************************
 */
 package com.ibm.icu.dev.test.normalizer;
@ -25,6 +25,7 @@ public class TestAll extends TestGroup {
            "TestCanonicalIterator",
            "NormalizationMonkeyTest",
            "NormalizerRegressionTests",
+            "UTS46Test"
        });
    }

--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/UTS46Test.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/UTS46Test.java
@ -0,0 +1,714 @@
+/*
+*******************************************************************************
+* Copyright (C) 2010, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.dev.test.normalizer;
+
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
+import com.ibm.icu.text.IDNA;
+
+/**
+ * UTS #46 (IDNA2008) test.
+ * @author Markus Scherer
+ * @since 2010jul10
+ */
+public class UTS46Test extends TestFmwk {
+    /**
+     * Command-line entry point: creates the test object and runs it with the given arguments.
+     *
+     * @param args arguments handed through to {@code run}
+     * @throws Exception whatever {@code run} throws
+     */
+    public static void main(String[] args) throws Exception {
+        UTS46Test test=new UTS46Test();
+        test.run(args);
+    }
+    /**
+     * Creates the two UTS #46 instances shared by the tests. Both use STD3 rules, BiDi checks
+     * and CONTEXTJ checks; {@code nontrans} additionally sets the two NONTRANSITIONAL_* options,
+     * while {@code trans} does not.
+     */
+    public UTS46Test() {
+        int commonOptions=IDNA.USE_STD3_RULES|IDNA.CHECK_BIDI|IDNA.CHECK_CONTEXTJ;
+        int nontransitionalOptions=
+            commonOptions|IDNA.NONTRANSITIONAL_TO_ASCII|IDNA.NONTRANSITIONAL_TO_UNICODE;
+        trans=IDNA.getUTS46Instance(commonOptions);
+        nontrans=IDNA.getUTS46Instance(nontransitionalOptions);
+    }
+
+    /**
+     * Basic API checks on the two instances created in the constructor:
+     * nameToASCII on an all-ASCII name, labelToASCII on a label that contains a dot,
+     * nameToUnicode with a sharp s, and labelToUnicode called with the same object
+     * as both source and destination. Failures are reported via errln().
+     */
+    public void TestAPI() {
+        // The same result buffer and Info object are reused for every call below.
+        StringBuilder result=new StringBuilder();
+        IDNA.Info info=new IDNA.Info();
+        // Mixed-case ASCII name: expect the lowercased name and no errors.
+        String input="www.eXample.cOm";
+        String expected="www.example.com";
+        trans.nameToASCII(input, result, info);
+        if(info.hasErrors() || !UTF16Plus.equal(result, expected)) {
+            errln(String.format("T.nameToASCII(www.example.com) info.errors=%s result matches=%b",
+                                info.getErrors(), UTF16Plus.equal(result, expected)));
+        }
+        // A "label" that contains a dot: the expected output has U+FFFD in place of the dot,
+        // and exactly the two errors LABEL_HAS_DOT and INVALID_ACE_LABEL must be reported.
+        input="xn--bcher.de-65a";
+        expected="xn--bcher\uFFFDde-65a";
+        nontrans.labelToASCII(input, result, info);
+        if( !info.getErrors().equals(EnumSet.of(IDNA.Error.LABEL_HAS_DOT, IDNA.Error.INVALID_ACE_LABEL)) ||
+            !UTF16Plus.equal(result, expected)
+        ) {
+            errln(String.format("N.labelToASCII(label-with-dot) failed with errors %s",
+                                info.getErrors()));
+        }
+        // Java API tests that are not parallel to C++ tests
+        // because the C++ specifics (error codes etc.) do not apply here.
+        // The value returned by nameToUnicode() is used here (via toString()) rather than
+        // reading the result buffer; the sharp s is expected to come back as "ss".
+        String resultString=trans.nameToUnicode("fA\u00DF.de", result, info).toString();
+        if(info.hasErrors() || !resultString.equals("fass.de")) {
+            errln(String.format("T.nameToUnicode(fA\u00DF.de) info.errors=%s result matches=%b",
+                                info.getErrors(), resultString.equals("fass.de")));
+        }
+        // Passing the same object as source and destination must be rejected with an exception.
+        try {
+            nontrans.labelToUnicode(result, result, info);
+            errln("N.labelToUnicode(result, result) did not throw an Exception");
+        } catch(Exception e) {
+            // as expected (should be an IllegalArgumentException, or an ICU version of it)
+        }
+    }
+
+    /**
+     * Checks an instance created with only CHECK_BIDI (USE_STD3_RULES is not set):
+     * a name containing non-LDH ASCII characters must convert without errors, and a label
+     * containing a space in a name with a Hebrew-alef label must report exactly the BIDI error.
+     */
+    public void TestNotSTD3() {
+        IDNA not3=IDNA.getUTS46Instance(IDNA.CHECK_BIDI);
+        // Input includes U+0000, '_', '+', '=' and a line feed; the expected output differs
+        // only in case ('A' -> 'a') and in the sharp s, which is expected as "ss".
+        String input="\u0000A_2+2=4\n.e\u00DFen.net";
+        StringBuilder result=new StringBuilder();
+        IDNA.Info info=new IDNA.Info();
+        if( !not3.nameToUnicode(input, result, info).toString().equals("\u0000a_2+2=4\n.essen.net") ||
+            info.hasErrors()
+        ) {
+            errln(String.format("notSTD3.nameToUnicode(non-LDH ASCII) unexpected errors %s string %s",
+                                info.getErrors(), prettify(result.toString())));
+        }
+        // A space (BiDi class WS) is not allowed in a BiDi domain name.
+        // The output is expected to equal the input, with BIDI as the only error.
+        input="a z.xn--4db.edu";
+        not3.nameToASCII(input, result, info);
+        if(!UTF16Plus.equal(result, input) || !info.getErrors().equals(EnumSet.of(IDNA.Error.BIDI))) {
+            errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
+        }
+    }
+
+    /**
+     * Maps the UIDNA_ERROR_* names used in the fourth column of the test data
+     * to the corresponding IDNA.Error constants.
+     */
+    private static final Map<String, IDNA.Error> errorNamesToErrors;
+    static {
+        // Fill a local map first, then publish it through the final field in one assignment.
+        Map<String, IDNA.Error> byName=new TreeMap<String, IDNA.Error>();
+        // label and domain name lengths
+        byName.put("UIDNA_ERROR_EMPTY_LABEL", IDNA.Error.EMPTY_LABEL);
+        byName.put("UIDNA_ERROR_LABEL_TOO_LONG", IDNA.Error.LABEL_TOO_LONG);
+        byName.put("UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", IDNA.Error.DOMAIN_NAME_TOO_LONG);
+        // hyphen placement
+        byName.put("UIDNA_ERROR_LEADING_HYPHEN", IDNA.Error.LEADING_HYPHEN);
+        byName.put("UIDNA_ERROR_TRAILING_HYPHEN", IDNA.Error.TRAILING_HYPHEN);
+        byName.put("UIDNA_ERROR_HYPHEN_3_4", IDNA.Error.HYPHEN_3_4);
+        // label contents
+        byName.put("UIDNA_ERROR_LEADING_COMBINING_MARK", IDNA.Error.LEADING_COMBINING_MARK);
+        byName.put("UIDNA_ERROR_DISALLOWED", IDNA.Error.DISALLOWED);
+        byName.put("UIDNA_ERROR_PUNYCODE", IDNA.Error.PUNYCODE);
+        byName.put("UIDNA_ERROR_LABEL_HAS_DOT", IDNA.Error.LABEL_HAS_DOT);
+        byName.put("UIDNA_ERROR_INVALID_ACE_LABEL", IDNA.Error.INVALID_ACE_LABEL);
+        // context checks
+        byName.put("UIDNA_ERROR_BIDI", IDNA.Error.BIDI);
+        byName.put("UIDNA_ERROR_CONTEXTJ", IDNA.Error.CONTEXTJ);
+        errorNamesToErrors=byName;
+    }
+
+    /**
+     * One row of the {@code testCases} table, unpacked into named fields.
+     * An instance is mutable and can be refilled any number of times via {@link #set}.
+     */
+    private static final class TestCase {
+        private TestCase() {
+            errors=EnumSet.noneOf(IDNA.Error.class);
+        }
+        /**
+         * Overwrites all fields from one table row.
+         *
+         * @param data four strings: input, mode, expected Unicode result, and the expected
+         *             error names joined with '|' (empty string for "no errors")
+         * @throws IllegalArgumentException if an error name is not a key of
+         *             {@code errorNamesToErrors}
+         */
+        private void set(String[] data) {
+            s=data[0];
+            o=data[1];
+            u=data[2];
+            errors.clear();
+            if(data[3].length()!=0) {
+                for(String e: data[3].split("\\|")) {
+                    IDNA.Error error=errorNamesToErrors.get(e);
+                    if(error==null) {
+                        // EnumSet.add(null) would fail with a bare NullPointerException;
+                        // report the misspelled name in the test data instead.
+                        throw new IllegalArgumentException(
+                                "Unknown error name \""+e+"\" in test data: "+data[3]);
+                    }
+                    errors.add(error);
+                }
+            }
+        }
+        // Input string and options string (Nontransitional/Transitional/Both).
+        private String s, o;
+        // Expected Unicode result string.
+        private String u;
+        // Expected errors; the same set object is cleared and refilled by set().
+        private final EnumSet<IDNA.Error> errors;
+    };
+
+    // Test data for TestSomeCases(). Each row is unpacked by TestCase.set() as:
+    //   [0] input string
+    //   [1] mode: "N" (nontransitional), "T" (transitional) or "B" (both)
+    //   [2] expected Unicode result string
+    //   [3] expected errors: UIDNA_ERROR_* names (keys of errorNamesToErrors) joined with '|',
+    //       or "" when no error is expected
+    private static final String testCases[][]={
+        { "www.eXample.cOm", "B",  // all ASCII
+          "www.example.com", "" },
+        { "B\u00FCcher.de", "B",  // u-umlaut
+          "b\u00FCcher.de", "" },
+        { "\u00D6BB", "B",  // O-umlaut
+          "\u00F6bb", "" },
+        { "fa\u00DF.de", "N",  // sharp s
+          "fa\u00DF.de", "" },
+        { "fa\u00DF.de", "T",  // sharp s
+          "fass.de", "" },
+        { "XN--fA-hia.dE", "B",  // sharp s in Punycode
+          "fa\u00DF.de", "" },
+        { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "N",  // Greek with final sigma
+          "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "" },
+        { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "T",  // Greek with final sigma
+          "\u03B2\u03CC\u03BB\u03BF\u03C3.com", "" },
+        { "xn--nxasmm1c", "B",  // Greek with final sigma in Punycode
+          "\u03B2\u03CC\u03BB\u03BF\u03C2", "" },
+        { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "N",  // "Sri" in "Sri Lanka" has a ZWJ
+          "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
+        { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "T",  // "Sri" in "Sri Lanka" has a ZWJ
+          "www.\u0DC1\u0DCA\u0DBB\u0DD3.com", "" },
+        { "www.xn--10cl1a0b660p.com", "B",  // "Sri" in Punycode
+          "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
+        { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "N",  // ZWNJ
+          "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "" },
+        { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "T",  // ZWNJ
+          "\u0646\u0627\u0645\u0647\u0627\u06CC", "" },
+        { "xn--mgba3gch31f060k.com", "B",  // ZWNJ in Punycode
+          "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC.com", "" },
+        { "a.b\uFF0Ec\u3002d\uFF61", "B",
+          "a.b.c.d.", "" },
+        { "U\u0308.xn--tda", "B",  // U+umlaut.u-umlaut
+          "\u00FC.\u00FC", "" },
+        { "xn--u-ccb", "B",  // u+umlaut in Punycode
+          "xn--u-ccb\uFFFD", "UIDNA_ERROR_INVALID_ACE_LABEL" },
+        { "a\u2488com", "B",  // contains 1-dot
+          "a\uFFFDcom", "UIDNA_ERROR_DISALLOWED" },
+        { "xn--a-ecp.ru", "B",  // contains 1-dot in Punycode
+          "xn--a-ecp\uFFFD.ru", "UIDNA_ERROR_INVALID_ACE_LABEL" },
+        { "xn--0.pt", "B",  // invalid Punycode
+          "xn--0\uFFFD.pt", "UIDNA_ERROR_PUNYCODE" },
+        { "xn--a.pt", "B",  // U+0080
+          "xn--a\uFFFD.pt", "UIDNA_ERROR_INVALID_ACE_LABEL" },
+        { "xn--a-\u00C4.pt", "B",  // invalid Punycode
+          "xn--a-\u00E4.pt", "UIDNA_ERROR_PUNYCODE" },
+        { "\u65E5\u672C\u8A9E\u3002\uFF2A\uFF30", "B",  // Japanese with fullwidth ".jp"
+          "\u65E5\u672C\u8A9E.jp", "" },
+        { "\u2615", "B", "\u2615", "" },  // Unicode 4.0 HOT BEVERAGE
+        // many deviation characters, test the special mapping code
+        { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
+          "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "N",
+          "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
+          "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz",
+          "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_CONTEXTJ" },
+        { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
+          "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
+          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "T",
+          "1.assbcssssssssd"+
+          "\u03C3\u03C3sssssssssssssssse"+
+          "ssssssssssssssssssssx"+
+          "ssssssssssssssssssssy"+
+          "sssssssssssssss\u015Dssz", "UIDNA_ERROR_LABEL_TOO_LONG" },
+        // "xn--bss" with deviation characters
+        { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "N",
+          "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "UIDNA_ERROR_CONTEXTJ" },
+        { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "T",
+          "\u5919", "" },
+        // "xn--bssffl" written as:
+        // 02E3 MODIFIER LETTER SMALL X
+        // 034F COMBINING GRAPHEME JOINER (ignored)
+        // 2115 DOUBLE-STRUCK CAPITAL N
+        // 200B ZERO WIDTH SPACE (ignored)
+        // FE63 SMALL HYPHEN-MINUS
+        // 00AD SOFT HYPHEN (ignored)
+        // FF0D FULLWIDTH HYPHEN-MINUS
+        // 180C MONGOLIAN FREE VARIATION SELECTOR TWO (ignored)
+        // 212C SCRIPT CAPITAL B
+        // FE00 VARIATION SELECTOR-1 (ignored)
+        // 017F LATIN SMALL LETTER LONG S
+        // 2064 INVISIBLE PLUS (ignored)
+        // 1D530 MATHEMATICAL FRAKTUR SMALL S
+        // E01EF VARIATION SELECTOR-256 (ignored)
+        // FB04 LATIN SMALL LIGATURE FFL
+        { "\u02E3\u034F\u2115\u200B\uFE63\u00AD\uFF0D\u180C"+
+          "\u212C\uFE00\u017F\u2064"+"\uD835\uDD30\uDB40\uDDEF"/*1D530 E01EF*/+"\uFB04", "B",
+          "\u5921\u591E\u591C\u5919", "" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901", "" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901.", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901.", "" },
+        // Domain name >256 characters, forces slow path in UTF-8 processing.
+        // NOTE(review): Java strings are UTF-16; "UTF-8 processing" presumably refers to the
+        // C++ implementation these cases parallel - confirm.
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "12345678901234567890123456789012345678901234567890123456789012", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "12345678901234567890123456789012345678901234567890123456789012",
+          "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789\u05D0", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789\u05D0",
+          "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG|UIDNA_ERROR_BIDI" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901234."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901234."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890",
+          "UIDNA_ERROR_LABEL_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901234."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890.", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901234."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890.",
+          "UIDNA_ERROR_LABEL_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901234."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901234."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901",
+          "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
+        // label length 63: xn--1234567890123456789012345678901234567890123456789012345-9te
+        { "\u00E41234567890123456789012345678901234567890123456789012345", "B",
+          "\u00E41234567890123456789012345678901234567890123456789012345", "" },
+        { "1234567890\u00E41234567890123456789012345678901234567890123456", "B",
+          "1234567890\u00E41234567890123456789012345678901234567890123456", "UIDNA_ERROR_LABEL_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E4123456789012345678901234567890123456789012345."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E4123456789012345678901234567890123456789012345."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901", "" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E4123456789012345678901234567890123456789012345."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901.", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E4123456789012345678901234567890123456789012345."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901.", "" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E4123456789012345678901234567890123456789012345."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "12345678901234567890123456789012345678901234567890123456789012", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E4123456789012345678901234567890123456789012345."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "12345678901234567890123456789012345678901234567890123456789012",
+          "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E41234567890123456789012345678901234567890123456."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E41234567890123456789012345678901234567890123456."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890",
+          "UIDNA_ERROR_LABEL_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E41234567890123456789012345678901234567890123456."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890.", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E41234567890123456789012345678901234567890123456."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "123456789012345678901234567890123456789012345678901234567890.",
+          "UIDNA_ERROR_LABEL_TOO_LONG" },
+        { "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E41234567890123456789012345678901234567890123456."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901", "B",
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890\u00E41234567890123456789012345678901234567890123456."+
+          "123456789012345678901234567890123456789012345678901234567890123."+
+          "1234567890123456789012345678901234567890123456789012345678901",
+          "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
+        // hyphen errors and empty-label errors
+        // "xn---q----jra"=="-q--a-umlaut-"
+        { "a.b..-q--a-.e", "B", "a.b..-q--a-.e",
+          "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
+          "UIDNA_ERROR_HYPHEN_3_4" },
+        { "a.b..-q--\u00E4-.e", "B", "a.b..-q--\u00E4-.e",
+          "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
+          "UIDNA_ERROR_HYPHEN_3_4" },
+        { "a.b..xn---q----jra.e", "B", "a.b..-q--\u00E4-.e",
+          "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
+          "UIDNA_ERROR_HYPHEN_3_4" },
+        { "a..c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
+        { "a.-b.", "B", "a.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
+        { "a.b-.c", "B", "a.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
+        { "a.-.c", "B", "a.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
+        { "a.bc--de.f", "B", "a.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
+        { "\u00E4.\u00AD.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
+        { "\u00E4.-b.", "B", "\u00E4.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
+        { "\u00E4.b-.c", "B", "\u00E4.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
+        { "\u00E4.-.c", "B", "\u00E4.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
+        { "\u00E4.bc--de.f", "B", "\u00E4.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
+        { "a.b.\u0308c.d", "B", "a.b.\uFFFDc.d", "UIDNA_ERROR_LEADING_COMBINING_MARK" },
+        { "a.b.xn--c-bcb.d", "B",
+          "a.b.xn--c-bcb\uFFFD.d", "UIDNA_ERROR_LEADING_COMBINING_MARK|UIDNA_ERROR_INVALID_ACE_LABEL" },
+        // BiDi
+        { "A0", "B", "a0", "" },
+        { "0A", "B", "0a", "" },  // all-LTR is ok to start with a digit (EN)
+        { "0A.\u05D0", "B",  // ASCII label does not start with L/R/AL
+          "0a.\u05D0", "UIDNA_ERROR_BIDI" },
+        { "c.xn--0-eha.xn--4db", "B",  // 2nd label does not start with L/R/AL
+          "c.0\u00FC.\u05D0", "UIDNA_ERROR_BIDI" },
+        { "b-.\u05D0", "B",  // label does not end with L/EN
+          "b-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
+        { "d.xn----dha.xn--4db", "B",  // 2nd label does not end with L/EN
+          "d.\u00FC-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
+        { "a\u05D0", "B", "a\u05D0", "UIDNA_ERROR_BIDI" },  // first dir != last dir
+        { "\u05D0\u05C7", "B", "\u05D0\u05C7", "" },
+        { "\u05D09\u05C7", "B", "\u05D09\u05C7", "" },
+        { "\u05D0a\u05C7", "B", "\u05D0a\u05C7", "UIDNA_ERROR_BIDI" },  // first dir != last dir
+        { "\u05D0\u05EA", "B", "\u05D0\u05EA", "" },
+        { "\u05D0\u05F3\u05EA", "B", "\u05D0\u05F3\u05EA", "" },
+        { "a\u05D0Tz", "B", "a\u05D0tz", "UIDNA_ERROR_BIDI" },  // mixed dir
+        { "\u05D0T\u05EA", "B", "\u05D0t\u05EA", "UIDNA_ERROR_BIDI" },  // mixed dir
+        { "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
+        { "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" },  // Arabic 7 in the middle
+        { "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" },  // AN digit in LTR
+        { "\u05D07\u0667\u05EA", "B",  // mixed EN/AN digits in RTL
+          "\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
+        // ZWJ
+        { "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" },  // Virama+ZWJ
+        { "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
+        { "\u200D", "N", "\u200D", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
+        // ZWNJ
+        { "\u0BB9\u0BCD\u200C", "N", "\u0BB9\u0BCD\u200C", "" },  // Virama+ZWNJ
+        { "\u0BB9\u200C", "N", "\u0BB9\u200C", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
+        { "\u200C", "N", "\u200C", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
+        { "\u0644\u0670\u200C\u06ED\u06EF", "N",  // Joining types D T ZWNJ T R
+          "\u0644\u0670\u200C\u06ED\u06EF", "" },
+        { "\u0644\u0670\u200C\u06EF", "N",  // D T ZWNJ R
+          "\u0644\u0670\u200C\u06EF", "" },
+        { "\u0644\u200C\u06ED\u06EF", "N",  // D ZWNJ T R
+          "\u0644\u200C\u06ED\u06EF", "" },
+        { "\u0644\u200C\u06EF", "N",  // D ZWNJ R
+          "\u0644\u200C\u06EF", "" },
+        { "\u0644\u0670\u200C\u06ED", "N",  // D T ZWNJ T
+          "\u0644\u0670\u200C\u06ED", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
+        { "\u06EF\u200C\u06EF", "N",  // R ZWNJ R
+          "\u06EF\u200C\u06EF", "UIDNA_ERROR_CONTEXTJ" },
+        { "\u0644\u200C", "N",  // D ZWNJ
+          "\u0644\u200C", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
+        // { "", "B",
+        //   "", "" },
+    };
+
+    public void TestSomeCases() {
+        StringBuilder aT=new StringBuilder(), uT=new StringBuilder();
+        StringBuilder aN=new StringBuilder(), uN=new StringBuilder();
+        IDNA.Info aTInfo=new IDNA.Info(), uTInfo=new IDNA.Info();
+        IDNA.Info aNInfo=new IDNA.Info(), uNInfo=new IDNA.Info();
+
+        StringBuilder aTuN=new StringBuilder(), uTaN=new StringBuilder();
+        StringBuilder aNuN=new StringBuilder(), uNaN=new StringBuilder();
+        IDNA.Info aTuNInfo=new IDNA.Info(), uTaNInfo=new IDNA.Info();
+        IDNA.Info aNuNInfo=new IDNA.Info(), uNaNInfo=new IDNA.Info();
+
+        StringBuilder aTL=new StringBuilder(), uTL=new StringBuilder();
+        StringBuilder aNL=new StringBuilder(), uNL=new StringBuilder();
+        IDNA.Info aTLInfo=new IDNA.Info(), uTLInfo=new IDNA.Info();
+        IDNA.Info aNLInfo=new IDNA.Info(), uNLInfo=new IDNA.Info();
+
+        EnumSet<IDNA.Error> uniErrors=EnumSet.noneOf(IDNA.Error.class);
+
+        TestCase testCase=new TestCase();
+        int i;
+        for(i=0; i<testCases.length; ++i) {
+            testCase.set(testCases[i]);
+            String input=testCase.s;
+            String expected=testCase.u;
+            // ToASCII/ToUnicode, transitional/nontransitional
+            try {
+                trans.nameToASCII(input, aT, aTInfo);
+                trans.nameToUnicode(input, uT, uTInfo);
+                nontrans.nameToASCII(input, aN, aNInfo);
+                nontrans.nameToUnicode(input, uN, uNInfo);
+            } catch(Exception e) {
+                errln(String.format("first-level processing [%d/%s] %s - %s",
+                                    i, testCase.o, testCase.s, e));
+                continue;
+            }
+            // ToUnicode does not set length errors.
+            uniErrors.clear();
+            uniErrors.addAll(testCase.errors);
+            uniErrors.removeAll(lengthErrors);
+            char mode=testCase.o.charAt(0);
+            if(mode=='B' || mode=='N') {
+                if(!sameErrors(uNInfo, uniErrors)) {
+                    errln(String.format("N.nameToUnicode([%d] %s) unexpected errors %s",
+                                        i, testCase.s, uNInfo.getErrors()));
+                    continue;
+                }
+                if(!UTF16Plus.equal(uN, expected)) {
+                    errln(String.format("N.nameToUnicode([%d] %s) unexpected string %s",
+                                        i, testCase.s, prettify(uN.toString())));
+                    continue;
+                }
+                if(!sameErrors(aNInfo, testCase.errors)) {
+                    errln(String.format("N.nameToASCII([%d] %s) unexpected errors %s",
+                                        i, testCase.s, aNInfo.getErrors()));
+                    continue;
+                }
+            }
+            if(mode=='B' || mode=='T') {
+                if(!sameErrors(uTInfo, uniErrors)) {
+                    errln(String.format("T.nameToUnicode([%d] %s) unexpected errors %s",
+                                        i, testCase.s, uTInfo.getErrors()));
+                    continue;
+                }
+                if(!UTF16Plus.equal(uT, expected)) {
+                    errln(String.format("T.nameToUnicode([%d] %s) unexpected string %s",
+                                        i, testCase.s, prettify(uT.toString())));
+                    continue;
+                }
+                if(!sameErrors(aTInfo, testCase.errors)) {
+                    errln(String.format("T.nameToASCII([%d] %s) unexpected errors %s",
+                                        i, testCase.s, aTInfo.getErrors()));
+                    continue;
+                }
+            }
+            // ToASCII is all-ASCII if no severe errors
+            if(!hasCertainErrors(aNInfo, severeErrors) && !isASCII(aN)) {
+                errln(String.format("N.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
+                                    i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
+                continue;
+            }
+            if(!hasCertainErrors(aTInfo, severeErrors) && !isASCII(aT)) {
+                errln(String.format("T.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
+                                    i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
+                continue;
+            }
+            if(isVerbose()) {
+                char m= mode=='B' ? mode : 'N';
+                logln(String.format("%c.nameToASCII([%d] %s) (errors %s) result string: %s",
+                                    m, i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
+                if(mode!='B') {
+                    logln(String.format("T.nameToASCII([%d] %s) (errors %s) result string: %s",
+                                        i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
+                }
+            }
+            // second-level processing
+            try {
+                nontrans.nameToUnicode(aT, aTuN, aTuNInfo);
+                nontrans.nameToASCII(uT, uTaN, uTaNInfo);
+                nontrans.nameToUnicode(aN, aNuN, aNuNInfo);
+                nontrans.nameToASCII(uN, uNaN, uNaNInfo);
+            } catch(Exception e) {
+                errln(String.format("second-level processing [%d/%s] %s - %s",
+                                    i, testCase.o, testCase.s, e));
+                continue;
+            }
+            if(!UTF16Plus.equal(aN, uNaN)) {
+                errln(String.format("N.nameToASCII([%d] %s)!=N.nameToUnicode().N.nameToASCII() "+
+                                    "(errors %s) %s vs. %s",
+                                    i, testCase.s, aNInfo.getErrors(),
+                                    prettify(aN.toString()), prettify(uNaN.toString())));
+                continue;
+            }
+            if(!UTF16Plus.equal(aT, uTaN)) {
+                errln(String.format("T.nameToASCII([%d] %s)!=T.nameToUnicode().N.nameToASCII() "+
+                                    "(errors %s) %s vs. %s",
+                                    i, testCase.s, aNInfo.getErrors(),
+                                    prettify(aT.toString()), prettify(uTaN.toString())));
+                continue;
+            }
+            if(!UTF16Plus.equal(uN, aNuN)) {
+                errln(String.format("N.nameToUnicode([%d] %s)!=N.nameToASCII().N.nameToUnicode() "+
+                                    "(errors %s) %s vs. %s",
+                                    i, testCase.s, uNInfo.getErrors(), prettify(uN.toString()), prettify(aNuN.toString())));
+                continue;
+            }
+            if(!UTF16Plus.equal(uT, aTuN)) {
+                errln(String.format("T.nameToUnicode([%d] %s)!=T.nameToASCII().N.nameToUnicode() "+
+                                    "(errors %s) %s vs. %s",
+                                    i, testCase.s, uNInfo.getErrors(),
+                                    prettify(uT.toString()), prettify(aTuN.toString())));
+                continue;
+            }
+            // labelToUnicode
+            try {
+                trans.labelToASCII(input, aTL, aTLInfo);
+                trans.labelToUnicode(input, uTL, uTLInfo);
+                nontrans.labelToASCII(input, aNL, aNLInfo);
+                nontrans.labelToUnicode(input, uNL, uNLInfo);
+            } catch(Exception e) {
+                errln(String.format("labelToXYZ processing [%d/%s] %s - %s",
+                                    i, testCase.o, testCase.s, e));
+                continue;
+            }
+            if(aN.indexOf(".")<0) {
+                if(!UTF16Plus.equal(aN, aNL) || !sameErrors(aNInfo, aNLInfo)) {
+                    errln(String.format("N.nameToASCII([%d] %s)!=N.labelToASCII() "+
+                                        "(errors %s vs %04lx) %s vs. %s",
+                                        i, testCase.s, aNInfo.getErrors(), aNLInfo.getErrors(),
+                                        prettify(aN.toString()), prettify(aNL.toString())));
+                    continue;
+                }
+            } else {
+                if(!hasError(aNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
+                    errln(String.format("N.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
+                                        i, testCase.s, aNLInfo.getErrors()));
+                    continue;
+                }
+            }
+            if(aT.indexOf(".")<0) {
+                if(!UTF16Plus.equal(aT, aTL) || !sameErrors(aTInfo, aTLInfo)) {
+                    errln(String.format("T.nameToASCII([%d] %s)!=T.labelToASCII() "+
+                                        "(errors %s vs %04lx) %s vs. %s",
+                                        i, testCase.s, aTInfo.getErrors(), aTLInfo.getErrors(),
+                                        prettify(aT.toString()), prettify(aTL.toString())));
+                    continue;
+                }
+            } else {
+                if(!hasError(aTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
+                    errln(String.format("T.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
+                                        i, testCase.s, aTLInfo.getErrors()));
+                    continue;
+                }
+            }
+            if(uN.indexOf(".")<0) {
+                if(!UTF16Plus.equal(uN, uNL) || !sameErrors(uNInfo, uNLInfo)) {
+                    errln(String.format("N.nameToUnicode([%d] %s)!=N.labelToUnicode() "+
+                                        "(errors %s vs %04lx) %s vs. %s",
+                                        i, testCase.s, uNInfo.getErrors(), uNLInfo.getErrors(),
+                                        prettify(uN.toString()), prettify(uNL.toString())));
+                    continue;
+                }
+            } else {
+                if(!hasError(uNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
+                    errln(String.format("N.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
+                                        i, testCase.s, uNLInfo.getErrors()));
+                    continue;
+                }
+            }
+            if(uT.indexOf(".")<0) {
+                if(!UTF16Plus.equal(uT, uTL) || !sameErrors(uTInfo, uTLInfo)) {
+                    errln(String.format("T.nameToUnicode([%d] %s)!=T.labelToUnicode() "+
+                                        "(errors %s vs %04lx) %s vs. %s",
+                                        i, testCase.s, uTInfo.getErrors(), uTLInfo.getErrors(),
+                                        prettify(uT.toString()), prettify(uTL.toString())));
+                    continue;
+                }
+            } else {
+                if(!hasError(uTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
+                    errln(String.format("T.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
+                                        i, testCase.s, uTLInfo.getErrors()));
+                    continue;
+                }
+            }
+            // Differences between transitional and nontransitional processing
+            if(mode=='B') {
+                if( aNInfo.isTransitionalDifferent() ||
+                    aTInfo.isTransitionalDifferent() ||
+                    uNInfo.isTransitionalDifferent() ||
+                    uTInfo.isTransitionalDifferent() ||
+                    aNLInfo.isTransitionalDifferent() ||
+                    aTLInfo.isTransitionalDifferent() ||
+                    uNLInfo.isTransitionalDifferent() ||
+                    uTLInfo.isTransitionalDifferent()
+                ) {
+                    errln(String.format("B.process([%d] %s) isTransitionalDifferent()", i, testCase.s));
+                    continue;
+                }
+                if( !UTF16Plus.equal(aN, aT) || !UTF16Plus.equal(uN, uT) ||
+                    !UTF16Plus.equal(aNL, aTL) || !UTF16Plus.equal(uNL, uTL) ||
+                    !sameErrors(aNInfo, aTInfo) || !sameErrors(uNInfo, uTInfo) ||
+                    !sameErrors(aNLInfo, aTLInfo) || !sameErrors(uNLInfo, uTLInfo)
+                ) {
+                    errln(String.format("N.process([%d] %s) vs. T.process() different errors or result strings",
+                                        i, testCase.s));
+                    continue;
+                }
+            } else {
+                if( !aNInfo.isTransitionalDifferent() ||
+                    !aTInfo.isTransitionalDifferent() ||
+                    !uNInfo.isTransitionalDifferent() ||
+                    !uTInfo.isTransitionalDifferent() ||
+                    !aNLInfo.isTransitionalDifferent() ||
+                    !aTLInfo.isTransitionalDifferent() ||
+                    !uNLInfo.isTransitionalDifferent() ||
+                    !uTLInfo.isTransitionalDifferent()
+                ) {
+                    errln(String.format("%s.process([%d] %s) !isTransitionalDifferent()",
+                                        testCase.o, i, testCase.s));
+                    continue;
+                }
+                if( UTF16Plus.equal(aN, aT) || UTF16Plus.equal(uN, uT) ||
+                    UTF16Plus.equal(aNL, aTL) || UTF16Plus.equal(uNL, uTL)
+                ) {
+                    errln(String.format("N.process([%d] %s) vs. T.process() same result strings",
+                                        i, testCase.s));
+                    continue;
+                }
+            }
+        }
+    }
+
+    /**
+     * The two IDNA instances exercised by this test: {@code trans} for
+     * transitional and {@code nontrans} for nontransitional processing.
+     * Their results are labeled "T" and "N" in the test's messages.
+     */
+    private final IDNA trans, nontrans;
+
+    /**
+     * Errors that the test loop treats as severe: a nameToASCII result is
+     * only required to be all-ASCII when its info contains none of these.
+     */
+    private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
+        IDNA.Error.LEADING_COMBINING_MARK,
+        IDNA.Error.DISALLOWED,
+        IDNA.Error.PUNYCODE,
+        IDNA.Error.LABEL_HAS_DOT,
+        IDNA.Error.INVALID_ACE_LABEL);
+    /**
+     * Length-related errors. The test loop removes these from a test case's
+     * expected errors before checking nameToUnicode results, because
+     * ToUnicode does not set length errors.
+     */
+    private static final EnumSet<IDNA.Error> lengthErrors=EnumSet.of(
+            IDNA.Error.EMPTY_LABEL,
+            IDNA.Error.LABEL_TOO_LONG,
+            IDNA.Error.DOMAIN_NAME_TOO_LONG);
+
+    /**
+     * Tells whether an info object reports one specific error.
+     *
+     * @param info  the info object whose error set is inspected
+     * @param error the error to look for
+     * @return true if {@code info.getErrors()} contains {@code error}
+     */
+    private boolean hasError(IDNA.Info info, IDNA.Error error) {
+        Set<IDNA.Error> reported=info.getErrors();
+        return reported.contains(error);
+    }
+    /**
+     * Tells whether the two error sets have at least one error in common.
+     * Assumes that certainErrors is not empty.
+     *
+     * @param errors        the errors that were reported
+     * @param certainErrors the errors of interest
+     * @return true if some element of {@code errors} is also in {@code certainErrors}
+     */
+    private boolean hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors) {
+        if(errors.isEmpty()) {
+            // Nothing was reported, so nothing can match.
+            return false;
+        }
+        boolean nothingShared=Collections.disjoint(errors, certainErrors);
+        return !nothingShared;
+    }
+    /**
+     * Convenience overload: applies the set-based check to the errors
+     * returned by {@code info.getErrors()}.
+     *
+     * @param info          the info object whose errors are inspected
+     * @param certainErrors the errors of interest
+     * @return the result of the set-based hasCertainErrors() for info's errors
+     */
+    private boolean hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors) {
+        Set<IDNA.Error> reported=info.getErrors();
+        return hasCertainErrors(reported, certainErrors);
+    }
+    /**
+     * Compares two error sets for equality.
+     *
+     * @param a the first error set
+     * @param b the second error set
+     * @return the result of {@code a.equals(b)}
+     */
+    private boolean sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b) {
+        final boolean identical=a.equals(b);
+        return identical;
+    }
+    /**
+     * Compares the error sets of two info objects.
+     *
+     * @param a the first info object
+     * @param b the second info object
+     * @return true if both getErrors() sets are equal
+     */
+    private boolean sameErrors(IDNA.Info a, IDNA.Info b) {
+        Set<IDNA.Error> errorsOfA=a.getErrors();
+        Set<IDNA.Error> errorsOfB=b.getErrors();
+        return sameErrors(errorsOfA, errorsOfB);
+    }
+    /**
+     * Compares the error set of an info object with a given set of errors.
+     *
+     * @param a the info object whose getErrors() set is compared
+     * @param b the error set to compare against
+     * @return true if a's error set equals {@code b}
+     */
+    private boolean sameErrors(IDNA.Info a, Set<IDNA.Error> b) {
+        Set<IDNA.Error> reported=a.getErrors();
+        return sameErrors(reported, b);
+    }
+
+    /**
+     * Checks whether a character sequence consists only of chars below 0x80.
+     *
+     * @param str the sequence to scan
+     * @return false as soon as a char >=0x80 is found, otherwise true
+     *         (an empty sequence yields true)
+     */
+    private static boolean isASCII(CharSequence str) {
+        final int end=str.length();
+        int pos=0;
+        while(pos<end) {
+            char unit=str.charAt(pos);
+            if(unit>0x7f) {
+                return false;
+            }
+            ++pos;
+        }
+        return true;
+    }
+}