ICU-3064 StringPrep port take 1

X-SVN-Rev: 12908
This commit is contained in:
Ram Viswanadha 2003-08-21 23:44:28 +00:00
parent 869bbeb681
commit dacfe88ad2
26 changed files with 6936 additions and 36 deletions

6
.gitattributes vendored
View File

@ -56,6 +56,11 @@ icu4c/source/test/testdata/iscii.bin -text
icu4c/source/test/testdata/uni-text.bin -text
icu4j/src/com/ibm/icu/dev/data/ThaiWordFreq.xls -text
icu4j/src/com/ibm/icu/dev/data/holidays_jp.ucs -text
icu4j/src/com/ibm/icu/dev/data/nfscis.spp -text
icu4j/src/com/ibm/icu/dev/data/nfscsi.spp -text
icu4j/src/com/ibm/icu/dev/data/nfscss.spp -text
icu4j/src/com/ibm/icu/dev/data/nfsmxp.spp -text
icu4j/src/com/ibm/icu/dev/data/nfsmxs.spp -text
icu4j/src/com/ibm/icu/dev/data/rbbi/english.dict -text
icu4j/src/com/ibm/icu/dev/data/thai6.ucs -text
icu4j/src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Asian.txt -text
@ -74,6 +79,7 @@ icu4j/src/com/ibm/icu/impl/data/ICULocaleData.jar -text
icu4j/src/com/ibm/icu/impl/data/invuca.icu -text
icu4j/src/com/ibm/icu/impl/data/pnames.icu -text
icu4j/src/com/ibm/icu/impl/data/ucadata.icu -text
icu4j/src/com/ibm/icu/impl/data/uidna.spp -text
icu4j/src/com/ibm/icu/impl/data/unames.icu -text
icu4j/src/com/ibm/icu/impl/data/unorm.icu -text
icu4j/src/com/ibm/icu/impl/data/uprops.icu -text

View File

@ -6,8 +6,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/build.xml,v $
* $Date: 2003/07/03 20:48:58 $
* $Revision: 1.75 $
* $Date: 2003/08/21 23:44:28 $
* $Revision: 1.76 $
*
*******************************************************************************
* This is the ant build file for ICU4J. See readme.html for more information.
@ -16,34 +16,44 @@
<project name="ICU4J" default="core" basedir=".">
<!-- ### begin clover setup ### -->
<property name="icu4j.tempdir" value="tmp"/>
<property name="clover.initstring" value="${icu4j.tempdir}/icu4j.db"/>
<path id="clover.classpath">
<!-- ***** you will need to change the paths below to point
to clover.jar and velocity.jar on your filesystem ***** -->
<pathelement path="${java.class.path}/"/>
<pathelement location="clover.jar"/>
<pathelement location="velocity.jar"/>
</path>
<!-- ### Begin Clover 1.2 setup ### -->
<typedef resource="clovertypes"/>
<taskdef resource="clovertasks"/>
<!-- target to switch Clover on -->
<target name="with.clover">
<property name="build.compiler" value="org.apache.tools.ant.taskdefs.CloverCompilerAdapter"/>
<!-- instruct clover to use a class-based instrumentation strategy when instrumenting SimpleTimeZone.java -->
<property name="clover.useclass.includes" value="**/SimpleTimeZone.java"/>
<clover-setup initString="tmp/icu4j.db">
<files>
<exclude name="**/dev/**/*.java"/>
</files>
</clover-setup>
</target>
<target name="clover.html" depends="with.clover">
<clover-report >
<current outfile="icu4j_html" >
<format type="html" />
</current>
</clover-report>
</target>
<target name="clover.log" depends="with.clover">
<clover-log level="method">
<package name="com.ibm.icu.text"/>
</clover-log>
<clover-log level="method">
<package name="com.ibm.icu.lang"/>
</clover-log>
<clover-log level="method">
<package name="com.ibm.icu.math"/>
</clover-log>
<clover-log level="method">
<package name="com.ibm.icu.util"/>
</clover-log>
<clover-log level="method">
<package name="com.ibm.icu.impl"/>
</clover-log>
</target>
<!-- target to generate a Clover html report -->
<target name="clover.report">
<java classname="com.cortexeb.tools.clover.reporters.html.HtmlReporter" fork="yes">
<arg line="--outputdir ${icu4j.tempdir}/icu4j_html --showSrc --initstring ${clover.initstring} --title 'icu4j core'"/>
<classpath refid="clover.classpath"/>
</java>
</target>
<!-- ### end clover setup ### -->
<!-- ### End Clover 1.2 setup ### -->
<target name="anthack1">
<!-- It's a real pain to set properties conditionally.
Ant 1.5 isn't really any better than 1.4, in fact 1.5 enforces that
@ -78,7 +88,6 @@
<path id="build.classpath">
<pathelement path="${build.dir}"/>
<path refid="clover.classpath"/>
</path>
<property name="richedit.dir" value="richedit"/>
@ -121,7 +130,8 @@
srcdir="${src.dir}"
destdir="${build.dir}"
classpathref="build.classpath"
debug="on" deprecation="off"/>
debug="on" deprecation="off"
encoding="ascii"/>
</target>
<target name="tests" depends="core,testData">
@ -182,7 +192,7 @@
<target name ="coreData" depends="init">
<copy todir="${build.dir}/com/ibm/icu/impl/data">
<fileset dir="${src.dir}/com/ibm/icu/impl/data"
includes="Transliterator_*.txt,*.icu"
includes="Transliterator_*.txt,*.icu,*.spp"
excludes="**/CVS/**/*,Transliterator_Han_Latin_*.txt"/>
</copy>
</target>
@ -196,6 +206,10 @@
destdir="${build.dir}"
classpathref="build.classpath"
debug="on" deprecation="off"/>
<copy todir="${build.dir}/com/ibm/icu/dev/data">
<fileset dir="${src.dir}/com/ibm/icu/dev/data"
includes="*.spp"/>
</copy>
</target>
<!-- builds richedit and richedit tests -->

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestFmwk.java,v $
* $Date: 2003/06/11 18:27:08 $
* $Revision: 1.47 $
* $Date: 2003/08/21 23:42:03 $
* $Revision: 1.48 $
*
*****************************************************************************************
*/
@ -820,7 +820,23 @@ public class TestFmwk extends AbstractTestLog {
public static String hex(StringBuffer s) {
// Convenience overload: snapshot the buffer's current contents as a String
// and delegate to the hex overload that accepts a String.
return hex(s.toString());
}
/**
 * Returns a copy of <code>s</code> in which every UTF-16 code unit above
 * 0x7F is replaced by a backslash-u escape followed by the output of
 * <code>hex</code> for that unit; units in the ASCII range are copied
 * unchanged.
 *
 * @param s the text to make printable
 * @return the escaped text
 */
public static String prettify(String s) {
    final int length = s.length();
    StringBuffer escaped = new StringBuffer();
    for (int pos = 0; pos < length; pos++) {
        char unit = s.charAt(pos);
        if (unit <= 0x7f) {
            // plain ASCII passes through untouched
            escaped.append(unit);
            continue;
        }
        escaped.append("\\u").append(hex(unit));
    }
    return escaped.toString();
}
/**
 * Convenience overload: applies prettify(String) to the current contents
 * of the given buffer.
 */
public static String prettify(StringBuffer s) {
return prettify(s.toString());
}
private static class ASCIIWriter extends PrintWriter {
private Writer w;
private StringBuffer buffer = new StringBuffer();

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestUtil.java,v $
* $Date: 2003/06/03 18:49:28 $
* $Revision: 1.5 $
* $Date: 2003/08/21 23:42:03 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -14,8 +14,10 @@ package com.ibm.icu.dev.test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
public class TestUtil {
/**
@ -86,6 +88,14 @@ public class TestUtil {
public static final BufferedReader getDataReader(String name) throws IOException {
// Delegates to the two-argument overload with a fixed second argument of 1024
// (presumably the reader's buffer size -- confirm against that overload).
return getDataReader(name, 1024);
}
/**
 * Opens a raw byte stream on the data file called 'name', resolved through
 * getDataFile(name).  The caller owns the returned stream and is
 * responsible for closing it.
 *
 * @param name path of the data file, as understood by getDataFile
 * @return a newly opened FileInputStream on that file
 * @throws IOException if the file cannot be located or opened
 */
public static final InputStream getDataStream(String name) throws IOException{
    return new FileInputStream(getDataFile(name));
}
static final char DIGITS[] = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',

View File

@ -0,0 +1,408 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java,v $
* $Date: 2003/08/21 23:42:25 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.stringprep.ParseException;
/**
 * Reference implementation of the IDNA conversions (ToASCII, ToUnicode and
 * their whole-domain-name variants) used by the stringprep tests.
 * <p>
 * Name preparation is delegated to {@link NamePrepTransform} and the ACE
 * encoding/decoding to {@link PunycodeReference}.  The "step N" comments
 * inside the conversion methods follow the step numbering of the RFC this
 * code was written against.
 *
 * @author ram
 */
public class IDNAReference {

    /** The ACE prefix "xn--" as UTF-16 code units. */
    private static final char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
    private static final int ACE_PREFIX_LENGTH = 4;

    /** Maximum number of code units allowed in a converted label. */
    private static final int MAX_LABEL_LENGTH = 63;
    private static final int HYPHEN = 0x002D;
    private static final int CAPITAL_A = 0x0041;
    private static final int CAPITAL_Z = 0x005A;
    private static final int LOWER_CASE_DELTA = 0x0020;
    private static final int FULL_STOP = 0x002E;

    /** Option: no special behavior. */
    public static final int DEFAULT = 0x0000;
    /** Option bit handed through to the name-prep step (unassigned code points allowed). */
    public static final int ALLOW_UNASSIGNED = 0x0001;
    /** Option bit: additionally enforce the STD 3 ASCII rules (LDH only, no leading/trailing hyphen). */
    public static final int USE_STD3_RULES = 0x0002;

    public static final NamePrepTransform transform = NamePrepTransform.getInstance();

    /**
     * Tells whether <code>src</code> begins with the ACE prefix, compared
     * ASCII-case-insensitively.
     */
    private static boolean startsWithPrefix(StringBuffer src){
        if(src.length() < ACE_PREFIX_LENGTH){
            return false;
        }
        for(int i=0; i<ACE_PREFIX_LENGTH; i++){
            if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
                return false;
            }
        }
        return true;
    }

    /** Lower-cases A-Z; every other code unit is returned unchanged. */
    private static char toASCIILower(char ch){
        if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
            return (char)(ch + LOWER_CASE_DELTA);
        }
        return ch;
    }

    /** Returns a new buffer holding <code>src</code> with A-Z lower-cased. */
    private static StringBuffer toASCIILower(StringBuffer src){
        StringBuffer dest = new StringBuffer();
        for(int i=0; i<src.length(); i++){
            dest.append(toASCIILower(src.charAt(i)));
        }
        return dest;
    }

    /**
     * Compares two buffers code unit by code unit, ignoring ASCII case.
     * <p>
     * The common prefix is compared first; if it is identical the shorter
     * buffer sorts first.  (Previously only the length of <code>s1</code>
     * was consulted, which threw StringIndexOutOfBoundsException when
     * <code>s2</code> was shorter and reported equality when
     * <code>s1</code> was a proper prefix of <code>s2</code>.)
     *
     * @return a negative value, zero, or a positive value when s1 is less
     *         than, equal to, or greater than s2
     */
    private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
        int len1 = s1.length();
        int len2 = s2.length();
        int common = Math.min(len1, len2);
        for(int i=0; i<common; i++){
            char c1 = s1.charAt(i);
            char c2 = s2.charAt(i);
            /* Case-insensitive comparison */
            if(c1 != c2){
                int rc = (int)toASCIILower(c1) - (int)toASCIILower(c2);
                if(rc != 0){
                    return rc;
                }
            }
        }
        /* identical common prefix: equal only if the lengths agree too */
        return len1 - len2;
    }

    /**
     * Returns the index of the first label separator in
     * <code>src[start..limit)</code>, or <code>limit</code> if there is none.
     */
    private static int getSeparatorIndex(char[] src,int start, int limit){
        for(; start<limit; start++){
            if(NamePrepTransform.isLabelSeparator(src[start])){
                return start;
            }
        }
        // we have not found the separator just return length
        return start;
    }

    /** Tells whether <code>ch</code> is an ASCII letter, digit or hyphen. */
    private static boolean isLDHChar(int ch){
        // high runner case
        if(ch>0x007A){
            return false;
        }
        //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
        return (ch==0x002D)
            || (0x0030 <= ch && ch <= 0x0039)
            || (0x0041 <= ch && ch <= 0x005A)
            || (0x0061 <= ch && ch <= 0x007A);
    }

    /** String convenience overload of {@link #convertToASCII(UCharacterIterator, int)}. */
    public static StringBuffer convertToASCII(String src, int options)
            throws ParseException{
        UCharacterIterator iter = UCharacterIterator.getInstance(src);
        return convertToASCII(iter,options);
    }

    /** StringBuffer convenience overload of {@link #convertToASCII(UCharacterIterator, int)}. */
    public static StringBuffer convertToASCII(StringBuffer src, int options)
            throws ParseException{
        UCharacterIterator iter = UCharacterIterator.getInstance(src);
        return convertToASCII(iter,options);
    }

    /**
     * Converts a single label to its ASCII form.
     * <p>
     * The label is name-prepped; if the result is pure ASCII it is returned
     * as is, otherwise it is Punycode-encoded, lower-cased and prefixed
     * with the ACE prefix.
     *
     * @param srcIter the label to convert
     * @param options bit set of {@link #ALLOW_UNASSIGNED} and {@link #USE_STD3_RULES}
     * @return the converted label
     * @throws ParseException if name preparation fails, the STD 3 rules are
     *         requested and violated, a non-ASCII label already starts with
     *         the ACE prefix, or the result is longer than 63 code units
     */
    public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
            throws ParseException{

        char[] caseFlags = null;

        // the source contains all ascii codepoints
        boolean srcIsASCII = true;
        // assume the source contains all LDH codepoints
        boolean srcIsLDH = true;

        //get the options
        boolean useSTD3ASCIIRules = (options & USE_STD3_RULES) != 0;

        int failPos = -1;

        // step 2
        StringBuffer processOut = transform.prepare(srcIter,options);
        int poLen = processOut.length();

        StringBuffer dest = new StringBuffer();

        // step 3 & 4
        for(int j=0; j<poLen; j++){
            char ch = processOut.charAt(j);
            if(ch > 0x7F){
                srcIsASCII = false;
            }
            // here we do not assemble surrogates
            // since we know that LDH code points
            // are in the ASCII range only
            if(!isLDHChar(ch)){
                srcIsLDH = false;
                failPos = j;
            }
        }

        if(useSTD3ASCIIRules){
            // verify 3a and 3b
            if(!srcIsLDH){
                /* source contains some non-LDH characters */
                throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                         ParseException.STD3_ASCII_RULES_ERROR,
                                         processOut.toString(),
                                         (failPos>0) ? (failPos-1) : failPos);
            }
            // an empty label has no first/last character to inspect;
            // guarding here avoids a StringIndexOutOfBoundsException
            if(poLen > 0){
                if(processOut.charAt(0) == HYPHEN){
                    throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                             ParseException.STD3_ASCII_RULES_ERROR,
                                             processOut.toString(),0);
                }
                if(processOut.charAt(poLen-1) == HYPHEN){
                    throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                             ParseException.STD3_ASCII_RULES_ERROR,
                                             processOut.toString(),
                                             poLen-1);
                }
            }
        }

        if(srcIsASCII){
            dest = processOut;
        }else{
            // step 5 : verify the sequence does not begin with ACE prefix
            if(startsWithPrefix(processOut)){
                throw new ParseException("The input starts with the ACE Prefix.",
                                         ParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
            }
            //step 6: encode the sequence with punycode
            StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);

            // convert all codepoints to lower case ASCII
            StringBuffer lowerOut = toASCIILower(punyout);

            //Step 7: prepend the ACE prefix, then append the encoded label
            dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
            dest.append(lowerOut);
        }

        if(dest.length() > MAX_LABEL_LENGTH){
            throw new ParseException("The labels in the input are too long. Length > 63.",
                                     ParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
        }
        return dest;
    }

    /** Iterator convenience overload of {@link #convertIDNToASCII(String, int)}. */
    public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
            throws ParseException{
        return convertIDNToASCII(iter.getText(), options);
    }

    /** StringBuffer convenience overload of {@link #convertIDNToASCII(String, int)}. */
    public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
            throws ParseException{
        return convertIDNToASCII(str.toString(), options);
    }

    /**
     * Converts a whole domain name: the input is split at label separators,
     * every label is run through convertToASCII, and the results are joined
     * with U+002E FULL STOP.
     *
     * @param src     the domain name
     * @param options see {@link #convertToASCII(UCharacterIterator, int)}
     * @throws ParseException if any label fails to convert
     */
    public static StringBuffer convertIDNToASCII(String src,int options)
            throws ParseException{
        char[] srcArr = src.toCharArray();
        StringBuffer result = new StringBuffer();
        int sepIndex = 0;
        int oldSepIndex = 0;
        for(;;){
            sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
            UCharacterIterator iter = UCharacterIterator.getInstance(
                    new String(srcArr,oldSepIndex,sepIndex-oldSepIndex));
            result.append(convertToASCII(iter,options));
            if(sepIndex==srcArr.length){
                break;
            }
            // increment the sepIndex to skip past the separator
            sepIndex++;
            oldSepIndex = sepIndex;
            result.append((char)FULL_STOP);
        }
        return result;
    }

    /** String convenience overload of {@link #convertToUnicode(UCharacterIterator, int)}. */
    public static StringBuffer convertToUnicode(String src, int options)
            throws ParseException{
        UCharacterIterator iter = UCharacterIterator.getInstance(src);
        return convertToUnicode(iter,options);
    }

    /** StringBuffer convenience overload of {@link #convertToUnicode(UCharacterIterator, int)}. */
    public static StringBuffer convertToUnicode(StringBuffer src, int options)
            throws ParseException{
        UCharacterIterator iter = UCharacterIterator.getInstance(src);
        return convertToUnicode(iter,options);
    }

    /**
     * Converts a single label to its Unicode form.
     * <p>
     * A label that (after name preparation, when it is not pure ASCII)
     * starts with the ACE prefix is Punycode-decoded and verified by
     * converting the result back with convertToASCII.  Any other label is
     * returned unchanged, after the optional STD 3 check.
     *
     * @param iter    the label to convert
     * @param options bit set of {@link #ALLOW_UNASSIGNED} and {@link #USE_STD3_RULES}
     * @throws ParseException if name preparation, decoding, verification or
     *         the STD 3 check fails
     */
    public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
            throws ParseException{

        char[] caseFlags = null;

        //get the options
        boolean useSTD3ASCIIRules = (options & USE_STD3_RULES) != 0;

        // the source contains all ascii codepoints
        boolean srcIsASCII = true;
        // assume the source contains all LDH codepoints
        boolean srcIsLDH = true;

        int failPos = -1;
        int ch;
        int saveIndex = iter.getIndex();

        // step 1: find out if all the codepoints in src are ASCII
        while((ch=iter.next()) != UCharacterIterator.DONE){
            if(ch>0x7F){
                srcIsASCII = false;
            }
            // the flag must stay false once any non-LDH character was seen;
            // assigning the result of every test would let the last
            // character alone decide
            if(!isLDHChar(ch)){
                srcIsLDH = false;
                failPos = iter.getIndex();
            }
        }

        StringBuffer processOut;
        if(!srcIsASCII){
            // step 2: process the string
            iter.setIndex(saveIndex);
            processOut = transform.prepare(iter,options);
        }else{
            //just point to source
            processOut = new StringBuffer(iter.getText());
        }

        // TODO:
        // The RFC states that
        // <quote>
        // ToUnicode never fails. If any step fails, then the original input
        // is returned immediately in that step.
        // </quote>

        //step 3: verify ACE Prefix
        if(startsWithPrefix(processOut)){

            //step 4: Remove the ACE Prefix
            String temp = processOut.substring(ACE_PREFIX_LENGTH,processOut.length());

            //step 5: Decode using punycode
            StringBuffer decodeOut = PunycodeReference.decode(new StringBuffer(temp),caseFlags);

            //step 6:Apply toASCII
            StringBuffer toASCIIOut = convertToASCII(decodeOut, options);

            //step 7: verify
            if(compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0){
                throw new ParseException("The verification step prescribed by the RFC 3491 failed",
                                         ParseException.VERIFICATION_ERROR);
            }

            //step 8: return output of step 5
            return decodeOut;
        }

        // verify that STD3 ASCII rules are satisfied
        if(useSTD3ASCIIRules){
            if(!srcIsLDH){
                /* source contains some non-LDH characters */
                throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                         ParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
                                         (failPos>0) ? (failPos-1) : failPos);
            }
            // an empty label has no first/last character to inspect
            if(processOut.length() > 0){
                if(processOut.charAt(0) == HYPHEN){
                    throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                             ParseException.STD3_ASCII_RULES_ERROR,
                                             processOut.toString(),0);
                }
                if(processOut.charAt(processOut.length()-1) == HYPHEN){
                    throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                             ParseException.STD3_ASCII_RULES_ERROR,
                                             processOut.toString(),
                                             processOut.length());
                }
            }
        }
        // just return the source
        return new StringBuffer(iter.getText());
    }

    /** Iterator convenience overload of {@link #convertIDNToUnicode(String, int)}. */
    public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
            throws ParseException{
        return convertIDNToUnicode(iter.getText(), options);
    }

    /** StringBuffer convenience overload of {@link #convertIDNToUnicode(String, int)}. */
    public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
            throws ParseException{
        return convertIDNToUnicode(str.toString(), options);
    }

    /**
     * Converts a whole domain name: the input is split at label separators,
     * every label is run through convertToUnicode, and the results are
     * joined with U+002E FULL STOP.
     *
     * @param src     the domain name
     * @param options see {@link #convertToUnicode(UCharacterIterator, int)}
     * @throws ParseException if any label fails to convert
     */
    public static StringBuffer convertIDNToUnicode(String src, int options)
            throws ParseException{
        char[] srcArr = src.toCharArray();
        StringBuffer result = new StringBuffer();
        int sepIndex = 0;
        int oldSepIndex = 0;
        for(;;){
            sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
            UCharacterIterator iter = UCharacterIterator.getInstance(
                    new String(srcArr,oldSepIndex,sepIndex-oldSepIndex));
            result.append(convertToUnicode(iter,options));
            if(sepIndex==srcArr.length){
                break;
            }
            // increment the sepIndex to skip past the separator
            sepIndex++;
            oldSepIndex = sepIndex;
            result.append((char)FULL_STOP);
        }
        return result;
    }

    /**
     * Compares two domain names by converting both with convertIDNToASCII
     * and comparing the results ASCII-case-insensitively.
     *
     * @throws IllegalArgumentException if either argument is null
     * @throws ParseException if either name fails to convert
     */
    // TODO: optimize
    public static int compare(StringBuffer s1, StringBuffer s2, int options)
            throws ParseException{
        if(s1==null || s2 == null){
            throw new IllegalArgumentException("One of the source buffers is null");
        }
        StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
        StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
        return compareCaseInsensitiveASCII(s1Out,s2Out);
    }

    /**
     * String overload of {@link #compare(StringBuffer, StringBuffer, int)}.
     *
     * @throws IllegalArgumentException if either argument is null
     * @throws ParseException if either name fails to convert
     */
    // TODO: optimize
    public static int compare(String s1, String s2, int options)
            throws ParseException{
        if(s1==null || s2 == null){
            throw new IllegalArgumentException("One of the source buffers is null");
        }
        StringBuffer s1Out = convertIDNToASCII(s1, options);
        StringBuffer s2Out = convertIDNToASCII(s2, options);
        return compareCaseInsensitiveASCII(s1Out,s2Out);
    }

    /**
     * Iterator overload of {@link #compare(StringBuffer, StringBuffer, int)};
     * the full text of each iterator is compared.
     *
     * @throws IllegalArgumentException if either argument is null
     * @throws ParseException if either name fails to convert
     */
    // TODO: optimize
    public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
            throws ParseException{
        if(i1==null || i2 == null){
            throw new IllegalArgumentException("One of the source buffers is null");
        }
        StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
        StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
        return compareCaseInsensitiveASCII(s1Out,s2Out);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,173 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java,v $
* $Date: 2003/08/21 23:42:25 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.stringprep.ParseException;
import com.ibm.icu.stringprep.StringPrep;
import com.ibm.icu.text.UCharacterIterator;
/**
 * @author ram
 *
 * This is a dumb implementation of NFS4 profiles. It is a direct port of
 * C code, does not use Object Oriented principles. Quick and Dirty implementation
 * for testing.
 * <p>
 * Each profile is loaded once from its .spp data file (located through
 * TestUtil.getDataStream) the first time the singleton is requested.
 */
public final class NFS4StringPrep {

    /** Data file names, in the order: cs-sensitive, cs-insensitive, cis, mixed-prefix, mixed-suffix. */
    private static final String[] NFS4DataFileNames ={
        "nfscss.spp",
        "nfscsi.spp",
        "nfscis.spp",
        "nfsmxp.spp",
        "nfsmxs.spp"
    };

    private final StringPrep nfscss;
    private final StringPrep nfscsi;
    private final StringPrep nfscis;
    private final StringPrep nfsmxp;
    private final StringPrep nfsmxs;

    //singleton instance
    private static NFS4StringPrep prep = null;

    // We do not synchronize the constructor because we know that it is
    // only called from the (synchronized) getInstance method, and only
    // while the singleton instance is null, which means this constructor
    // runs only once.
    private NFS4StringPrep () throws IOException{
        nfscss = loadProfile(NFS4DataFileNames[0]);
        nfscsi = loadProfile(NFS4DataFileNames[1]);
        nfscis = loadProfile(NFS4DataFileNames[2]);
        nfsmxp = loadProfile(NFS4DataFileNames[3]);
        nfsmxs = loadProfile(NFS4DataFileNames[4]);
    }

    /**
     * Loads one profile from the named data file.  The stream is closed
     * even when loading fails; the original code closed the wrong stream
     * for two of the five files and so leaked them.
     */
    private static StringPrep loadProfile(String fileName) throws IOException{
        InputStream in = TestUtil.getDataStream(fileName);
        try{
            return StringPrep.getInstance(in);
        }finally{
            in.close();
        }
    }

    /**
     * Returns the lazily created singleton.
     *
     * @throws IOException if one of the profile data files cannot be read
     */
    public static synchronized final NFS4StringPrep getInstance()
            throws IOException{
        if(prep==null){
            prep = new NFS4StringPrep();
        }
        return prep;
    }

    /**
     * Decodes <code>src</code> as UTF-8, runs it through the given profile
     * with StringPrep.NONE, and re-encodes the result as UTF-8.
     */
    private static byte[] prepare(byte[] src, StringPrep profile)
            throws ParseException, UnsupportedEncodingException{
        String s = new String(src, "UTF-8");
        UCharacterIterator iter = UCharacterIterator.getInstance(s);
        StringBuffer out = profile.prepare(iter,StringPrep.NONE);
        return out.toString().getBytes("UTF-8");
    }

    /**
     * Prepares a UTF-8 encoded string with one of the two "cs" profiles.
     *
     * @param src             UTF-8 bytes of the string to prepare
     * @param caseInsensitive true selects the profile loaded from
     *                        nfscsi.spp, false the one from nfscss.spp
     *                        (the original used nfscsi in both branches)
     * @return UTF-8 bytes of the prepared string
     */
    public static byte[] cs_prepare(byte[] src, boolean caseInsensitive)
            throws IOException, ParseException, UnsupportedEncodingException{
        NFS4StringPrep instance = getInstance();
        if(caseInsensitive){
            return prepare(src, instance.nfscsi);
        }else{
            return prepare(src, instance.nfscss);
        }
    }

    /**
     * Prepares a UTF-8 encoded string with the profile loaded from nfscis.spp.
     *
     * @param src UTF-8 bytes of the string to prepare
     * @return UTF-8 bytes of the prepared string
     */
    public static byte[] cis_prepare(byte[] src)
            throws IOException, ParseException, UnsupportedEncodingException{
        NFS4StringPrep instance = getInstance();
        return prepare(src, instance.nfscis);
    }

    /* sorted array for binary search*/
    private static final String[] special_prefixes={
        "ANONYMOUS",
        "AUTHENTICATED",
        "BATCH",
        "DIALUP",
        "EVERYONE",
        "GROUP",
        "INTERACTIVE",
        "NETWORK",
        "OWNER",
    };

    /* binary search the sorted array; returns the index of target or -1 */
    private static final int findStringIndex(String[] sortedArr,String target){
        int left = 0;
        int right = sortedArr.length-1;
        while(left <= right){
            // unsigned shift keeps the midpoint correct even if left+right overflows
            int middle = (left+right) >>> 1;
            int rc = sortedArr[middle].compareTo(target);
            if(rc<0){
                left = middle+1;
            }else if(rc>0){
                right = middle-1;
            }else{
                return middle;
            }
        }
        return -1;
    }

    private static final char AT_SIGN = '@';

    /**
     * Prepares a UTF-8 encoded "prefix@suffix" string.  The part before the
     * first '@' is prepared with the nfsmxp profile and the part after it
     * with the nfsmxs profile; a string without '@' is prepared entirely
     * with nfsmxp.
     *
     * @param src UTF-8 bytes of the string to prepare
     * @return UTF-8 bytes of the prepared string
     * @throws ParseException if one of the special prefixes is followed by
     *         a non-empty suffix, or if preparation itself fails
     */
    public static byte[] mixed_prepare(byte[] src)
            throws IOException, ParseException, UnsupportedEncodingException{
        String s = new String(src, "UTF-8");
        int index = s.indexOf(AT_SIGN);
        StringBuffer out = new StringBuffer();
        NFS4StringPrep instance = getInstance();
        if(index > -1){
            /* special prefixes must not be followed by suffixes! */
            String prefixString = s.substring(0,index);
            int i = findStringIndex(special_prefixes, prefixString);
            String suffixString = s.substring(index+1, s.length());
            if(i>-1 && !suffixString.equals("")){
                throw new ParseException("Suffix following a special index", ParseException.INVALID_CHAR_FOUND);
            }
            UCharacterIterator prefix = UCharacterIterator.getInstance(prefixString);
            UCharacterIterator suffix = UCharacterIterator.getInstance(suffixString);
            out.append(instance.nfsmxp.prepare(prefix,StringPrep.NONE));
            out.append(AT_SIGN); // add the delimiter
            out.append(instance.nfsmxs.prepare(suffix, StringPrep.NONE));
        }else{
            UCharacterIterator iter = UCharacterIterator.getInstance(s);
            out.append(instance.nfsmxp.prepare(iter,StringPrep.NONE));
        }
        return out.toString().getBytes("UTF-8");
    }
}

View File

@ -0,0 +1,172 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NamePrepTransform.java,v $
* $Date: 2003/08/21 23:42:21 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import java.util.ResourceBundle;
import com.ibm.icu.impl.ICULocaleData;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterDirection;
import com.ibm.icu.stringprep.ParseException;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.Transliterator;
/**
 * Rule-driven name-prep transform used by the IDNA reference tests.
 * <p>
 * The mapping rules and the label-separator, prohibited and unassigned
 * sets are read from the "IDNA" resource bundle when the singleton is
 * created.
 *
 * @author ram
 */
public class NamePrepTransform {

    private static final NamePrepTransform transform = new NamePrepTransform();

    private UnicodeSet labelSeparatorSet;
    private UnicodeSet prohibitedSet;
    private UnicodeSet unassignedSet;
    private Transliterator mapTransform;

    public static final int NONE = 0;
    public static final int ALLOW_UNASSIGNED = 1;

    private NamePrepTransform(){
        // load the resource bundle
        ResourceBundle bundle = ICULocaleData.getResourceBundle("com.ibm.icu.dev.test.stringprep","IDNA","rules");

        // the transliterator is built from the "Map" rules followed by the "CaseMap" rules
        String mapRules = bundle.getString("Map");
        mapRules += bundle.getString("CaseMap");
        mapTransform = Transliterator.createFromRules("CaseMap",mapRules,Transliterator.FORWARD);

        labelSeparatorSet = new UnicodeSet(bundle.getString("LabelSeparatorSet"));
        prohibitedSet = new UnicodeSet(bundle.getString("ProhibitedSet"));
        unassignedSet = new UnicodeSet(bundle.getString("UnassignedSet"));
    }

    /** Returns the shared instance. */
    public static final NamePrepTransform getInstance(){
        return transform;
    }

    /** Tells whether <code>ch</code> is a member of the label-separator set. */
    public static boolean isLabelSeparator(int ch){
        return transform.labelSeparatorSet.contains(ch);
    }

    /*
      The stringprep steps this class is modelled on:

       1) Map -- For each character in the input, check if it has a mapping
          and, if so, replace it with its mapping.
       2) Normalize -- Possibly normalize the result of step 1 using Unicode
          normalization.
       3) Prohibit -- Check for any characters that are not allowed in the
          output.  If any are found, return an error.
       4) Check bidi -- Possibly check for right-to-left characters, and if
          any are found, make sure that the whole string satisfies the
          requirements for bidirectional strings.  If the string does not
          satisfy the requirements for bidirectional strings, return an
          error.

      [Unicode3.2] defines several bidirectional categories; each character
      has one bidirectional category assigned to it.  For the purposes of
      the requirements below, an "RandALCat character" is a character that
      has Unicode bidirectional categories "R" or "AL"; an "LCat character"
      is a character that has Unicode bidirectional category "L".  Note
      that there are many characters which fall in neither of the above
      definitions; Latin digits (<U+0030> through <U+0039>) are examples of
      this because they have bidirectional category "EN".

      In any profile that specifies bidirectional character handling, all
      three of the following requirements MUST be met:

       1) The characters in section 5.8 MUST be prohibited.
       2) If a string contains any RandALCat character, the string MUST NOT
          contain any LCat character.
       3) If a string contains any RandALCat character, a RandALCat
          character MUST be the first character of the string, and a
          RandALCat character MUST be the last character of the string.

      NOTE(review): no separate normalization call is visible in this class;
      presumably it is covered by the rules loaded from the bundle -- confirm.
    */

    /**
     * Prepares the full text of <code>src</code>; see {@link #prepare(String, int)}.
     */
    public StringBuffer prepare(UCharacterIterator src,
                                int options)
            throws ParseException{
        return prepare(src.getText(),options);
    }

    /**
     * Applies the mapping transliterator and then rejects unassigned code
     * points unless ALLOW_UNASSIGNED is set in <code>options</code>.
     */
    private String map ( String src, int options)
            throws ParseException{
        boolean unassignedAllowed = (options & ALLOW_UNASSIGNED) > 0;
        String mapped = transform.mapTransform.transliterate(src);

        UCharacterIterator cursor = UCharacterIterator.getInstance(mapped);
        for(int cp = cursor.nextCodePoint();
                cp != UCharacterIterator.DONE;
                cp = cursor.nextCodePoint()){
            if(!unassignedAllowed && transform.unassignedSet.contains(cp)){
                throw new ParseException("An unassigned code point was found in the input",
                                         ParseException.UNASSIGNED_ERROR);
            }
        }
        return mapped;
    }

    /** Tells whether a direction value is RIGHT_TO_LEFT or RIGHT_TO_LEFT_ARABIC. */
    private static boolean isRandAL(int dir){
        return dir == UCharacterDirection.RIGHT_TO_LEFT
            || dir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    }

    /**
     * Maps <code>src</code>, then checks the result for prohibited code
     * points and for the bidi requirements quoted above.
     *
     * @param src     the text to prepare
     * @param options NONE or ALLOW_UNASSIGNED
     * @return a new buffer holding the mapped text
     * @throws ParseException if an unassigned (when not allowed) or
     *         prohibited code point is found, or a bidi requirement is violated
     */
    public StringBuffer prepare(String src,int options)
            throws ParseException{

        String mapped = map(src,options);
        UCharacterIterator cursor = UCharacterIterator.getInstance(mapped);

        // CHAR_DIRECTION_COUNT doubles as the "nothing seen yet" marker
        int lastDir  = UCharacterDirection.CHAR_DIRECTION_COUNT;
        int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
        int rtlPos = -1;
        int ltrPos = -1;
        boolean sawRTL = false;
        boolean sawLTR = false;

        for(int cp = cursor.nextCodePoint();
                cp != UCharacterIterator.DONE;
                cp = cursor.nextCodePoint()){

            if(transform.prohibitedSet.contains(cp)){
                throw new ParseException("A prohibited code point was found in the input",
                                         ParseException.PROHIBITED_ERROR,
                                         cursor.getText(),cursor.getIndex());
            }

            lastDir = UCharacter.getDirection(cp);
            if(firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
                firstDir = lastDir;
            }
            if(lastDir == UCharacterDirection.LEFT_TO_RIGHT){
                sawLTR = true;
                ltrPos = cursor.getIndex()-1;
            }
            if(isRandAL(lastDir)){
                sawRTL = true;
                rtlPos = cursor.getIndex()-1;
            }
        }

        // satisfy 2
        if(sawLTR && sawRTL){
            throw new ParseException("The input does not conform to the rules for BiDi code points.",
                                     ParseException.CHECK_BIDI_ERROR,cursor.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
        }

        //satisfy 3
        if(sawRTL && !(isRandAL(firstDir) && isRandAL(lastDir))){
            throw new ParseException("The input does not conform to the rules for BiDi code points.",
                                     ParseException.CHECK_BIDI_ERROR,cursor.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
        }

        return new StringBuffer(mapped);
    }
}

View File

@ -0,0 +1,388 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/PunycodeReference.java,v $
* $Date: 2003/08/21 23:42:25 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
/*
*
Disclaimer and license
Regarding this entire document or any portion of it (including
the pseudocode and C code), the author makes no guarantees and
is not responsible for any damage resulting from its use. The
author grants irrevocable permission to anyone to use, modify,
and distribute it in any way that does not diminish the rights
of anyone else to use, modify, and distribute it, provided that
redistributed derivative works do not contain misleading author or
version information. Derivative works need not be licensed under
similar terms.
punycode.c 0.4.0 (2001-Nov-17-Sat)
http://www.cs.berkeley.edu/~amc/idn/
Adam M. Costello
http://www.nicemice.net/amc/
*/
package com.ibm.icu.dev.test.stringprep;
import com.ibm.icu.stringprep.ParseException;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UTF16;
/**
* The implementation is direct port of C code in the RFC
*/
public final class PunycodeReference {
/*** punycode status codes */
public static final int punycode_success=0;
public static final int punycode_bad_input=1; /* Input is invalid. */
public static final int punycode_big_output=2; /* Output would exceed the space provided. */
public static final int punycode_overflow =3; /* Input needs wider integers to process. */
/*** Bootstring parameters for Punycode ***/
private static final int base = 36; /* also the "no value" result of decode_digit */
private static final int tmin = 1;
private static final int tmax = 26;
private static final int skew = 38;
private static final int damp = 700;
private static final int initial_bias = 72;
private static final int initial_n = 0x80; /* first code point that basic(cp) does not accept */
private static final int delimiter = 0x2D; /* '-' (HYPHEN-MINUS); tested by delim(cp) */
/* Mask selecting the low 32 bits of a long; presumably used to emulate the */
/* unsigned 32-bit arithmetic of the C original -- confirm at the use sites. */
private static final long UNSIGNED_INT_MASK = 0xffffffffL;
/* basic(cp) tests whether cp is a basic code point: */
private static boolean basic(int cp){
return (char)(cp) < 0x80;
}
/* delim(cp) tests whether cp is a delimiter: */
private static boolean delim(int cp){
return ((cp) == delimiter);
}
/* decode_digit(cp) returns the numeric value of a basic code */
/* point (for use in representing integers) in the range 0 to */
/* base-1, or base if cp is does not represent a value. */
private static int decode_digit(int cp)
{
return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 :
cp - 97 < 26 ? cp - 97 : base;
}
/* encode_digit(d,flag) returns the basic code point whose value */
/* (when used for representing integers) is d, which needs to be in */
/* the range 0 to base-1. The lowercase form is used unless flag is */
/* nonzero, in which case the uppercase form is used. The behavior */
/* is undefined if flag is nonzero and digit d has no uppercase form. */
private static char encode_digit(int d, int flag)
{
return (char) (d + 22 + (75 * ((d < 26) ? 1 : 0) - (((flag != 0) ? 1 :0) << 5)));
/* 0..25 map to ASCII a..z or A..Z */
/* 26..35 map to ASCII 0..9 */
}
/* flagged(bcp) tests whether a basic code point is flagged */
/* (uppercase). The behavior is undefined if bcp is not a */
/* basic code point. */
private static boolean flagged(int bcp){
return ((bcp) - 65 < 26);
}
/* encode_basic(bcp,flag) forces a basic code point to lowercase */
/* if flag is zero, uppercase if flag is nonzero, and returns */
/* the resulting code point. The code point is unchanged if it */
/* is caseless. The behavior is undefined if bcp is not a basic */
/* code point. */
private static char encode_basic(int bcp, int flag)
{
bcp -= (((bcp - 97) < 26) ? 1 :0 ) << 5;
boolean mybcp = (bcp - 65 < 26);
return (char) (bcp + (((flag==0) && mybcp ) ? 1 : 0 ) << 5);
}
/*** Platform-specific constants ***/
/* maxint is the maximum value of a punycode_uint variable: */
private static long maxint = 0xFFFFFFFFL;
/* Because maxint is unsigned, -1 becomes the maximum value. */
/*** Bias adaptation function ***/
private static int adapt(int delta, int numpoints, boolean firsttime ){
int k;
delta = (firsttime==true) ? delta / damp : delta >> 1;
/* delta >> 1 is a faster way of doing delta / 2 */
delta += delta / numpoints;
for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) {
delta /= base - tmin;
}
return k + (base - tmin + 1) * delta / (delta + skew);
}
/*** Main encode function ***/
public static final int encode( int input_length,
int input[],
char[] case_flags,
int[] output_length,
char output[] ){
int delta, h, b, out, max_out, bias, j, q, k, t;
long m,n;
/* Initialize the state: */
n = initial_n;
delta = out = 0;
max_out = output_length[0];
bias = initial_bias;
/* Handle the basic code points: */
for (j = 0; j < input_length; ++j) {
if (basic(input[j])) {
if (max_out - out < 2) return punycode_big_output;
output[out++] = (char)
(case_flags!=null ? encode_basic(input[j], case_flags[j]) : input[j]);
}
/* else if (input[j] < n) return punycode_bad_input; */
/* (not needed for Punycode with unsigned code points) */
}
h = b = out;
/* h is the number of code points that have been handled, b is the */
/* number of basic code points, and out is the number of characters */
/* that have been output. */
if (b > 0) output[out++] = delimiter;
/* Main encoding loop: */
while (h < input_length) {
/* All non-basic code points < n have been */
/* handled already. Find the next larger one: */
for (m = maxint, j = 0; j < input_length; ++j) {
/* if (basic(input[j])) continue; */
/* (not needed for Punycode) */
if (input[j] >= n && input[j] < m) m = input[j];
}
/* Increase delta enough to advance the decoder's */
/* <n,i> state to <m,0>, but guard against overflow: */
if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow;
delta += (m - n) * (h + 1);
n = m;
for (j = 0; j < input_length; ++j) {
/* Punycode does not need to check whether input[j] is basic: */
if (input[j] < n /* || basic(input[j]) */ ) {
if (++delta == 0) return punycode_overflow;
}
if (input[j] == n) {
/* Represent delta as a generalized variable-length integer: */
for (q = delta, k = base; ; k += base) {
if (out >= max_out) return punycode_big_output;
t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
k >= bias + tmax ? tmax : k - bias;
if (q < t) break;
output[out++] = encode_digit(t + (q - t) % (base - t), 0);
q = (q - t) / (base - t);
}
output[out++] = encode_digit(q, (case_flags !=null) ? case_flags[j] : 0);
bias = adapt(delta, h + 1, (h == b));
delta = 0;
++h;
}
}
++delta;
++n;
}
output_length[0] = out;
return punycode_success;
}
public static final StringBuffer encode(StringBuffer input,char[] case_flags)
throws ParseException{
int[] in = new int[input.length()];
int inLen = 0;
int ch;
StringBuffer result = new StringBuffer();
UCharacterIterator iter = UCharacterIterator.getInstance(input);
while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
in[inLen++]=ch;
}
int[] outLen = new int[1];
outLen[0] = input.length()*4;
char[] output = new char[outLen[0]];
int rc = punycode_success;
for(;;){
rc = encode(inLen,in,case_flags, outLen, output);
if(rc==punycode_big_output){
outLen[0] = outLen[0]*4;
output = new char[outLen[0]];
// continue to convert
continue;
}
break;
}
if(rc==punycode_success){
return result.append(output,0,outLen[0]);
}
getException(rc);
return result;
}
private static void getException(int rc)
throws ParseException{
switch(rc){
case punycode_big_output:
throw new ParseException("The output capacity was not sufficient.",ParseException.BUFFER_OVERFLOW_ERROR);
case punycode_bad_input:
throw new ParseException("Illegal char found in the input",ParseException.ILLEGAL_CHAR_FOUND);
case punycode_overflow:
throw new ParseException("Invalid char found in the input",ParseException.INVALID_CHAR_FOUND);
}
}
private static final int MAX_BUFFER_SIZE = 100;
public static final StringBuffer decode(StringBuffer input,char[] case_flags)
throws ParseException{
char[] in = input.toString().toCharArray();
int[] outLen = new int[1];
outLen[0] = MAX_BUFFER_SIZE;
int[] output = new int[outLen[0]];
int rc = punycode_success;
StringBuffer result = new StringBuffer();
for(;;){
rc = decode(input.length(),in, outLen, output,case_flags);
if(rc==punycode_big_output){
outLen[0] = output.length * 4;
output = new int[outLen[0]];
continue;
}
break;
}
if(rc==punycode_success){
for(int i=0; i < outLen[0]; i++ ){
UTF16.append(result,output[i]);
}
}else{
getException(rc);
}
return result;
}
/*** Main decode function ***/
public static final int decode(int input_length,
char[] input,
int[] output_length,
int[] output,
char[] case_flags ){
int n, out, i, max_out, bias,
b, j, in, oldi, w, k, digit, t;
/* Initialize the state: */
n = initial_n;
out = i = 0;
max_out = output_length[0];
bias = initial_bias;
/* Handle the basic code points: Let b be the number of input code */
/* points before the last delimiter, or 0 if there is none, then */
/* copy the first b code points to the output. */
for (b = j = 0; j < input_length; ++j){
if (delim(input[j])==true){
b = j;
}
}
if (b > max_out) return punycode_big_output;
for (j = 0; j < b; ++j) {
if (case_flags != null) case_flags[out] = (char)(flagged(input[j]) ? 1 : 0);
if (!basic(input[j])) return punycode_bad_input;
output[out++] = input[j];
}
/* Main decoding loop: Start just after the last delimiter if any */
/* basic code points were copied; start at the beginning otherwise. */
for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {
/* in is the index of the next character to be consumed, and */
/* out is the number of code points in the output array. */
/* Decode a generalized variable-length integer into delta, */
/* which gets added to i. The overflow checking is easier */
/* if we increase i as we go, then subtract off its starting */
/* value at the end to obtain delta. */
for (oldi = i, w = 1, k = base; ; k += base) {
if (in >= input_length) return punycode_bad_input;
digit = decode_digit(input[in++]);
if (digit >= base) return punycode_bad_input;
if (digit > (maxint - i) / w) return punycode_overflow;
i += digit * w;
t = (k <= bias) /* + tmin */ ? tmin : /* +tmin not needed */
(k >= (bias + tmax)) ? tmax : k - bias;
if (digit < t) break;
if (w > maxint / (base - t)) return punycode_overflow;
w *= (base - t);
}
bias = adapt(i - oldi, out + 1, (oldi == 0));
/* i was supposed to wrap around from out+1 to 0, */
/* incrementing n each time, so we'll fix that now: */
if (i / (out + 1) > maxint - n) return punycode_overflow;
n += i / (out + 1);
i %= (out + 1);
/* Insert n at position i of the output: */
/* not needed for Punycode: */
/* if (decode_digit(n) <= base) return punycode_invalid_input; */
if (out >= max_out) return punycode_big_output;
if (case_flags != null) {
System.arraycopy(case_flags, i, case_flags, i + 1, out - i);
/* Case of last character determines uppercase flag: */
case_flags[i] = (char)(flagged(input[in - 1]) ? 0 :1);
}
System.arraycopy(output, i, output, i + 1, (out - i));
output[i++] = n;
}
output_length[0] = out;
return punycode_success;
}
}

View File

@ -0,0 +1,42 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestAll.java,v $
* $Date: 2003/08/21 23:42:25 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import com.ibm.icu.dev.test.TestFmwk.TestGroup;
/**
 * Test group that bundles the StringPrep and IDNA tests of this package so
 * they can be run together.
 *
 * @author ram
 */
public class TestAll extends TestGroup {
    public static final String CLASS_TARGET_NAME = "StringPrep";

    /** Registers the member tests by name under a common description. */
    public TestAll() {
        super(memberTestNames(), "StringPrep and IDNA test");
    }

    /** Command-line entry point: runs the whole group with the given arguments. */
    public static void main(String[] args) throws Exception {
        TestAll group = new TestAll();
        group.run(args);
    }

    /**
     * Names of the tests that make up this group, in execution order.
     * A fresh array is returned on every call.
     */
    private static String[] memberTestNames() {
        String[] names = new String[3];
        names[0] = "TestIDNA";
        names[1] = "TestStringPrep";
        names[2] = "TestIDNARef";
        return names;
    }
}

View File

@ -0,0 +1,631 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestData.java,v $
* $Date: 2003/08/21 23:42:25 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import com.ibm.icu.stringprep.IDNA;
import com.ibm.icu.stringprep.ParseException;
/**
* @author ram
*
* To change the template for this generated type comment go to
* Window>Preferences>Java>Code Generation>Code and Comments
*/
public class TestData {
// Unicode (UTF-16) forms of the test labels.
// The table is index-aligned with asciiIn: TestIDNA converts asciiIn[i] to
// Unicode and expects unicodeIn[i], and converts unicodeIn[i] to ASCII and
// expects asciiIn[i]. Entries must therefore be added or disabled in both
// tables together.
public static final char[][] unicodeIn ={
{
0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F
},
{
0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587,
},
{
0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
0x0065, 0x0073, 0x006B, 0x0079,
},
{
0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
0x05D1, 0x05E8, 0x05D9, 0x05EA,
},
{
0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
0x0939, 0x0948, 0x0902,
},
{
0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B,
},
// Disabled entry: the matching ACE string in asciiIn is commented out as
// well, which keeps the two tables index-aligned.
/*
{
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C,
},
*/
{
0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
0x0438,
},
{
0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
0x0061, 0x00F1, 0x006F, 0x006C,
},
{
0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587,
},
{
0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
0x0056, 0x0069, 0x1EC7, 0x0074,
},
{
0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F,
},
{
0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053,
},
{
0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240,
},
{
0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032,
},
{
0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
0x308B, 0x0035, 0x79D2, 0x524D,
},
{
0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0,
},
{
0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067,
},
// test non-BMP code points (each one written as a UTF-16 surrogate pair)
{
0xD800, 0xDF00, 0xD800, 0xDF01, 0xD800, 0xDF02, 0xD800, 0xDF03, 0xD800, 0xDF05,
0xD800, 0xDF06, 0xD800, 0xDF07, 0xD800, 0xDF09, 0xD800, 0xDF0A, 0xD800, 0xDF0B,
},
{
0xD800, 0xDF0D, 0xD800, 0xDF0C, 0xD800, 0xDF1E, 0xD800, 0xDF0F, 0xD800, 0xDF16,
0xD800, 0xDF15, 0xD800, 0xDF14, 0xD800, 0xDF12, 0xD800, 0xDF10, 0xD800, 0xDF20,
0xD800, 0xDF21,
},
// Greek
{
0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac
},
// Maltese
{
0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
0x0127, 0x0061
},
// Russian
{
0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
0x0441, 0x0441, 0x043a, 0x0438
},
};
// ACE ("xn--" prefixed) forms of the test labels.
// Index-aligned with unicodeIn: entry i here is the expected ASCII form of
// unicodeIn[i] and the input whose Unicode form must equal unicodeIn[i].
public static final String[] asciiIn = {
"xn--egbpdaj6bu4bxfgehfvwxn",
"xn--ihqwcrb4cv8a8dqg056pqjye",
"xn--Proprostnemluvesky-uyb24dma41a",
"xn--4dbcagdahymbxekheh6e0a7fei0b",
"xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
"xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
// Disabled together with the corresponding (commented-out) unicodeIn entry.
/* "xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",*/
"xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l",
"xn--PorqunopuedensimplementehablarenEspaol-fmd56a",
"xn--ihqwctvzc91f659drss3x8bo0yb",
"xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
"xn--3B-ww4c5e180e575a65lsy2b",
"xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
"xn--Hello-Another-Way--fc4qua05auwb3674vfr0b",
"xn--2-u9tlzr9756bt3uc0v",
"xn--MajiKoi5-783gue6qz075azm5e",
"xn--de-jg4avhby1noc0d",
"xn--d9juau41awczczp",
// The next two entries pair with the non-BMP entries of unicodeIn.
"XN--097CCDEKGHQJK",
"XN--db8CBHEJLGH4E0AL",
"xn--hxargifdar", // Greek
"xn--bonusaa-5bb1da", // Maltese
"xn--b1abfaaepdrnnbgefbadotcwatmq2g4l", // Russian (Cyrillic)
};
// Plain ASCII host names. TestIDNA passes each one to the IDN-to-ASCII and
// IDN-to-Unicode helpers as both the source and the expected value, under all
// four option combinations.
public static final String[] domainNames = {
"slip129-37-118-146.nc.us.ibm.net",
"saratoga.pe.utexas.edu",
"dial-120-45.ots.utexas.edu",
"woo-085.dorms.waller.net",
"hd30-049.hil.compuserve.com",
"pem203-31.pe.ttu.edu",
"56K-227.MaxTNT3.pdq.net",
"dial-36-2.ots.utexas.edu",
"slip129-37-23-152.ga.us.ibm.net",
"ts45ip119.cadvision.com",
"sdn-ts-004txaustP05.dialsprint.net",
"bar-tnt1s66.erols.com",
"101.st-louis-15.mo.dial-access.att.net",
"h92-245.Arco.COM",
"dial-13-2.ots.utexas.edu",
"net-redynet29.datamarkets.com.ar",
"ccs-shiva28.reacciun.net.ve",
"7.houston-11.tx.dial-access.att.net",
"ingw129-37-120-26.mo.us.ibm.net",
"dialup6.austintx.com",
"dns2.tpao.gov.tr",
"slip129-37-119-194.nc.us.ibm.net",
"cs7.dillons.co.uk.203.119.193.in-addr.arpa",
"swprd1.innovplace.saskatoon.sk.ca",
"bikini.bologna.maraut.it",
"node91.subnet159-198-79.baxter.com",
"cust19.max5.new-york.ny.ms.uu.net",
"balexander.slip.andrew.cmu.edu",
"pool029.max2.denver.co.dynip.alter.net",
"cust49.max9.new-york.ny.ms.uu.net",
"s61.abq-dialin2.hollyberry.com",
};
// Domain names containing non-ASCII or unusual characters, used as input to
// the IDN-to-ASCII test. The expected result for entry i is
// domainNamesToASCIIOut[i], so the two tables must stay the same length.
public static final String[] domainNames1Uni = {
"http://\u0917\u0928\u0947\u0936.sanjose.ibm.com",
"www.\u0121.com",
"www.\u00E0\u00B3\u00AF.com",
"www.\u00C2\u00A4.com",
"www.\u00C2\u00A3.com",
"\u0025",
"\u005C\u005C",
"@",
"\u002F",
"www.\u0021.com",
"www.\u0024.com",
"\u003f",
// These yield U_IDNA_PROHIBITED_ERROR and are therefore disabled:
//"\\u00CF\\u0082.com",
//"\\u00CE\\u00B2\\u00C3\\u009Fss.com",
//"\\u00E2\\u0098\\u00BA.com",
"\u00C3\u00BC.com"
};
// Expected IDN-to-ASCII results for domainNames1Uni (same index).
// TestIDNA also feeds these strings to the IDN-to-Unicode test, whose expected
// results are in domainNamesToUnicodeOut at the same index.
public static final String[] domainNamesToASCIIOut = {
"xn--http://-3mo7iufsh.sanjose.ibm.com",
"www.xn--vea.com",
"www.xn--3 -iia80t.com",
"www.xn--bba7j.com",
"www.xn--9a9j.com",
"\u0025",
"\u005C\u005C",
"@",
"\u002F",
"www.\u0021.com",
"www.\u0024.com",
"\u003f",
"xn--14-ria7423a.com"
};
// Expected IDN-to-Unicode results for domainNamesToASCIIOut (same index).
// Several entries differ from the corresponding domainNames1Uni input, so
// this table is not a simple round trip of that one.
public static final String[] domainNamesToUnicodeOut = {
"http://\u0917\u0928\u0947\u0936.sanjose.ibm.com",
"www.\u0121.com",
"www.\u00E0\u0033\u0020\u0304.com",
"www.\u00E2\u00A4.com",
"www.\u00E2\u00A3.com",
"\u0025",
"\u005C\u005C",
"@",
"\u002F",
"www.\u0021.com",
"www.\u0024.com",
"\u003f",
"\u00E3\u0031\u2044\u0034.com"
};
/**
 * One negative test vector: a domain name in Unicode and in ACE form together
 * with the exception it is expected to provoke and switches that select how
 * the vector is exercised.
 */
public static class ErrorCase{
    /** The domain name as UTF-16 code units. */
    public char[] unicode;
    /** The domain name in ASCII/ACE form. */
    public String ascii;
    /** The exception the conversion is expected to raise. */
    public Exception expected;
    /** Flag stored from the constructor's std3 argument. */
    public boolean useSTD3ASCIIRules;
    /** Flag stored from the constructor's testToUni argument. */
    public boolean testToUnicode;
    /** Flag stored from the constructor's testlabel argument. */
    public boolean testLabel;

    /** Stores all six values verbatim; the arrays and objects are not copied. */
    ErrorCase(char[] unicodeText, String aceText, Exception expectedError,
              boolean std3Rules, boolean runToUnicode, boolean runLabel){
        this.unicode           = unicodeText;
        this.ascii             = aceText;
        this.expected          = expectedError;
        this.useSTD3ASCIIRules = std3Rules;
        this.testToUnicode     = runToUnicode;
        this.testLabel         = runLabel;
    }
}
// Inputs that are expected to be rejected.
// Each ErrorCase is built from, in order: the Unicode form, the ASCII/ACE
// form, the expected exception, and the three boolean switches
// useSTD3ASCIIRules, testToUnicode and testLabel.
public static final ErrorCase[] errorCases = {
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x070F,/*prohibited*/
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
"www.XN--8mb5595fsoa28orucya378bqre2tcwop06c5qbw82a1rffmae0361dea96b.com",
new ParseException("",ParseException.PROHIBITED_ERROR),
false, true, true),
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x0221, 0x0234/*Unassigned code points*/,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
"www.XN--6lA2Bz548Fj1GuA391Bf1Gb1N59Ab29A7iA.com",
new ParseException("",ParseException.UNASSIGNED_ERROR),
false, true, true
),
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x0644, 0x064A, 0x0647,/*Arabic code points. Cannot mix RTL with LTR*/
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
"www.xn--ghBGI4851OiyA33VqrD6Az86C4qF83CtRv93D5xBk15AzfG0nAgA0578DeA71C.com",
new ParseException("",ParseException.CHECK_BIDI_ERROR),
false, true, true
),
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
/* labels cannot begin with an HYPHEN */
0x002D, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x002E,
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
"www.xn----b95Ew8SqA315Ao5FbuMlnNmhA.com",
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
true, true, false
),
new ErrorCase( new char[]{
/* correct ACE-prefix followed by unicode */
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0x0078, 0x006e, 0x002d,0x002d, /* ACE Prefix */
0x002D, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x002D,
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
/* wrong ACE-prefix followed by valid ACE-encoded ASCII */
"www.XY-----b91I0V65S96C2A355Cw1E5yCeQr19CsnP1mFfmAE0361DeA96B.com",
new ParseException("",ParseException.ACE_PREFIX_ERROR),
false, false, false
),
/* cannot verify U_IDNA_VERIFICATION_ERROR */
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
"www.xn--989AoMsVi5E83Db1D2A355Cv1E0vAk1DwRv93D5xBh15A0Dt30A5JpSD879Ccm6FeA98C.com",
new ParseException("",ParseException.LABEL_TOO_LONG_ERROR),
false, true, true
),
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0x0030, 0x0644, 0x064A, 0x0647, 0x0031, /* Arabic code points squashed between EN codepoints */
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
},
"www.xn--01-tvdmo.com",
new ParseException("",ParseException.CHECK_BIDI_ERROR),
false, true, true
),
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, // www.
0x206C, 0x0644, 0x064A, 0x0647, 0x206D, // Arabic code points squashed between BN codepoints
0x002e, 0x0063, 0x006f, 0x006d, // com.
},
"www.XN--ghbgi278xia.com",
new ParseException("",ParseException.PROHIBITED_ERROR),
false, true, true
),
new ErrorCase( new char[] {
0x0077, 0x0077, 0x0077, 0x002e, // www.
0x002D, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, // HYPHEN at the start of label
0x002e, 0x0063, 0x006f, 0x006d, // com.
},
"www.-abcde.com",
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
true, true, false
),
new ErrorCase( new char[] {
0x0077, 0x0077, 0x0077, 0x002e, // www.
0x0041, 0x0042, 0x0043, 0x0044, 0x0045,0x002D, // HYPHEN at the end of the label
0x002e, 0x0063, 0x006f, 0x006d, // com.
},
"www.abcde-.com",
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
true, true, false
),
new ErrorCase( new char[]{
0x0077, 0x0077, 0x0077, 0x002e, // www.
0x0041, 0x0042, 0x0043, 0x0044, 0x0045,0x0040, // Containing non LDH code point
0x002e, 0x0063, 0x006f, 0x006d, // com.
},
"www.abcde@.com",
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
true, true, false
),
};
/**
 * One stringprep conformance vector.
 * <p>
 * The input and output strings handed to the constructor are not ordinary
 * text: every char carries one byte of a UTF-8 sequence. The constructor
 * decodes those bytes, so the input and output fields hold real Unicode text.
 */
public static final class ConformanceTestCase{
    /** Human-readable description of the vector. */
    String comment;
    /** Decoded input text; never null. */
    String input;
    /** Decoded expected output, or null when none was supplied. */
    String output;
    /** Name of the profile the vector applies to. */
    String profile;
    /** Option flags, stored as given. */
    int flags;
    /** Expected exception, or null when the vector is expected to succeed. */
    Exception expected;

    /**
     * Narrows every char of the string to a single byte (the low eight bits).
     *
     * @return the bytes, or null when the argument is null
     */
    private static byte[] getBytes(String in){
        if(in==null){
            return null;
        }
        int length = in.length();
        byte[] bytes = new byte[length];
        for(int i=0; i < length; i++){
            bytes[i] = (byte)in.charAt(i);
        }
        return bytes;
    }

    /**
     * Decodes a byte-per-char UTF-8 string into text.
     *
     * @return the decoded text, or null when the argument is null
     */
    private static String decodeUtf8(String packed){
        byte[] bytes = getBytes(packed);
        if(bytes==null){
            return null;
        }
        try{
            return new String(bytes,"UTF-8");
        }catch (java.io.UnsupportedEncodingException e){
            // Keep the cause so the real failure is visible to whoever
            // initialises the test data.
            throw new RuntimeException("UTF-8 charset is not available", e);
        }
    }

    /**
     * @param comt description of the vector
     * @param in   input as byte-per-char UTF-8; must not be null
     * @param out  expected output as byte-per-char UTF-8; may be null
     * @param prof profile name
     * @param flg  option flags
     * @param ex   expected exception, or null
     * @throws IllegalArgumentException if in is null
     */
    ConformanceTestCase(String comt, String in, String out,
                        String prof, int flg, Exception ex)
    {
        if(in==null){
            throw new IllegalArgumentException(
                "Conformance test case \"" + comt + "\" has no input");
        }
        comment  = comt;
        input    = decodeUtf8(in);
        output   = decodeUtf8(out);
        profile  = prof;
        flags    = flg;
        expected = ex;
    }
}
// Conformance vectors.
// Constructor arguments, in order: description, input, expected output,
// profile name, option flags, expected exception (null when the vector is
// expected to succeed).
// The input and output literals are UTF-8 byte sequences written one byte per
// char; the ConformanceTestCase constructor decodes them into text.
public static final ConformanceTestCase[] conformanceTestCases =
{
new ConformanceTestCase(
"Case folding ASCII U+0043 U+0041 U+0046 U+0045",
"\u0043\u0041\u0046\u0045", "\u0063\u0061\u0066\u0065",
"Nameprep", IDNA.DEFAULT,
null
),
new ConformanceTestCase(
"Case folding 8bit U+00DF (german sharp s)",
"\u00C3\u009F", "\u0073\u0073",
"Nameprep", IDNA.DEFAULT,
null
),
new ConformanceTestCase(
"Non-ASCII multibyte space character U+1680",
"\u00E1\u009A\u0080", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Non-ASCII 8bit control character U+0085",
"\u00C2\u0085", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Non-ASCII multibyte control character U+180E",
"\u00E1\u00A0\u008E", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Non-ASCII control character U+1D175",
"\u00F0\u009D\u0085\u00B5", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Plane 0 private use character U+F123",
"\u00EF\u0084\u00A3", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Plane 15 private use character U+F1234",
"\u00F3\u00B1\u0088\u00B4", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Plane 16 private use character U+10F234",
"\u00F4\u008F\u0088\u00B4", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Non-character code point U+8FFFE",
"\u00F2\u008F\u00BF\u00BE", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Non-character code point U+10FFFF",
"\u00F4\u008F\u00BF\u00BF", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
/*
{
"Surrogate code U+DF42",
"\u00ED\u00BD\u0082", null, "Nameprep", InternationalizedDomainNames.DEFAULT,
U_IDNA_PROHIBITED_ERROR
},
*/
new ConformanceTestCase(
"Non-plain text character U+FFFD",
"\u00EF\u00BF\u00BD", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Ideographic description character U+2FF5",
"\u00E2\u00BF\u00B5", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Display property character U+0341",
"\u00CD\u0081", "\u00CC\u0081",
"Nameprep", IDNA.DEFAULT,
null
),
// NOTE(review): the next three vectors carry both an output string and an
// expected exception; the output looks copied from the vector above and is
// presumably ignored when an exception is expected - confirm against the
// code that consumes this table.
new ConformanceTestCase(
"Left-to-right mark U+200E",
"\u00E2\u0080\u008E", "\u00CC\u0081",
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Deprecated U+202A",
"\u00E2\u0080\u00AA", "\u00CC\u0081",
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Language tagging character U+E0001",
"\u00F3\u00A0\u0080\u0081", "\u00CC\u0081",
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Language tagging character U+E0042",
"\u00F3\u00A0\u0081\u0082", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.PROHIBITED_ERROR)
),
new ConformanceTestCase(
"Bidi: RandALCat character U+05BE and LCat characters",
"\u0066\u006F\u006F\u00D6\u00BE\u0062\u0061\u0072", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.CHECK_BIDI_ERROR)
),
new ConformanceTestCase(
"Bidi: RandALCat character U+FD50 and LCat characters",
"\u0066\u006F\u006F\u00EF\u00B5\u0090\u0062\u0061\u0072", null,
"Nameprep",IDNA.DEFAULT ,
new ParseException("",ParseException.CHECK_BIDI_ERROR)
),
new ConformanceTestCase(
"Bidi: RandALCat character U+FB38 and LCat characters",
"\u0066\u006F\u006F\u00EF\u00B9\u00B6\u0062\u0061\u0072", "\u0066\u006F\u006F \u00d9\u008e\u0062\u0061\u0072",
"Nameprep", IDNA.DEFAULT,
null
),
new ConformanceTestCase(
"Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
"\u00D8\u00A7\u0031", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.CHECK_BIDI_ERROR)
),
new ConformanceTestCase(
"Bidi: RandALCat character U+0627 U+0031 U+0628",
"\u00D8\u00A7\u0031\u00D8\u00A8", "\u00D8\u00A7\u0031\u00D8\u00A8",
"Nameprep", IDNA.DEFAULT,
null
),
new ConformanceTestCase(
"Unassigned code point U+E0002",
"\u00F3\u00A0\u0080\u0082", null,
"Nameprep", IDNA.DEFAULT,
new ParseException("",ParseException.UNASSIGNED_ERROR)
),
/* // Invalid UTF-8
{
"Larger test (shrinking)",
"X\u00C2\u00AD\u00C3\u00DF\u00C4\u00B0\u00E2\u0084\u00A1\u006a\u00cc\u008c\u00c2\u00a0\u00c2"
"\u00aa\u00ce\u00b0\u00e2\u0080\u0080", "xssi\u00cc\u0087""tel\u00c7\u00b0 a\u00ce\u00b0 ",
"Nameprep",
InternationalizedDomainNames.DEFAULT, U_ZERO_ERROR
},
{
"Larger test (expanding)",
"X\u00C3\u00DF\u00e3\u008c\u0096\u00C4\u00B0\u00E2\u0084\u00A1\u00E2\u0092\u009F\u00E3\u008c\u0080",
"xss\u00e3\u0082\u00ad\u00e3\u0083\u00ad\u00e3\u0083\u00a1\u00e3\u0083\u00bc\u00e3\u0083\u0088"
"\u00e3\u0083\u00ab""i\u00cc\u0087""tel\u0028""d\u0029\u00e3\u0082\u00a2\u00e3\u0083\u0091"
"\u00e3\u0083\u00bc\u00e3\u0083\u0088"
"Nameprep",
InternationalizedDomainNames.DEFAULT, U_ZERO_ERROR
},
*/
};
}

View File

@ -0,0 +1,700 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java,v $
* $Date: 2003/08/21 23:42:21 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import java.io.InputStream;
import java.util.Random;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.stringprep.IDNA;
import com.ibm.icu.stringprep.StringPrep;
import com.ibm.icu.stringprep.ParseException;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.impl.LocaleUtility;
import com.ibm.icu.impl.Utility;
/**
* @author ram
*/
public class TestIDNA extends TestFmwk {
// Command-line entry point: creates the test and runs it with the given arguments.
public static void main(String[] args) throws Exception {
    TestIDNA test = new TestIDNA();
    test.run(args);
}
// Template exception carrying the UNASSIGNED_ERROR code. doTestToUnicode
// compares it (via equals) with the expected exception to decide whether a
// successful conversion should be reported as a failure.
private ParseException unassignedException = new ParseException("",ParseException.UNASSIGNED_ERROR);
// Converts every ACE label in TestData.asciiIn to Unicode and expects the
// entry of TestData.unicodeIn at the same index, once with default options
// and once with ALLOW_UNASSIGNED. No exception is expected.
// The USE_STD3_RULES combinations were commented out in the original code and
// remain disabled here.
public void TestToUnicode() throws Exception{
    final int[] optionSets = { IDNA.DEFAULT, IDNA.ALLOW_UNASSIGNED };
    final int caseCount = TestData.asciiIn.length;
    for (int index = 0; index < caseCount; index++) {
        String ace = TestData.asciiIn[index];
        String unicode = new String(TestData.unicodeIn[index]);
        for (int o = 0; o < optionSets.length; o++) {
            doTestToUnicode(ace, unicode, optionSets[o], null);
        }
    }
}
// Converts every Unicode label in TestData.unicodeIn to ASCII and expects the
// entry of TestData.asciiIn at the same index, once with default options and
// once with ALLOW_UNASSIGNED. No exception is expected.
// The USE_STD3_RULES combinations were commented out in the original code and
// remain disabled here.
public void TestToASCII() throws Exception{
    final int[] optionSets = { IDNA.DEFAULT, IDNA.ALLOW_UNASSIGNED };
    final int caseCount = TestData.asciiIn.length;
    for (int index = 0; index < caseCount; index++) {
        String unicode = new String(TestData.unicodeIn[index]);
        String ace = TestData.asciiIn[index];
        for (int o = 0; o < optionSets.length; o++) {
            doTestToASCII(unicode, ace, optionSets[o], null);
        }
    }
}
// Exercises the IDN-to-ASCII helper in two passes:
//  1. each plain host name in TestData.domainNames is passed as both source
//     and expected value under all four option combinations;
//  2. each name in TestData.domainNames1Uni is paired with the entry of
//     TestData.domainNamesToASCIIOut at the same index, with default options
//     and with ALLOW_UNASSIGNED.
// No exception is expected in either pass.
public void TestIDNToASCII() throws Exception{
    final int[] allOptionSets = {
        IDNA.DEFAULT,
        IDNA.ALLOW_UNASSIGNED,
        IDNA.USE_STD3_RULES,
        IDNA.ALLOW_UNASSIGNED|IDNA.USE_STD3_RULES,
    };
    for (int n = 0; n < TestData.domainNames.length; n++) {
        String name = TestData.domainNames[n];
        for (int o = 0; o < allOptionSets.length; o++) {
            doTestIDNToASCII(name, name, allOptionSets[o], null);
        }
    }

    final int[] basicOptionSets = { IDNA.DEFAULT, IDNA.ALLOW_UNASSIGNED };
    for (int n = 0; n < TestData.domainNames1Uni.length; n++) {
        String source = TestData.domainNames1Uni[n];
        String expectedAscii = TestData.domainNamesToASCIIOut[n];
        for (int o = 0; o < basicOptionSets.length; o++) {
            doTestIDNToASCII(source, expectedAscii, basicOptionSets[o], null);
        }
    }
}
/**
 * Exercises doTestIDNToUnicode in two passes:
 * 1. every TestData.domainNames entry is expected to map to itself under all
 *    four combinations of ALLOW_UNASSIGNED and USE_STD3_RULES;
 * 2. every TestData.domainNamesToASCIIOut entry is expected to map to the
 *    TestData.domainNamesToUnicodeOut entry at the same index under DEFAULT
 *    and ALLOW_UNASSIGNED.
 * No exception is expected in either pass.
 */
public void TestIDNToUnicode() throws Exception{
    final int[] allOptions = {
        IDNA.DEFAULT,
        IDNA.ALLOW_UNASSIGNED,
        IDNA.USE_STD3_RULES,
        IDNA.ALLOW_UNASSIGNED|IDNA.USE_STD3_RULES
    };
    for(int n=0; n<TestData.domainNames.length; n++){
        for(int k=0; k<allOptions.length; k++){
            doTestIDNToUnicode(TestData.domainNames[n],TestData.domainNames[n],allOptions[k], null);
        }
    }

    final int[] basicOptions = { IDNA.DEFAULT, IDNA.ALLOW_UNASSIGNED };
    for(int n=0; n<TestData.domainNamesToASCIIOut.length; n++){
        for(int k=0; k<basicOptions.length; k++){
            doTestIDNToUnicode(TestData.domainNamesToASCIIOut[n],TestData.domainNamesToUnicodeOut[n],basicOptions[k], null);
        }
    }
}
/**
 * Checks IDNA.convertToUnicode through its String, StringBuffer and
 * UCharacterIterator overloads, one after the other.
 *
 * For each overload: a result that differs from a non-null expected value is
 * reported with errln; a successful call is also reported when an exception
 * was expected, unless that expected exception equals unassignedException;
 * a thrown ParseException is reported unless it equals expectedException.
 *
 * @param src               input handed to convertToUnicode
 * @param expected          expected output, or null to skip the comparison
 * @param options           option bits passed through to IDNA
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestToUnicode(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer bufferInput = new StringBuffer(src);
    UCharacterIterator iterInput = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer result = IDNA.convertToUnicode(src,options);
        boolean comparable = expected!=null && result != null;
        if(comparable && !expected.equals(result.toString())){
            errln("convertToUnicode did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(result));
        }
        boolean failureWanted = expectedException!=null && !unassignedException.equals(expectedException);
        if(failureWanted){
            errln("convertToUnicode did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException thrown){
        boolean anticipated = expectedException != null && thrown.equals(expectedException);
        if(!anticipated){
            errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ thrown.toString());
        }
    }

    // StringBuffer overload
    try{
        StringBuffer result = IDNA.convertToUnicode(bufferInput,options);
        boolean comparable = expected!=null && result != null;
        if(comparable && !expected.equals(result.toString())){
            errln("convertToUnicode did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+result);
        }
        boolean failureWanted = expectedException!=null && !unassignedException.equals(expectedException);
        if(failureWanted){
            errln("convertToUnicode did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException thrown){
        boolean anticipated = expectedException != null && thrown.equals(expectedException);
        if(!anticipated){
            errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ thrown.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer result = IDNA.convertToUnicode(iterInput,options);
        boolean comparable = expected!=null && result != null;
        if(comparable && !expected.equals(result.toString())){
            errln("convertToUnicode did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(result));
        }
        boolean failureWanted = expectedException!=null && !unassignedException.equals(expectedException);
        if(failureWanted){
            errln("Did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException thrown){
        boolean anticipated = expectedException != null && thrown.equals(expectedException);
        if(!anticipated){
            errln("Did not get the expected exception for source: " + prettify(src) +" Got: "+ thrown.toString());
        }
    }
}
/**
 * Checks IDNA.convertIDNToUnicode through its String, StringBuffer and
 * UCharacterIterator overloads, one after the other.
 *
 * For each overload: a result that differs from a non-null expected value is
 * reported with errln; a successful call is also reported when an exception
 * was expected, unless that expected exception equals unassignedException;
 * a thrown ParseException is reported unless expectedException equals it.
 *
 * Every failure message names convertIDNToUnicode and the overload involved,
 * so a failure here cannot be mistaken for one from doTestToUnicode.
 *
 * @param src               input handed to convertIDNToUnicode
 * @param expected          expected output, or null to skip the comparison
 * @param options           option bits passed through to IDNA
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestIDNToUnicode(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer inBuf = new StringBuffer(src);
    UCharacterIterator inIter = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer out = IDNA.convertIDNToUnicode(src,options);
        if(expected!=null && out != null && !out.toString().equals(expected)){
            errln("convertIDNToUnicode(String) did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(out));
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToUnicode(String) did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertIDNToUnicode(String) did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // StringBuffer overload
    try{
        StringBuffer out = IDNA.convertIDNToUnicode(inBuf,options);
        if(expected!=null && out != null && !out.toString().equals(expected)){
            errln("convertIDNToUnicode(StringBuffer) did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(out));
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToUnicode(StringBuffer) did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertIDNToUnicode(StringBuffer) did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer out = IDNA.convertIDNToUnicode(inIter,options);
        if(expected!=null && out != null && !out.toString().equals(expected)){
            errln("convertIDNToUnicode(UCharacterIterator) did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(out));
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToUnicode(UCharacterIterator) did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertIDNToUnicode(UCharacterIterator) did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }
}
/**
 * Checks IDNA.convertToASCII through its String, StringBuffer and
 * UCharacterIterator overloads, one after the other.
 *
 * For each overload the output is compared with the lower-cased expected
 * value, unless expected is null or expectedException equals
 * unassignedException. A successful call is reported when an exception was
 * expected (again excluding the unassigned case), and a thrown
 * ParseException is reported unless expectedException equals it.
 *
 * The expected value is lower-cased with an explicit English locale so the
 * comparison does not depend on the JVM default locale (under a Turkish
 * default locale "I" would otherwise lower-case to a dotless i).
 *
 * @param src               input handed to convertToASCII
 * @param expected          expected output (compared lower-cased), or null
 * @param options           option bits passed through to IDNA
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestToASCII(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer inBuf = new StringBuffer(src);
    UCharacterIterator inIter = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer out = IDNA.convertToASCII(src,options);
        if(!unassignedException.equals(expectedException) && expected!=null && out != null
                && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            // Report the exception that was wanted, not the thrown one twice.
            errln("convertToASCII did not get the expected exception for source: " +src +"\n Got: "+ ex.toString() +"\n Expected: " +expectedException);
        }
    }

    // StringBuffer overload
    try{
        StringBuffer out = IDNA.convertToASCII(inBuf,options);
        if(!unassignedException.equals(expectedException) && expected!=null && out != null
                && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer out = IDNA.convertToASCII(inIter,options);
        if(!unassignedException.equals(expectedException) && expected!=null && out != null
                && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+ out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }
}
/**
 * Checks the IDN-to-ASCII conversion through IDNA.convertIDNToASCII(String)
 * and the StringBuffer and UCharacterIterator overloads of
 * IDNA.convertIDNtoASCII, one after the other.
 *
 * For each overload the output is compared with the lower-cased expected
 * value when expected is non-null. A successful call is reported when an
 * exception was expected, unless that expected exception equals
 * unassignedException; a thrown ParseException is reported unless it equals
 * expectedException.
 *
 * The expected value is lower-cased with an explicit English locale so the
 * comparison does not depend on the JVM default locale.
 *
 * @param src               input domain name
 * @param expected          expected output (compared lower-cased), or null
 * @param options           option bits passed through to IDNA
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestIDNToASCII(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer inBuf = new StringBuffer(src);
    UCharacterIterator inIter = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer out = IDNA.convertIDNToASCII(src,options);
        if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertIDNToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !ex.equals(expectedException)){
            errln("convertIDNToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // StringBuffer overload
    try{
        StringBuffer out = IDNA.convertIDNtoASCII(inBuf,options);
        if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertIDNToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !ex.equals(expectedException)){
            errln("convertIDNToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer out = IDNA.convertIDNtoASCII(inIter,options);
        if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertIDNToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+ out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !ex.equals(expectedException)){
            errln("convertIDNToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }
}
/**
 * Feeds the TestData.conformanceTestCases table through doTestToASCII with
 * IDNA.DEFAULT and IDNA.ALLOW_UNASSIGNED. Only cases whose expected field is
 * non-null are run; the toUnicode direction is not exercised here.
 */
public void TestConformance()throws Exception{
    final int[] optionSets = { IDNA.DEFAULT, IDNA.ALLOW_UNASSIGNED };
    for(int caseIndex=0; caseIndex<TestData.conformanceTestCases.length; caseIndex++){
        TestData.ConformanceTestCase current = TestData.conformanceTestCases[caseIndex];
        if(current.expected == null){
            continue;
        }
        for(int k=0; k<optionSets.length; k++){
            doTestToASCII(current.input,current.output,optionSets[k],current.expected);
        }
    }
}
/**
 * Runs every TestData.conformanceTestCases entry through a StringPrep
 * instance created from the "uidna.spp" data resource, first with
 * StringPrep.NONE and then with StringPrep.ALLOW_UNASSIGNED.
 *
 * For each pass: output that differs from a non-null testCase.output is
 * reported; a successful call is reported when testCase.expected is non-null
 * and does not equal unassignedException; a thrown ParseException is
 * reported unless it equals testCase.expected.
 *
 * The data stream is closed in a finally block so it is released even when
 * a test case throws.
 */
public void TestNamePrepConformance() throws Exception{
    InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
    try{
        StringPrep namePrep = StringPrep.getInstance(stream);
        for(int i=0; i<TestData.conformanceTestCases.length;i++){
            TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
            UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
            // Pass 1: unassigned code points not allowed.
            try{
                StringBuffer output = namePrep.prepare(iter,StringPrep.NONE);
                if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
                    errln("Did not get the expected output. Expected: " + prettify(testCase.output)+
                          " Got: "+ prettify(output) );
                }
                if(testCase.expected!=null && !unassignedException.equals(testCase.expected)){
                    errln("Did not get the expected exception. The operation succeeded!");
                }
            }catch(ParseException ex){
                if(testCase.expected == null || !ex.equals(testCase.expected)){
                    errln("Did not get the expected exception for source: " +testCase.input +" Got: "+ ex.toString());
                }
            }
            // Pass 2: same input with unassigned code points allowed; the
            // iterator is rewound because pass 1 has already been handed it.
            try{
                iter.setToStart();
                StringBuffer output = namePrep.prepare(iter,StringPrep.ALLOW_UNASSIGNED);
                if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
                    errln("Did not get the expected output. Expected: " + prettify(testCase.output)+
                          " Got: "+ prettify(output) );
                }
                if(testCase.expected!=null && !unassignedException.equals(testCase.expected)){
                    errln("Did not get the expected exception. The operation succeeded!");
                }
            }catch(ParseException ex){
                if(testCase.expected == null || !ex.equals(testCase.expected)){
                    errln("Did not get the expected exception for source: " +testCase.input +" Got: "+ ex.toString());
                }
            }
        }
    }finally{
        if(stream != null){
            stream.close();
        }
    }
}
/**
 * Drives the TestData.errorCases table through the ToASCII, IDNToASCII and
 * IDNToUnicode helpers, passing each case's expected exception.
 *
 * Per case:
 * - testLabel: doTestToASCII with DEFAULT and ALLOW_UNASSIGNED, plus
 *   USE_STD3_RULES when useSTD3ASCIIRules is set;
 * - always: doTestIDNToASCII, with USE_STD3_RULES when useSTD3ASCIIRules is
 *   set, otherwise with DEFAULT and ALLOW_UNASSIGNED;
 * - testToUnicode: doTestIDNToUnicode with the same option selection.
 */
public void TestErrorCases() throws Exception{
    for(int caseIndex=0; caseIndex < TestData.errorCases.length; caseIndex++){
        TestData.ErrorCase current = TestData.errorCases[caseIndex];
        String unicodeText = new String(current.unicode);

        if(current.testLabel){
            // label-level ToASCII
            doTestToASCII(unicodeText,current.ascii,IDNA.DEFAULT,current.expected);
            doTestToASCII(unicodeText,current.ascii,IDNA.ALLOW_UNASSIGNED,current.expected);
            if(current.useSTD3ASCIIRules){
                doTestToASCII(unicodeText,current.ascii,IDNA.USE_STD3_RULES,current.expected);
            }
        }

        // domain-level IDNToASCII
        if(current.useSTD3ASCIIRules){
            doTestIDNToASCII(unicodeText,current.ascii,IDNA.USE_STD3_RULES,current.expected);
        }else{
            doTestIDNToASCII(unicodeText,current.ascii,IDNA.DEFAULT,current.expected);
            doTestIDNToASCII(unicodeText,current.ascii,IDNA.ALLOW_UNASSIGNED,current.expected);
        }

        // domain-level IDNToUnicode, only for cases flagged for it
        if(!current.testToUnicode){
            continue;
        }
        if(current.useSTD3ASCIIRules){
            doTestIDNToUnicode(current.ascii,unicodeText,IDNA.USE_STD3_RULES,current.expected);
        }else{
            doTestIDNToUnicode(current.ascii,unicodeText,IDNA.DEFAULT,current.expected);
            doTestIDNToUnicode(current.ascii,unicodeText,IDNA.ALLOW_UNASSIGNED,current.expected);
        }
    }
}
/**
 * Checks IDNA.compare for the pair (s1, s2) through its String, StringBuffer
 * and UCharacterIterator overloads, once with IDNA.DEFAULT and once with
 * IDNA.ALLOW_UNASSIGNED.
 *
 * Both directions are verified: when isEqual is true the result must be 0,
 * and when isEqual is false the result must be non-zero. Any exception from
 * compare is reported through errln together with the exception text and
 * ends the checks for that option set.
 *
 * @param s1      first domain name
 * @param s2      second domain name
 * @param isEqual whether the two names are expected to compare as equal
 */
private void doTestCompare(String s1, String s2, boolean isEqual){
    final int[] optionSets = { IDNA.DEFAULT, IDNA.ALLOW_UNASSIGNED };
    for(int k=0; k<optionSets.length; k++){
        final int options = optionSets[k];
        try{
            // form 0: String, form 1: StringBuffer, form 2: UCharacterIterator
            for(int form=0; form<3; form++){
                int retVal;
                switch(form){
                case 0:
                    retVal = IDNA.compare(s1,s2,options);
                    break;
                case 1:
                    retVal = IDNA.compare(new StringBuffer(s1), new StringBuffer(s2), options);
                    break;
                default:
                    retVal = IDNA.compare(UCharacterIterator.getInstance(s1), UCharacterIterator.getInstance(s2), options);
                    break;
                }
                if((retVal == 0) != isEqual){
                    errln("Did not get the expected result for s1: "+ prettify(s1)+
                          " s2: "+prettify(s2)+
                          (isEqual ? " (expected equal)" : " (expected not equal)")+
                          " with options : "+ options);
                }
            }
        }catch(Exception e){
            errln("Unexpected exception thrown by IDNA.compare with options : "+ options +" Got: "+ e.toString());
        }
    }
}
/**
 * Wraps every TestData.unicodeIn entry as "www." + entry + ".com" and hands
 * it to doTestCompare against:
 * a) itself (expected equal);
 * b) the TestData.asciiIn entry at the same index, wrapped the same way
 *    (expected equal);
 * c) a different wrapped unicodeIn entry (expected not equal);
 * d) a different wrapped asciiIn entry (expected not equal).
 * The "different" entry is index 1 for the first table row and index 0 for
 * every other row.
 */
public void TestCompare() throws Exception{
    final String www = "www.";
    final String com = ".com";

    // fixed comparison partners built from the first two table rows
    String uni0 = new StringBuffer(www).append(TestData.unicodeIn[0]).append(com).toString();
    String uni1 = new StringBuffer(www).append(TestData.unicodeIn[1]).append(com).toString();
    String ascii0 = new StringBuffer(www).append(TestData.asciiIn[0]).append(com).toString();
    String ascii1 = new StringBuffer(www).append(TestData.asciiIn[1]).append(com).toString();

    StringBuffer source = new StringBuffer(www);
    for(int row=0; row< TestData.unicodeIn.length; row++){
        // cut back to the "www." prefix, then add this row's name and ".com"
        source.setLength(www.length());
        source.append(TestData.unicodeIn[row]).append(com);
        String current = source.toString();
        boolean firstRow = (row==0);

        doTestCompare(current,current,true);
        doTestCompare(current,www+TestData.asciiIn[row]+com,true);
        doTestCompare(current, firstRow ? uni1 : uni0, false);
        doTestCompare(current, firstRow ? ascii1 : ascii0, false);
    }
}
/**
 * Checks that the ToASCII conversions are stable under repetition, i.e.
 * func(func(func(src))) == func(src).
 *
 * The source is converted once, the result is fed back through the
 * StringBuffer overload four more times, and the text of the first and last
 * results is compared. This is done for the IDN conversion
 * (convertIDNToASCII / convertIDNtoASCII) and then for convertToASCII.
 *
 * @param source text to convert
 */
public void doTestChainingToASCII(String source)throws Exception{
    // domain-name conversion
    StringBuffer firstPass = IDNA.convertIDNToASCII(source,IDNA.DEFAULT);
    StringBuffer repeated = firstPass;
    int remaining = 4;
    while(remaining-- > 0){
        repeated = IDNA.convertIDNtoASCII(repeated,IDNA.DEFAULT);
    }
    if(!firstPass.toString().equals(repeated.toString())){
        errln("Chaining test failed for convertIDNToASCII");
    }

    // single-label conversion
    firstPass = IDNA.convertToASCII(source,IDNA.DEFAULT);
    repeated = firstPass;
    remaining = 4;
    while(remaining-- > 0){
        repeated = IDNA.convertToASCII(repeated,IDNA.DEFAULT);
    }
    if(!firstPass.toString().equals(repeated.toString())){
        errln("Chaining test failed for convertToASCII");
    }
}
/**
 * Checks that the ToUnicode conversions are stable under repetition, i.e.
 * func(func(func(src))) == func(src).
 *
 * The source is converted once, the result is fed back through the
 * StringBuffer overload four more times, and the text of the first and last
 * results is compared. This is done for convertIDNToUnicode and then for
 * convertToUnicode.
 *
 * @param source text to convert
 */
public void doTestChainingToUnicode(String source)throws Exception{
    // domain-name conversion
    StringBuffer firstPass = IDNA.convertIDNToUnicode(source,IDNA.DEFAULT);
    StringBuffer repeated = firstPass;
    int remaining = 4;
    while(remaining-- > 0){
        repeated = IDNA.convertIDNToUnicode(repeated,IDNA.DEFAULT);
    }
    if(!firstPass.toString().equals(repeated.toString())){
        errln("Chaining test failed for convertIDNToUnicode");
    }

    // single-label conversion
    firstPass = IDNA.convertToUnicode(source,IDNA.DEFAULT);
    repeated = firstPass;
    remaining = 4;
    while(remaining-- > 0){
        repeated = IDNA.convertToUnicode(repeated,IDNA.DEFAULT);
    }
    if(!firstPass.toString().equals(repeated.toString())){
        errln("Chaining test failed for convertToUnicode");
    }
}
/**
 * Runs the chaining checks over the shared tables: doTestChainingToUnicode
 * for every TestData.asciiIn entry, then doTestChainingToASCII for every
 * TestData.unicodeIn entry.
 */
public void TestChaining() throws Exception{
    int index = 0;
    while(index < TestData.asciiIn.length){
        doTestChainingToUnicode(TestData.asciiIn[index]);
        index++;
    }
    index = 0;
    while(index < TestData.unicodeIn.length){
        doTestChainingToASCII(new String(TestData.unicodeIn[index]));
        index++;
    }
}
/**
 * Same comparisons as TestCompare, but every name ends in ".com." so that
 * the trailing root label separator is part of the input.
 *
 * Each TestData.unicodeIn entry, wrapped as "www." + entry + ".com.", is
 * handed to doTestCompare against itself and its asciiIn counterpart
 * (expected equal) and against a different unicodeIn and asciiIn entry
 * (expected not equal). The "different" entry is index 1 for the first
 * table row and index 0 for every other row.
 */
public void TestRootLabelSeparator() throws Exception{
    final String www = "www.";
    final String com = ".com."; /*root label separator*/

    // fixed comparison partners built from the first two table rows
    String uni0 = new StringBuffer(www).append(TestData.unicodeIn[0]).append(com).toString();
    String uni1 = new StringBuffer(www).append(TestData.unicodeIn[1]).append(com).toString();
    String ascii0 = new StringBuffer(www).append(TestData.asciiIn[0]).append(com).toString();
    String ascii1 = new StringBuffer(www).append(TestData.asciiIn[1]).append(com).toString();

    StringBuffer source = new StringBuffer(www);
    for(int row=0; row< TestData.unicodeIn.length; row++){
        // cut back to the "www." prefix, then add this row's name and ".com."
        source.setLength(www.length());
        source.append(TestData.unicodeIn[row]).append(com);
        String current = source.toString();
        boolean firstRow = (row==0);

        doTestCompare(current,current,true);
        doTestCompare(current,www+TestData.asciiIn[row]+com,true);
        doTestCompare(current, firstRow ? uni1 : uni0, false);
        doTestCompare(current, firstRow ? ascii1 : ascii0, false);
    }
}
// Number of random strings MonkeyTest generates and checks.
private static final int loopCount = 100;
// Upper bound handed to randi() in getTestSource(); the generated string holds
// between 1 and maxCharCount + 1 code points.
private static final int maxCharCount = 15;
// NOTE(review): not referenced in the visible part of this file.
private static final int maxCodePoint = 0x10ffff;
// Random source for the monkey test; created lazily in getTestSource() via createRandom().
private Random random = null;
/**
 * Returns a random code point taken from planes 0, 1, 2 and 14.
 * An 18-bit value is drawn; values below 0x30000 are returned unchanged and
 * values in 0x30000..0x3FFFF are shifted up by 0xB0000 into 0xE0000..0xEFFFF.
 * The random field must have been initialised before this is called.
 */
private int rand_uni()
{
    int codePoint = (int)(random.nextLong()& 0x3FFFF);
    return (codePoint >= 0x30000) ? codePoint + 0xB0000 : codePoint;
}
/**
 * Returns a random integer i where 0 <= i <= n, obtained by reducing a draw
 * from [0, 0x7fff) modulo n + 1.
 * The random field must have been initialised before this is called.
 */
private int randi(int n){
    int draw = random.nextInt(0x7fff);
    return draw % (n+1);
}
/**
 * Appends a random run of code points to fillIn and returns it.
 * The run holds randi(maxCharCount) + 1 code points drawn from rand_uni();
 * U+0000 draws are discarded and drawn again. The random generator is
 * created through createRandom() on first use.
 *
 * @param fillIn buffer to append to
 * @return the same buffer, for convenience
 */
private StringBuffer getTestSource(StringBuffer fillIn) {
    // use uniform seed value from the framework
    if(random==null){
        random = createRandom();
    }
    int remaining = randi(maxCharCount) + 1;
    while(remaining > 0){
        int candidate = rand_uni();
        if(candidate != 0x0000){
            UTF16.append(fillIn, candidate);
            remaining--;
        }
    }
    return fillIn;
}
/**
 * Monkey test: generates loopCount random strings with getTestSource and
 * hands each to doTestCompareReferenceImpl, then runs one more check on a
 * string with an embedded U+0000.
 *
 * The final string is the last random string followed by the unescaped
 * literal below. Only the literal goes through Utility.unescape; the random
 * prefix is left alone so that a randomly drawn backslash in it cannot be
 * read as the start of an escape sequence.
 */
public void MonkeyTest() throws Exception{
    StringBuffer source = new StringBuffer();
    /* do the monkey test */
    for(int i=0; i<loopCount; i++){
        source.setLength(0);
        getTestSource(source);
        doTestCompareReferenceImpl(source);
    }
    // test string with embedded null
    source.append(Utility.unescape("\\u0000\\u2109\\u3E1B\\U000E65CA\\U0001CAC5"));
    doTestCompareReferenceImpl(source);
    //StringBuffer src = new StringBuffer(Utility.unescape("\\uDEE8\\U000E228C\\U0002EE8E\\U000E6350\\U00024DD9\u4049\\U000E0DE4\\U000E448C\\U0001869B\\U000E3380\\U00016A8E\\U000172D5\\U0001C408\\U000E9FB5"));
    //doTestCompareReferenceImpl(src);
}
/**
 * Uses IDNAReference as an oracle for IDNA on one input.
 *
 * ToASCII: the reference conversion is run with IDNA.DEFAULT. If it throws
 * an exception equal to unassignedException it is retried with
 * IDNA.ALLOW_UNASSIGNED. The resulting text (or null) and the surviving
 * exception (or null) become the expectations passed to doTestToASCII,
 * together with the options that were last used.
 *
 * ToUnicode: only when ToASCII left no exception, the reference ToUnicode
 * conversion is run on the ASCII form with the same retry rule and the
 * outcome is passed to doTestToUnicode.
 *
 * @param src input label
 */
private void doTestCompareReferenceImpl(StringBuffer src) throws Exception{
    int options = IDNA.DEFAULT;
    ParseException failure = null;
    StringBuffer asciiForm = null;

    logln("Comparing idnaref_toASCII with uidna_toASCII for input: " + prettify(src));
    try{
        asciiForm = IDNAReference.convertToASCII(src, options);
    }catch( ParseException first){
        if(first.equals(unassignedException)){
            // retry once with unassigned code points allowed
            options = IDNA.ALLOW_UNASSIGNED;
            try{
                asciiForm = IDNAReference.convertToASCII(src, options);
            }catch( ParseException second){
                failure = second;
            }
        }else{
            failure = first;
        }
    }
    String expectedAscii = (asciiForm == null) ? null : asciiForm.toString();
    doTestToASCII(src.toString(), expectedAscii, options, failure);

    logln("Comparing idnaref_toUnicode with uidna_toUnicode for input: " + prettify(src));
    if(failure != null){
        // ToASCII failed in the reference implementation; nothing to round-trip
        return;
    }

    StringBuffer unicodeForm = null;
    options = IDNA.DEFAULT;
    try{
        unicodeForm = IDNAReference.convertToUnicode(asciiForm, options);
    }catch( ParseException first ){
        if(first.equals(unassignedException)){
            // retry once with unassigned code points allowed
            options = IDNA.ALLOW_UNASSIGNED;
            try{
                unicodeForm = IDNAReference.convertToUnicode(asciiForm, options);
            }catch(ParseException second){
                failure = second;
            }
        }else{
            failure = first;
        }
    }
    String expectedUnicode = (unicodeForm == null) ? null : unicodeForm.toString();
    doTestToUnicode(asciiForm.toString(), expectedUnicode, options, failure);
}
/**
 * Compares IDNA with IDNAReference on single code points, one string per
 * code point, via doTestCompareReferenceImpl.
 *
 * The loop starts at 0x40000. Code points in 0x30000..0xF0000 are skipped
 * by jumping ahead 0xB0000; the jump is bounded above so that it fires only
 * once (0x40000 -> 0xF0000) and the loop then walks 0xF0000..0x10FFFE.
 * Without the upper bound the jump fired on every iteration and pushed the
 * value past 0x10FFFF on the second pass.
 *
 * In quick mode the method returns as soon as the current value exceeds
 * 0x1FFFF, which with the current start value is on the first iteration.
 */
public void TestCompareRefImpl() throws Exception{
    StringBuffer src = new StringBuffer();
    for(int i = 0x40000 ; i< 0x10ffff; i++){
        src.setLength(0);
        if(isQuick()==true && i> 0x1FFFF){
            return;
        }
        // skip the range once; never jump again once past 0xF0000
        if(i >= 0x30000 && i <= 0xF0000){
            i+=0xB0000;
        }
        UTF16.append(src,i);
        doTestCompareReferenceImpl(src);
    }
}
}

View File

@ -0,0 +1,565 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNARef.java,v $
* $Date: 2003/08/21 23:42:27 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.stringprep.ParseException;
import com.ibm.icu.text.UCharacterIterator;
/**
* Tests for IDNAReference, driven by the shared tables in TestData.
*
* @author ram
*/
public class TestIDNARef extends TestFmwk {
/**
 * Command-line entry point: creates a TestIDNARef instance and hands the
 * arguments to its run method.
 */
public static void main(String[] args) throws Exception {
    TestIDNARef test = new TestIDNARef();
    test.run(args);
}
// Reference exception built with ParseException.UNASSIGNED_ERROR. The doTest* helpers
// compare expected/thrown exceptions against it with equals() to recognise the
// "unassigned code point" case.
// NOTE(review): the comparison rule of ParseException.equals is defined outside this section.
private ParseException unassignedException = new ParseException("",ParseException.UNASSIGNED_ERROR);
/**
 * Runs doTestToUnicode for every TestData.asciiIn entry against the
 * TestData.unicodeIn entry at the same index, first with
 * IDNAReference.DEFAULT and then with IDNAReference.ALLOW_UNASSIGNED.
 * No exception is expected. The USE_STD3_RULES combinations are not
 * exercised here.
 */
public void TestToUnicode() throws Exception{
    final int[] optionSets = { IDNAReference.DEFAULT, IDNAReference.ALLOW_UNASSIGNED };
    int index = 0;
    while(index < TestData.asciiIn.length){
        for(int k=0; k<optionSets.length; k++){
            doTestToUnicode(TestData.asciiIn[index],new String(TestData.unicodeIn[index]),optionSets[k], null);
        }
        index++;
    }
}
/**
 * Runs doTestToASCII for every TestData.unicodeIn entry against the
 * TestData.asciiIn entry at the same index, first with
 * IDNAReference.DEFAULT and then with IDNAReference.ALLOW_UNASSIGNED.
 * No exception is expected. The USE_STD3_RULES combinations are not
 * exercised here.
 */
public void TestToASCII() throws Exception{
    final int[] optionSets = { IDNAReference.DEFAULT, IDNAReference.ALLOW_UNASSIGNED };
    int index = 0;
    while(index < TestData.asciiIn.length){
        for(int k=0; k<optionSets.length; k++){
            doTestToASCII(new String(TestData.unicodeIn[index]),TestData.asciiIn[index],optionSets[k], null);
        }
        index++;
    }
}
/**
 * Exercises doTestIDNToASCII in two passes:
 * 1. every TestData.domainNames entry is expected to map to itself under all
 *    four combinations of ALLOW_UNASSIGNED and USE_STD3_RULES;
 * 2. every TestData.domainNames1Uni entry is expected to map to the
 *    TestData.domainNamesToASCIIOut entry at the same index under DEFAULT and
 *    ALLOW_UNASSIGNED.
 * No exception is expected in either pass.
 */
public void TestIDNToASCII() throws Exception{
    final int[] allOptions = {
        IDNAReference.DEFAULT,
        IDNAReference.ALLOW_UNASSIGNED,
        IDNAReference.USE_STD3_RULES,
        IDNAReference.ALLOW_UNASSIGNED|IDNAReference.USE_STD3_RULES
    };
    for(int n=0; n<TestData.domainNames.length; n++){
        for(int k=0; k<allOptions.length; k++){
            doTestIDNToASCII(TestData.domainNames[n],TestData.domainNames[n],allOptions[k], null);
        }
    }

    final int[] basicOptions = { IDNAReference.DEFAULT, IDNAReference.ALLOW_UNASSIGNED };
    for(int n=0; n<TestData.domainNames1Uni.length; n++){
        for(int k=0; k<basicOptions.length; k++){
            doTestIDNToASCII(TestData.domainNames1Uni[n],TestData.domainNamesToASCIIOut[n],basicOptions[k], null);
        }
    }
}
/**
 * Exercises doTestIDNToUnicode in two passes:
 * 1. every TestData.domainNames entry is expected to map to itself under all
 *    four combinations of ALLOW_UNASSIGNED and USE_STD3_RULES;
 * 2. every TestData.domainNamesToASCIIOut entry is expected to map to the
 *    TestData.domainNamesToUnicodeOut entry at the same index under DEFAULT
 *    and ALLOW_UNASSIGNED.
 * No exception is expected in either pass.
 */
public void TestIDNToUnicode() throws Exception{
    final int[] allOptions = {
        IDNAReference.DEFAULT,
        IDNAReference.ALLOW_UNASSIGNED,
        IDNAReference.USE_STD3_RULES,
        IDNAReference.ALLOW_UNASSIGNED|IDNAReference.USE_STD3_RULES
    };
    for(int n=0; n<TestData.domainNames.length; n++){
        for(int k=0; k<allOptions.length; k++){
            doTestIDNToUnicode(TestData.domainNames[n],TestData.domainNames[n],allOptions[k], null);
        }
    }

    final int[] basicOptions = { IDNAReference.DEFAULT, IDNAReference.ALLOW_UNASSIGNED };
    for(int n=0; n<TestData.domainNamesToASCIIOut.length; n++){
        for(int k=0; k<basicOptions.length; k++){
            doTestIDNToUnicode(TestData.domainNamesToASCIIOut[n],TestData.domainNamesToUnicodeOut[n],basicOptions[k], null);
        }
    }
}
/**
 * Checks IDNAReference.convertToUnicode through its String, StringBuffer
 * and UCharacterIterator overloads, one after the other.
 *
 * For each overload: a result that differs from a non-null expected value is
 * reported with errln; a successful call is also reported when an exception
 * was expected, unless that expected exception equals unassignedException;
 * a thrown ParseException is reported unless it equals expectedException.
 *
 * @param src               input handed to convertToUnicode
 * @param expected          expected output, or null to skip the comparison
 * @param options           option bits passed through to IDNAReference
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestToUnicode(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer bufferInput = new StringBuffer(src);
    UCharacterIterator iterInput = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer result = IDNAReference.convertToUnicode(src,options);
        boolean comparable = expected!=null && result != null;
        if(comparable && !expected.equals(result.toString())){
            errln("convertToUnicode did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(result));
        }
        boolean failureWanted = expectedException!=null && !unassignedException.equals(expectedException);
        if(failureWanted){
            errln("convertToUnicode did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException thrown){
        boolean anticipated = expectedException != null && thrown.equals(expectedException);
        if(!anticipated){
            errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ thrown.toString());
        }
    }

    // StringBuffer overload
    try{
        StringBuffer result = IDNAReference.convertToUnicode(bufferInput,options);
        boolean comparable = expected!=null && result != null;
        if(comparable && !expected.equals(result.toString())){
            errln("convertToUnicode did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+result);
        }
        boolean failureWanted = expectedException!=null && !unassignedException.equals(expectedException);
        if(failureWanted){
            errln("convertToUnicode did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException thrown){
        boolean anticipated = expectedException != null && thrown.equals(expectedException);
        if(!anticipated){
            errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ thrown.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer result = IDNAReference.convertToUnicode(iterInput,options);
        boolean comparable = expected!=null && result != null;
        if(comparable && !expected.equals(result.toString())){
            errln("convertToUnicode did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(result));
        }
        boolean failureWanted = expectedException!=null && !unassignedException.equals(expectedException);
        if(failureWanted){
            errln("Did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException thrown){
        boolean anticipated = expectedException != null && thrown.equals(expectedException);
        if(!anticipated){
            errln("Did not get the expected exception for source: " + prettify(src) +" Got: "+ thrown.toString());
        }
    }
}
/**
 * Checks IDNAReference.convertIDNToUnicode through its String, StringBuffer
 * and UCharacterIterator overloads, one after the other.
 *
 * For each overload: a result that differs from a non-null expected value is
 * reported with errln; a successful call is also reported when an exception
 * was expected, unless that expected exception equals unassignedException;
 * a thrown ParseException is reported unless expectedException equals it.
 *
 * Every failure message names convertIDNToUnicode and the overload involved,
 * so a failure here cannot be mistaken for one from doTestToUnicode.
 *
 * @param src               input handed to convertIDNToUnicode
 * @param expected          expected output, or null to skip the comparison
 * @param options           option bits passed through to IDNAReference
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestIDNToUnicode(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer inBuf = new StringBuffer(src);
    UCharacterIterator inIter = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer out = IDNAReference.convertIDNToUnicode(src,options);
        if(expected!=null && out != null && !out.toString().equals(expected)){
            errln("convertIDNToUnicode(String) did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(out));
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToUnicode(String) did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertIDNToUnicode(String) did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // StringBuffer overload
    try{
        StringBuffer out = IDNAReference.convertIDNToUnicode(inBuf,options);
        if(expected!=null && out != null && !out.toString().equals(expected)){
            errln("convertIDNToUnicode(StringBuffer) did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(out));
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToUnicode(StringBuffer) did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertIDNToUnicode(StringBuffer) did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer out = IDNAReference.convertIDNToUnicode(inIter,options);
        if(expected!=null && out != null && !out.toString().equals(expected)){
            errln("convertIDNToUnicode(UCharacterIterator) did not return expected result with options : "+ options +
                  " Expected: " + prettify(expected)+" Got: "+prettify(out));
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertIDNToUnicode(UCharacterIterator) did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertIDNToUnicode(UCharacterIterator) did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }
}
/**
 * Checks IDNAReference.convertToASCII through its String, StringBuffer and
 * UCharacterIterator overloads, one after the other.
 *
 * For each overload the output is compared with the lower-cased expected
 * value, unless expected is null or expectedException equals
 * unassignedException. A successful call is reported when an exception was
 * expected (again excluding the unassigned case), and a thrown
 * ParseException is reported unless expectedException equals it.
 *
 * The expected value is lower-cased with an explicit English locale so the
 * comparison does not depend on the JVM default locale (under a Turkish
 * default locale "I" would otherwise lower-case to a dotless i).
 *
 * @param src               input handed to convertToASCII
 * @param expected          expected output (compared lower-cased), or null
 * @param options           option bits passed through to IDNAReference
 * @param expectedException exception the call should raise, or null if none
 */
private void doTestToASCII(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer inBuf = new StringBuffer(src);
    UCharacterIterator inIter = UCharacterIterator.getInstance(src);

    // String overload
    try{
        StringBuffer out = IDNAReference.convertToASCII(src,options);
        if(!unassignedException.equals(expectedException) && expected!=null && out != null
                && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // StringBuffer overload
    try{
        StringBuffer out = IDNAReference.convertToASCII(inBuf,options);
        if(!unassignedException.equals(expectedException) && expected!=null && out != null
                && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }

    // UCharacterIterator overload
    try{
        StringBuffer out = IDNAReference.convertToASCII(inIter,options);
        if(!unassignedException.equals(expectedException) && expected!=null && out != null
                && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
            errln("convertToASCII did not return expected result with options : "+ options +
                  " Expected: " + expected+" Got: "+ out);
        }
        if(expectedException!=null && !unassignedException.equals(expectedException)){
            errln("convertToASCII did not get the expected exception. The operation succeeded!");
        }
    }catch(ParseException ex){
        if(expectedException == null || !expectedException.equals(ex)){
            errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
        }
    }
}
/**
 * Runs the IDN-to-ASCII conversion on the String, StringBuffer and
 * UCharacterIterator forms of <code>src</code> and verifies either the
 * produced output or the thrown ParseException.
 *
 * @param src               the domain name to convert
 * @param expected          the expected output, compared after lower-casing;
 *                          null skips the output comparison
 * @param options           the IDNAReference option bits handed to each conversion
 * @param expectedException the exception the conversion should throw, or null
 *                          when the conversion is expected to succeed
 * @throws Exception any exception other than ParseException raised by the conversions
 */
private void doTestIDNToASCII(String src, String expected, int options, Object expectedException)
throws Exception{
    StringBuffer inBuf = new StringBuffer(src);
    UCharacterIterator inIter = UCharacterIterator.getInstance(src);
    // String overload
    try{
        StringBuffer out = IDNAReference.convertIDNToASCII(src,options);
        checkIDNToASCIIOutput(out, expected, options, expectedException);
    }catch(ParseException ex){
        checkIDNToASCIIException(ex, src, expectedException);
    }
    // StringBuffer overload
    try{
        StringBuffer out = IDNAReference.convertIDNtoASCII(inBuf,options);
        checkIDNToASCIIOutput(out, expected, options, expectedException);
    }catch(ParseException ex){
        checkIDNToASCIIException(ex, src, expectedException);
    }
    // UCharacterIterator overload
    try{
        StringBuffer out = IDNAReference.convertIDNtoASCII(inIter,options);
        checkIDNToASCIIOutput(out, expected, options, expectedException);
    }catch(ParseException ex){
        checkIDNToASCIIException(ex, src, expectedException);
    }
}
/** Reports an error when a successful conversion produced the wrong output or should have failed. */
private void checkIDNToASCIIOutput(StringBuffer out, String expected, int options, Object expectedException){
    // lower-case with a fixed locale so the comparison does not depend on the default locale
    if(expected!=null && out != null
            && !out.toString().equals(expected.toLowerCase(java.util.Locale.ENGLISH))){
        errln("convertIDNToASCII did not return expected result with options : "+ options +
              " Expected: " + expected+" Got: "+ out);
    }
    // an expected "unassigned" exception is tolerated when the operation succeeds
    if(expectedException!=null && !unassignedException.equals(expectedException)){
        errln("convertIDNToASCII did not get the expected exception. The operation succeeded!");
    }
}
/** Reports an error when the conversion threw an exception other than the expected one. */
private void checkIDNToASCIIException(ParseException ex, String src, Object expectedException){
    if(expectedException == null || !ex.equals(expectedException)){
        errln("convertIDNToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
    }
}
/**
 * Feeds every conformance test case that carries an expected exception
 * through doTestToASCII, once with DEFAULT and once with ALLOW_UNASSIGNED.
 * The ToUnicode direction is not exercised here.
 */
public void TestConformance()throws Exception{
    for(int index=0; index<TestData.conformanceTestCases.length; index++){
        TestData.ConformanceTestCase current = TestData.conformanceTestCases[index];
        if(current.expected == null){
            // only cases with an expected exception are run through ToASCII
            continue;
        }
        doTestToASCII(current.input,current.output,IDNAReference.DEFAULT,current.expected);
        doTestToASCII(current.input,current.output,IDNAReference.ALLOW_UNASSIGNED,current.expected);
    }
}
/**
 * Prepares every conformance input with the NamePrepTransform, first with
 * NONE and then (after rewinding the iterator) with ALLOW_UNASSIGNED, and
 * checks the output or the thrown ParseException against the test case.
 */
public void TestNamePrepConformance() throws Exception{
    NamePrepTransform transform = NamePrepTransform.getInstance();
    for(int caseIndex=0; caseIndex<TestData.conformanceTestCases.length; caseIndex++){
        TestData.ConformanceTestCase current = TestData.conformanceTestCases[caseIndex];
        UCharacterIterator text = UCharacterIterator.getInstance(current.input);
        // pass 0 uses NONE, pass 1 uses ALLOW_UNASSIGNED on the rewound iterator
        for(int pass=0; pass<2; pass++){
            try{
                StringBuffer prepared;
                if(pass==0){
                    prepared = transform.prepare(text,NamePrepTransform.NONE);
                }else{
                    text.setToStart();
                    prepared = transform.prepare(text,NamePrepTransform.ALLOW_UNASSIGNED);
                }
                boolean comparable = current.output !=null && prepared!=null;
                if(comparable && !current.output.equals(prepared.toString())){
                    errln("Did not get the expected output. Expected: " + prettify(current.output)+
                          " Got: "+ prettify(prepared) );
                }
                if(current.expected!=null && !unassignedException.equals(current.expected)){
                    errln("Did not get the expected exception. The operation succeeded!");
                }
            }catch(ParseException thrown){
                boolean matches = current.expected != null && thrown.equals(current.expected);
                if(!matches){
                    errln("Did not get the expected exception for source: " +current.input +" Got: "+ thrown.toString());
                }
            }
        }
    }
}
/**
 * Drives the ToASCII, IDNToASCII and IDNToUnicode helpers with every entry
 * of TestData.errorCases. Entries flagged for STD3 rules are run with
 * USE_STD3_RULES; the others with DEFAULT and ALLOW_UNASSIGNED.
 */
public void TestErrorCases() throws Exception{
    for(int idx=0; idx < TestData.errorCases.length; idx++){
        TestData.ErrorCase current = TestData.errorCases[idx];
        boolean std3 = current.useSTD3ASCIIRules;

        // single-label ToASCII
        if(current.testLabel){
            doTestToASCII(new String(current.unicode),current.ascii,IDNAReference.DEFAULT,current.expected);
            doTestToASCII(new String(current.unicode),current.ascii,IDNAReference.ALLOW_UNASSIGNED,current.expected);
            if(std3){
                doTestToASCII(new String(current.unicode),current.ascii,IDNAReference.USE_STD3_RULES,current.expected);
            }
        }

        // whole-name IDNToASCII
        if(std3){
            doTestIDNToASCII(new String(current.unicode),current.ascii,IDNAReference.USE_STD3_RULES,current.expected);
        }else{
            doTestIDNToASCII(new String(current.unicode),current.ascii,IDNAReference.DEFAULT,current.expected);
            doTestIDNToASCII(new String(current.unicode),current.ascii,IDNAReference.ALLOW_UNASSIGNED,current.expected);
        }

        // whole-name IDNToUnicode, only where the case asks for it
        if(current.testToUnicode){
            if(std3){
                doTestIDNToUnicode(current.ascii,new String(current.unicode),IDNAReference.USE_STD3_RULES,current.expected);
            }else{
                doTestIDNToUnicode(current.ascii,new String(current.unicode),IDNAReference.DEFAULT,current.expected);
                doTestIDNToUnicode(current.ascii,new String(current.unicode),IDNAReference.ALLOW_UNASSIGNED,current.expected);
            }
        }
    }
}
/**
 * Compares s1 and s2 through the String, StringBuffer and UCharacterIterator
 * overloads of IDNAReference.compare, once with DEFAULT and once with
 * ALLOW_UNASSIGNED, and reports any result that contradicts isEqual.
 *
 * @param s1      first domain name
 * @param s2      second domain name
 * @param isEqual true when the two names must compare equal (result 0),
 *                false when they must compare different (result non-zero)
 */
private void doTestCompare(String s1, String s2, boolean isEqual){
    int[] optionSets = { IDNAReference.DEFAULT, IDNAReference.ALLOW_UNASSIGNED };
    for(int i=0; i<optionSets.length; i++){
        int options = optionSets[i];
        try{
            checkCompareResult(IDNAReference.compare(s1,s2,options), s1, s2, isEqual);
            checkCompareResult(IDNAReference.compare(new StringBuffer(s1), new StringBuffer(s2), options),
                               s1, s2, isEqual);
            checkCompareResult(IDNAReference.compare(UCharacterIterator.getInstance(s1),
                                                     UCharacterIterator.getInstance(s2), options),
                               s1, s2, isEqual);
        }catch(Exception e){
            // keep the exception text so the failure can be diagnosed from the log
            errln("Unexpected exception thrown by IDNAReference.compare: " + e.toString());
        }
    }
}
/** Reports an error when a compare result does not agree with the expected equality. */
private void checkCompareResult(int retVal, String s1, String s2, boolean isEqual){
    if(isEqual && retVal != 0){
        errln("Did not get the expected result for s1: "+ prettify(s1)+
              " s2: "+prettify(s2));
    }else if(!isEqual && retVal == 0){
        // previously unchecked: names that must differ compared as equal
        errln("Names unexpectedly compared equal for s1: "+ prettify(s1)+
              " s2: "+prettify(s2));
    }
}
/**
 * Wraps every TestData.unicodeIn entry as "www.&lt;entry&gt;.com" and compares
 * it with itself, with its asciiIn counterpart, and with the names built
 * from a different unicodeIn / asciiIn entry.
 */
public void TestCompare() throws Exception{
    final String prefix = "www.";
    final String suffix = ".com";
    // reference names built from the first two data entries
    String uni0 = new StringBuffer(prefix).append(TestData.unicodeIn[0]).append(suffix).toString();
    String uni1 = new StringBuffer(prefix).append(TestData.unicodeIn[1]).append(suffix).toString();
    String ascii0 = new StringBuffer(prefix).append(TestData.asciiIn[0]).append(suffix).toString();
    String ascii1 = new StringBuffer(prefix).append(TestData.asciiIn[1]).append(suffix).toString();
    for(int i=0;i< TestData.unicodeIn.length; i++){
        String source = new StringBuffer(prefix).append(TestData.unicodeIn[i]).append(suffix).toString();
        // entry 0 is checked against entry 1; every other entry against entry 0
        boolean first = (i==0);
        // a) compare it with itself
        doTestCompare(source,source,true);
        // b) compare it with asciiIn equivalent
        doTestCompare(source,prefix+TestData.asciiIn[i]+suffix,true);
        // c) compare it with unicodeIn not equivalent
        doTestCompare(source, first ? uni1 : uni0, false);
        // d) compare it with asciiIn not equivalent
        doTestCompare(source, first ? ascii1 : ascii0, false);
    }
}
/**
 * Checks that the ToASCII conversions are chainable, i.e. that
 * func(func(func(src))) == func(src), by re-applying each conversion four
 * more times to its own output.
 */
public void doTestChainingToASCII(String source)throws Exception{
    // convertIDNToASCII
    StringBuffer firstPass = IDNAReference.convertIDNToASCII(source,IDNAReference.DEFAULT);
    StringBuffer current = firstPass;
    for(int round=4; round>0; round--){
        current = IDNAReference.convertIDNtoASCII(current,IDNAReference.DEFAULT);
    }
    if(!firstPass.toString().equals(current.toString())){
        errln("Chaining test failed for convertIDNToASCII");
    }

    // convertToASCII
    firstPass = IDNAReference.convertToASCII(source,IDNAReference.DEFAULT);
    current = firstPass;
    for(int round=4; round>0; round--){
        current = IDNAReference.convertToASCII(current,IDNAReference.DEFAULT);
    }
    if(!firstPass.toString().equals(current.toString())){
        errln("Chaining test failed for convertToASCII");
    }
}
/**
 * Checks that the ToUnicode conversions are chainable, i.e. that
 * func(func(func(src))) == func(src), by re-applying each conversion four
 * more times to its own output.
 */
public void doTestChainingToUnicode(String source)throws Exception{
    // convertIDNToUnicode
    StringBuffer firstPass = IDNAReference.convertIDNToUnicode(source,IDNAReference.DEFAULT);
    StringBuffer current = firstPass;
    for(int round=4; round>0; round--){
        current = IDNAReference.convertIDNToUnicode(current,IDNAReference.DEFAULT);
    }
    if(!firstPass.toString().equals(current.toString())){
        errln("Chaining test failed for convertIDNToUnicode");
    }

    // convertToUnicode
    firstPass = IDNAReference.convertToUnicode(source,IDNAReference.DEFAULT);
    current = firstPass;
    for(int round=4; round>0; round--){
        current = IDNAReference.convertToUnicode(current,IDNAReference.DEFAULT);
    }
    if(!firstPass.toString().equals(current.toString())){
        errln("Chaining test failed for convertToUnicode");
    }
}
/**
 * Runs the ToASCII chaining check on every unicodeIn entry and the
 * ToUnicode chaining check on every asciiIn entry.
 */
public void TestChaining() throws Exception{
    int index;
    for(index=0; index< TestData.unicodeIn.length; index++){
        String unicodeName = new String(TestData.unicodeIn[index]);
        doTestChainingToASCII(unicodeName);
    }
    for(index=0; index< TestData.asciiIn.length; index++){
        doTestChainingToUnicode(TestData.asciiIn[index]);
    }
}
/**
 * Same comparisons as TestCompare, but every name ends with the root label
 * separator: each TestData.unicodeIn entry is wrapped as
 * "www.&lt;entry&gt;.com." before being compared.
 */
public void TestRootLabelSeparator() throws Exception{
    final String prefix = "www.";
    final String suffix = ".com."; /*root label separator*/
    // reference names built from the first two data entries
    String uni0 = new StringBuffer(prefix).append(TestData.unicodeIn[0]).append(suffix).toString();
    String uni1 = new StringBuffer(prefix).append(TestData.unicodeIn[1]).append(suffix).toString();
    String ascii0 = new StringBuffer(prefix).append(TestData.asciiIn[0]).append(suffix).toString();
    String ascii1 = new StringBuffer(prefix).append(TestData.asciiIn[1]).append(suffix).toString();
    for(int i=0;i< TestData.unicodeIn.length; i++){
        String source = new StringBuffer(prefix).append(TestData.unicodeIn[i]).append(suffix).toString();
        // entry 0 is checked against entry 1; every other entry against entry 0
        boolean first = (i==0);
        // a) compare it with itself
        doTestCompare(source,source,true);
        // b) compare it with asciiIn equivalent
        doTestCompare(source,prefix+TestData.asciiIn[i]+suffix,true);
        // c) compare it with unicodeIn not equivalent
        doTestCompare(source, first ? uni1 : uni0, false);
        // d) compare it with asciiIn not equivalent
        doTestCompare(source, first ? ascii1 : ascii0, false);
    }
}
}

View File

@ -0,0 +1,197 @@
/*
*******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java,v $
* $Date: 2003/08/21 23:42:21 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.dev.test.stringprep;
import com.ibm.icu.dev.test.TestFmwk;
/**
 * Tests for the NFSv4 StringPrep profiles exposed by NFS4StringPrep:
 * mixed preparation (mixed_prepare), case-insensitive preparation
 * (cis_prepare) and case-sensitive preparation (cs_prepare).
 *
 * @author ram
 */
public class TestStringPrep extends TestFmwk {
/** Command-line entry point: runs this test class through the TestFmwk runner. */
public static void main(String[] args) throws Exception {
    TestStringPrep test = new TestStringPrep();
    test.run(args);
}
/*
There are several special identifiers ("who") which need to be
understood universally, rather than in the context of a particular
DNS domain. Some of these identifiers cannot be understood when an
NFS client accesses the server, but have meaning when a local process
accesses the file. The ability to display and modify these
permissions is permitted over NFS, even if none of the access methods
on the server understands the identifiers.
Who Description
_______________________________________________________________
"OWNER" The owner of the file.
"GROUP" The group associated with the file.
"EVERYONE" The world.
"INTERACTIVE" Accessed from an interactive terminal.
"NETWORK" Accessed via the network.
"DIALUP" Accessed as a dialup user to the server.
"BATCH" Accessed from a batch job.
"ANONYMOUS" Accessed without any authentication.
"AUTHENTICATED" Any authenticated user (opposite of
ANONYMOUS)
"SERVICE" Access from a system service.
To avoid conflict, these special identifiers are distinguished by an
appended "@" and should appear in the form "xxxx@" (note: no domain
name after the "@"). For example: ANONYMOUS@.
*/
// Inputs for TestNFS4MixedPrep. Every entry contains an '@' delimiter, and the
// test checks that the delimiter is still present after mixed_prepare.
// NOTE(review): sequences such as \u00E0\u00B3\u00AF look like UTF-8 bytes written
// as individual code units - confirm whether that is intentional.
private String[] mixed_prep_data ={
"OWNER@",
"GROUP@",
"EVERYONE@",
"INTERACTIVE@",
"NETWORK@",
"DIALUP@",
"BATCH@",
"ANONYMOUS@",
"AUTHENTICATED@",
"\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D@slip129-37-118-146.nc.us.ibm.net",
"\u0936\u094d\u0930\u0940\u092e\u0926\u094d@saratoga.pe.utexas.edu",
"\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e@dial-120-45.ots.utexas.edu",
"\u0905\u0927\u094d\u092f\u093e\u092f@woo-085.dorms.waller.net",
"\u0905\u0930\u094d\u091c\u0941\u0928@hd30-049.hil.compuserve.com",
"\u0935\u093f\u0937\u093e\u0926@pem203-31.pe.ttu.edu",
"\u092f\u094b\u0917@56K-227.MaxTNT3.pdq.net",
"\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930@dial-36-2.ots.utexas.edu",
"\u0909\u0935\u093E\u091A\u0943@slip129-37-23-152.ga.us.ibm.net",
"\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947@ts45ip119.cadvision.com",
"\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947@sdn-ts-004txaustP05.dialsprint.net",
"\u0938\u092e\u0935\u0947\u0924\u093e@bar-tnt1s66.erols.com",
"\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903@101.st-louis-15.mo.dial-access.att.net",
"\u092e\u093e\u092e\u0915\u093e\u0903@h92-245.Arco.COM",
"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935@dial-13-2.ots.utexas.edu",
"\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924@net-redynet29.datamarkets.com.ar",
"\u0938\u0902\u091c\u0935@ccs-shiva28.reacciun.net.ve",
"\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d@7.houston-11.tx.dial-access.att.net",
"\u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27@ingw129-37-120-26.mo.us.ibm.net",
"\u0c06\u0c28\u0c02\u0c26\u0c4d@dialup6.austintx.com",
"\u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41@dns2.tpao.gov.tr",
"\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d@slip129-37-119-194.nc.us.ibm.net",
"\u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26@cs7.dillons.co.uk.203.119.193.in-addr.arpa",
"\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d@swprd1.innovplace.saskatoon.sk.ca",
"\u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26@bikini.bologna.maraut.it",
"\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d@node91.subnet159-198-79.baxter.com",
"\u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24@cust19.max5.new-york.ny.ms.uu.net",
"\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30@balexander.slip.andrew.cmu.edu",
"\u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32@pool029.max2.denver.co.dynip.alter.net",
"\u0c30\u0c35\u0c3f@cust49.max9.new-york.ny.ms.uu.net",
"\u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d@s61.abq-dialin2.hollyberry.com",
"\u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27@\u0917\u0928\u0947\u0936.sanjose.ibm.com",
"\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f@www.\u00E0\u00B3\u00AF.com",
"\u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32@www.\u00C2\u00A4.com",
"\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D@www.\u00C2\u00A3.com",
"\u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f@\u0025",
"\u0c2e\u0c3e\u0c27\u0c35\u0c4d@\u005C\u005C",
"\u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f@www.\u0021.com",
"test@www.\u0024.com",
"help@\u00C3\u00BC.com",
};
/**
 * Runs NFS4StringPrep.mixed_prepare over every mixed_prep_data entry and
 * checks that the '@' delimiter is still present in the output, then checks
 * that "OWNER@oss.software.ibm.com" is rejected with an exception.
 */
public void TestNFS4MixedPrep(){
    for(int index=0; index< mixed_prep_data.length; index++){
        try{
            byte[] input = mixed_prep_data[index].getBytes("UTF-8");
            byte[] prepared = NFS4StringPrep.mixed_prepare(input);
            String preparedText = new String(prepared, "UTF-8");
            if(preparedText.indexOf('@') < 0){
                errln("Delimiter @ disappeared from the output!");
            }
        }catch(Exception e){
            errln("mixed_prepare for string: " + mixed_prep_data[index] +" failed with " + e.toString());
        }
    }

    /* test the error condition */
    String invalid = "OWNER@oss.software.ibm.com";
    try{
        byte[] prepared = NFS4StringPrep.mixed_prepare(invalid.getBytes("UTF-8"));
        if(prepared!=null){
            errln("Did not get the expected exception");
        }
    }catch(Exception e){
        // an exception is the expected outcome here
        logln("mixed_prepare for string: " + invalid +" passed with " + e.toString());
    }
}
/**
 * Runs NFS4StringPrep.cis_prepare over every conformance test case and
 * checks the output (ignoring case) or the thrown exception against the
 * expectations recorded in the test case.
 */
public void TestCISPrep(){
    for(int i=0;i< (TestData.conformanceTestCases.length);i++){
        TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
        String src = testCase.input;
        Exception expected = testCase.expected;
        String expectedDest = testCase.output;
        try{
            byte[] dest =NFS4StringPrep.cis_prepare(src.getBytes("UTF-8"));
            String destString = new String(dest, "UTF-8");
            if(expectedDest == null){
                // No output to compare against. Previously this path hit a
                // NullPointerException that the catch below turned into the
                // same failure; make the check explicit instead.
                if(expected != null){
                    errln("Did not get the expected exception");
                }
            }else if(!expectedDest.equalsIgnoreCase(destString)){
                errln("Did not get the expected output for nfs4_cis_prep at index " + i);
            }
        }catch(Exception e){
            // expected may be null when the case should succeed; report the
            // failure instead of throwing a NullPointerException out of the test
            if(expected == null || !expected.equals(e)){
                errln("Did not get the expected exception");
            }
        }
    }
}
// Mixed Hangul/Arabic sample string for the case-sensitive profile.
// NOTE(review): this array is not referenced by any method of this class;
// TestCSPrep uses an identical local literal instead.
private static String[] cs_prep_data = {
//BIDI checking is turned off .. so
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74",
};
/**
 * Exercises NFS4StringPrep.cs_prepare in three ways: a mixed Hangul/Arabic
 * string must be accepted, a string must come back unchanged when the
 * boolean flag is false, and an upper-case ASCII string must come back
 * lower-cased when the flag is true.
 */
public void TestCSPrep(){
    // Checking for bidi is turned off
    String src = "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74";
    try{
        NFS4StringPrep.cs_prepare(src.getBytes("UTF-8"), false);
    }catch(Exception e){
        errln("Got unexpected exception: " + e.toString());
    }
    // normalization is turned off
    try{
        src = "www.\u00E0\u00B3\u00AF.com";
        byte[] dest = NFS4StringPrep.cs_prepare(src.getBytes("UTF-8"), false);
        String destStr = new String(dest, "UTF-8");
        if(!src.equals(destStr)){
            errln("Did not get expected output. Expected: "+ prettify(src)+
                  " Got: " + prettify(destStr));
        }
    }catch(Exception e){
        errln("Got unexpected exception: " + e.toString());
    }
    // test case insensitive string
    try{
        src = "THISISATEST";
        byte[] dest = NFS4StringPrep.cs_prepare(src.getBytes("UTF-8"), true);
        String destStr = new String(dest, "UTF-8");
        // lower-case with a fixed locale: the default-locale form would turn
        // 'I' into a dotless i under a Turkish default locale
        String expectedStr = src.toLowerCase(java.util.Locale.ENGLISH);
        if(!expectedStr.equals(destStr)){
            // report the value that was actually compared against
            errln("Did not get expected output. Expected: "+ prettify(expectedStr)+
                  " Got: " + prettify(destStr));
        }
    }catch(Exception e){
        errln("Got unexpected exception: " + e.toString());
    }
}
}

View File

@ -5,13 +5,14 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/LocaleUtility.java,v $
* $Date: 2003/06/03 18:49:32 $
* $Revision: 1.8 $
* $Date: 2003/08/21 23:41:25 $
* $Revision: 1.9 $
* *****************************************************************************************
*/
package com.ibm.icu.impl;
import java.io.InputStream;
import java.util.Locale;
/**
@ -131,4 +132,10 @@ public class LocaleUtility {
}
return new Locale(parts[0], parts[1], parts[2]);
}
/**
 * Opens a resource from the "data" directory located relative to this class.
 *
 * @param name file name of the resource inside the "data" directory
 * @return the stream returned by Class.getResourceAsStream for "data/" + name,
 *         which is null when the resource cannot be found
 */
public static InputStream getImplDataResourceAsStream(String name){
    // use the class literal; no LocaleUtility instance is needed to reach the Class object
    return LocaleUtility.class.getResourceAsStream("data/"+name);
}
}

View File

@ -0,0 +1,96 @@
/*
* Created on May 2, 2003
*
* To change the template for this generated file go to
* Window>Preferences>Java>Code Generation>Code and Comments
*/
package com.ibm.icu.impl;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Reader for binary StringPrep profile data. The constructor authenticates
 * the ICU data header (data format "SPRP"); the remaining methods read the
 * index array, the trie bytes and the mapping table from the stream.
 *
 * @author ram
 */
public final class StringPrepDataReader implements ICUBinary.Authenticate {
    // NOTE(review): the debug key is "NormalizerDataReader", which looks carried
    // over from the normalizer reader - confirm whether a StringPrep key was intended.
    private final static boolean debug = ICUDebug.enabled("NormalizerDataReader");

    /**
     * Reads and authenticates the data header, then wraps the stream for
     * reading the payload.
     *
     * @param inputStream StringPrep profile data stream
     * @exception IOException thrown when the header cannot be read; presumably
     *            also when authentication fails (decided by ICUBinary.readHeader)
     * @draft 2.1
     */
    public StringPrepDataReader(InputStream inputStream)
                                        throws IOException{
        if(debug) System.out.println("Bytes in inputStream " + inputStream.available());

        unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);

        if(debug) System.out.println("Bytes left in inputStream " +inputStream.available());

        dataInputStream = new DataInputStream(inputStream);

        if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available());
    }

    /**
     * Fills both arrays from the stream: first the trie bytes, then the
     * mapping table, one char per element.
     *
     * @param idnaBytes    destination for the trie bytes; filled completely
     * @param mappingTable destination for the mapping data; filled completely
     * @throws IOException if the stream ends early or another I/O error occurs
     */
    public void read(byte[] idnaBytes,
                     char[] mappingTable)
                     throws IOException{

        // Read the bytes that make up the idnaTrie. readFully is required:
        // a plain read(byte[]) may legally return after a partial read.
        dataInputStream.readFully(idnaBytes);

        //Read the extra data
        for(int i=0;i<mappingTable.length;i++){
            mappingTable[i]=dataInputStream.readChar();
        }
    }

    /**
     * @return the data format version this reader understands
     */
    public byte[] getDataFormatVersion(){
        return DATA_FORMAT_VERSION;
    }

    /**
     * Accepts a data version whose bytes 0, 2 and 3 match
     * DATA_FORMAT_VERSION; byte 1 is not compared.
     *
     * @param version the format version read from the data header
     * @return true when the version is acceptable
     */
    public boolean isDataVersionAcceptable(byte version[]){
        return version[0] == DATA_FORMAT_VERSION[0]
               && version[2] == DATA_FORMAT_VERSION[2]
               && version[3] == DATA_FORMAT_VERSION[3];
    }

    /**
     * Reads <code>length</code> ints from the stream.
     *
     * @param length number of index values to read
     * @return a new array holding the values in stream order
     * @throws IOException if the stream ends early or another I/O error occurs
     */
    public int[] readIndexes(int length)throws IOException{
        int[] indexes = new int[length];
        //Read the indexes
        for (int i = 0; i <length ; i++) {
             indexes[i] = dataInputStream.readInt();
        }
        return indexes;
    }

    /**
     * @return the value returned by ICUBinary.readHeader for this data file
     */
    public byte[] getUnicodeVersion(){
        return unicodeVersion;
    }

    // private data members -------------------------------------------------

    /**
     * ICU data file input stream
     */
    private DataInputStream dataInputStream;
    private byte[] unicodeVersion;

    /**
     * File format version that this class understands.
     * No guarantees are made if an older version is used.
     * NOTE(review): the original comment pointed at store.c of gennorm, which
     * looks copied from the normalizer reader - confirm the right generator.
     */
    ///* dataFormat="SPRP" 0x53, 0x50, 0x52, 0x50 */
    private static final byte DATA_FORMAT_ID[] = {(byte)0x53, (byte)0x50,
                                                  (byte)0x52, (byte)0x50};
    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x3, (byte)0x2,
                                                       (byte)0x5, (byte)0x2};
}

Binary file not shown.

View File

@ -0,0 +1,918 @@
/*
*******************************************************************************
* Copyright (C) 2003-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/IDNA.java,v $
* $Date: 2003/08/21 23:40:42 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.stringprep;
import java.io.IOException;
import java.io.InputStream;
import com.ibm.icu.impl.LocaleUtility;
import com.ibm.icu.text.UCharacterIterator;
/**
 *
 * The IDNA API implements the IDNA protocol as defined in the IDNA RFC
 * (http://www.ietf.org/rfc/rfc3490.txt).
 * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels
 * containing non-ASCII code points are required to be processed by the
 * ToASCII operation before they are passed to resolver libraries. Domain names
 * that are obtained from resolver libraries are required to be processed by the
 * ToUnicode operation before the domain name is displayed to the user.
 * IDNA requires that implementations process input strings with Nameprep
 * (http://www.ietf.org/rfc/rfc3491.txt),
 * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
 * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
 * Implementations of IDNA MUST fully implement Nameprep and Punycode;
 * neither Nameprep nor Punycode is optional.
 * The input and output of the ToASCII and ToUnicode operations are Unicode
 * and are designed to be chainable, i.e., applying the ToASCII or ToUnicode operation
 * multiple times to an input string will yield the same result as applying the operation
 * once.
 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
 *
 */
public final class IDNA {
/* IDNA ACE Prefix is "xn--" */
private static final char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
private static final int ACE_PREFIX_LENGTH = 4;
private static final int MAX_LABEL_LENGTH = 63;
private static final int HYPHEN = 0x002D;
private static final String NAME_PREP_PROFILE = "uidna";
private static final int CAPITAL_A = 0x0041;
private static final int CAPITAL_Z = 0x005A;
private static final int LOWER_CASE_DELTA = 0x0020;
private static final int FULL_STOP = 0x002E;

/**
 * Option to prohibit processing of unassigned code points in the input and
 * to skip the check that the input conforms to STD-3 ASCII rules.
 *
 * @see #convertToASCII
 * @see #convertToUnicode
 * @draft ICU 2.6
 */
public static final int DEFAULT = 0x0000;
/**
 * Option to allow processing of unassigned code points in the input.
 *
 * @see #convertToASCII
 * @see #convertToUnicode
 * @draft ICU 2.6
 */
public static final int ALLOW_UNASSIGNED = 0x0001;
/**
 * Option to check if the input conforms to STD-3 ASCII rules.
 *
 * @see #convertToASCII
 * @see #convertToUnicode
 * @draft ICU 2.6
 */
public static final int USE_STD3_RULES = 0x0002;

/* Shared StringPrep instance; stays null until loadInstance() has run */
private static StringPrep prep = null;
/**
 * Loads the shared StringPrep instance from the "uidna.spp" data file on
 * first use; later calls return immediately.
 *
 * @throws IOException if the data file cannot be found or cannot be read
 */
private static synchronized void loadInstance()
throws IOException{
    if(prep==null){
        InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
        if(stream==null){
            // fail with a descriptive error instead of a NullPointerException
            throw new IOException("Could not find the IDNA data file uidna.spp");
        }
        try{
            prep = StringPrep.getInstance(stream);
        }finally{
            // release the stream even when the data fails to load
            stream.close();
        }
    }
}
/**
 * Tells whether src begins with the ACE prefix, ignoring ASCII case.
 *
 * @param src the text to inspect
 * @return true when the first ACE_PREFIX_LENGTH characters match ACE_PREFIX
 */
private static boolean startsWithPrefix(StringBuffer src){
    if(src.length() < ACE_PREFIX_LENGTH){
        return false;
    }
    for(int pos=0; pos<ACE_PREFIX_LENGTH; pos++){
        if(toASCIILower(src.charAt(pos)) != ACE_PREFIX[pos]){
            return false;
        }
    }
    return true;
}
/**
 * Lower-cases a single character if it lies between CAPITAL_A and CAPITAL_Z;
 * every other character is returned unchanged.
 */
private static char toASCIILower(char ch){
    boolean isUpperCaseASCII = (ch >= CAPITAL_A) && (ch <= CAPITAL_Z);
    return isUpperCaseASCII ? (char)(ch + LOWER_CASE_DELTA) : ch;
}
/**
 * Builds a new StringBuffer holding src with every character passed
 * through toASCIILower(char); src itself is not modified.
 */
private static StringBuffer toASCIILower(StringBuffer src){
    StringBuffer lowered = new StringBuffer();
    int length = src.length();
    int pos = 0;
    while(pos < length){
        lowered.append(toASCIILower(src.charAt(pos)));
        pos++;
    }
    return lowered;
}
/**
 * Compares two character sequences, ignoring the case of ASCII letters.
 *
 * @param s1 first sequence
 * @param s2 second sequence
 * @return 0 when both sequences have the same length and match; otherwise
 *         the difference of the first mismatching lower-cased characters,
 *         or the difference of the lengths when one is a prefix of the other
 */
private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
    int length1 = s1.length();
    int length2 = s2.length();
    // only the common prefix can be compared character by character
    int minLength = (length1 < length2) ? length1 : length2;
    for(int i=0; i<minLength; i++){
        char c1 = s1.charAt(i);
        char c2 = s2.charAt(i);
        /* Case-insensitive comparison */
        if(c1!=c2){
            int rc = (int)toASCIILower(c1)-(int)toASCIILower(c2);
            if(rc!=0){
                return rc;
            }
        }
    }
    // equal prefixes: the sequences match only if neither has characters left
    return length1 - length2;
}
/**
 * Compares two strings, ignoring the case of ASCII letters.
 *
 * @param s1 first string
 * @param s2 second string
 * @return 0 when both strings have the same length and match; otherwise
 *         the difference of the first mismatching lower-cased characters,
 *         or the difference of the lengths when one is a prefix of the other
 */
private static int compareCaseInsensitiveASCII(String s1, String s2){
    int length1 = s1.length();
    int length2 = s2.length();
    // only the common prefix can be compared character by character
    int minLength = (length1 < length2) ? length1 : length2;
    for(int i=0; i<minLength; i++){
        char c1 = s1.charAt(i);
        char c2 = s2.charAt(i);
        /* Case-insensitive comparison */
        if(c1!=c2){
            int rc = (int)toASCIILower(c1)-(int)toASCIILower(c2);
            if(rc!=0){
                return rc;
            }
        }
    }
    // equal prefixes: the strings match only if neither has characters left
    return length1 - length2;
}
/**
 * Scans src from start (inclusive) to limit (exclusive) for the first
 * character that the loaded StringPrep instance reports as a label separator.
 *
 * @param src   the characters to scan
 * @param start index of the first character to examine
 * @param limit index just past the last character to examine
 * @return the index of the separator, or the index at which the scan stopped
 *         when no separator was found
 * @throws IOException if the StringPrep data cannot be loaded
 */
private static int getSeparatorIndex(char[] src,int start, int limit)
throws IOException{
    loadInstance();
    int index = start;
    while(index<limit && !prep.isLabelSeparator(src[index])){
        index++;
    }
    // either the separator position or the end of the scanned range
    return index;
}
/**
 * Tells whether ch is a letter, digit or hyphen of the ASCII range, i.e. a
 * member of [\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A].
 */
private static boolean isLDHChar(int ch){
    // high runner case: everything above 'z' is rejected immediately
    if(ch>0x007A){
        return false;
    }
    boolean hyphen = (ch==0x002D);
    boolean digit = (ch >= 0x0030) && (ch <= 0x0039);
    boolean upper = (ch >= 0x0041) && (ch <= 0x005A);
    boolean lower = (ch >= 0x0061) && (ch <= 0x007A);
    return hyphen || digit || upper || lower;
}
/**
 * Applies the ToASCII operation of the IDNA RFC to a <b>single label</b>
 * given as a String. ToASCII is performed on a label before it is handed to
 * something that expects ASCII names. A label is an individual part of a
 * domain name; labels are usually separated by dots, e.g. "www.example.com"
 * consists of the 3 labels "www", "example" and "com".
 *
 * @param src       The input string to be processed
 * @param options   A bit set of options:
 *  - IDNA.DEFAULT              Do not process unassigned code points and do
 *                              not apply STD3 ASCII rules. Unassigned code
 *                              points in the input make the operation fail
 *                              with ParseException.
 *
 *  - IDNA.ALLOW_UNASSIGNED     Unassigned code points in the input are
 *                              treated as normal Unicode code points, which
 *                              allows conversion for query operations.
 *
 *  - IDNA.USE_STD3_RULES       Apply the STD3 ASCII rules for host name
 *                              syntax; input that violates them makes the
 *                              operation fail with ParseException.
 * @return StringBuffer the converted String
 * @throws ParseException
 * @throws IOException
 * @draft ICU 2.8
 */
public static StringBuffer convertToASCII(String src, int options)
throws ParseException, IOException{
    // delegate to the iterator-based implementation
    return convertToASCII(UCharacterIterator.getInstance(src),options);
}
/**
 * Applies the ToASCII operation of the IDNA RFC to a <b>single label</b>
 * given as a StringBuffer. ToASCII is performed on a label before it is
 * handed to something that expects ASCII names. A label is an individual
 * part of a domain name; labels are usually separated by dots, e.g.
 * "www.example.com" consists of the 3 labels "www", "example" and "com".
 *
 * @param src       The input string as StringBuffer to be processed
 * @param options   A bit set of options:
 *  - IDNA.DEFAULT              Do not process unassigned code points and do
 *                              not apply STD3 ASCII rules. Unassigned code
 *                              points in the input make the operation fail
 *                              with ParseException.
 *
 *  - IDNA.ALLOW_UNASSIGNED     Unassigned code points in the input are
 *                              treated as normal Unicode code points, which
 *                              allows conversion for query operations.
 *
 *  - IDNA.USE_STD3_RULES       Apply the STD3 ASCII rules for host name
 *                              syntax; input that violates them makes the
 *                              operation fail with ParseException.
 * @return StringBuffer the converted String
 * @throws ParseException
 * @throws IOException
 * @draft ICU 2.8
 */
public static StringBuffer convertToASCII(StringBuffer src, int options)
throws ParseException, IOException{
    // delegate to the iterator-based implementation
    return convertToASCII(UCharacterIterator.getInstance(src),options);
}
/**
* This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g." "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
        throws ParseException, IOException {
    // Performs ToASCII on a single label: prepare the label, optionally
    // enforce the STD3 ASCII rules, Punycode-encode it when it contains
    // non-ASCII code units, and finally check the label length.

    // load the data
    loadInstance();

    // get the options
    boolean useSTD3ASCIIRules = (options & USE_STD3_RULES) != 0;
    // cleared as soon as a code unit above 0x7F is seen
    boolean srcIsASCII = true;
    // cleared as soon as an ASCII code unit is rejected by isLDHChar
    boolean srcIsLDH = true;
    // index of the last rejected ASCII code unit, -1 if there is none
    int failPos = -1;

    // step 2
    StringBuffer processOut = prep.prepare(srcIter, options);
    int poLen = processOut.length();

    // step 3 & 4
    for (int j = 0; j < poLen; j++) {
        char ch = processOut.charAt(j);
        if (ch > 0x7F) {
            // Non-ASCII code units are handled by the Punycode step below;
            // the STD3 check is only about non-LDH *ASCII* code points, so
            // they must not clear srcIsLDH. Surrogates need no assembling
            // here for the same reason.
            srcIsASCII = false;
        } else if (!isLDHChar(ch)) {
            srcIsLDH = false;
            failPos = j;
        }
    }

    if (useSTD3ASCIIRules) {
        // verify 3a and 3b
        if (!srcIsLDH) {
            // NOTE(review): the reported position is one before the offending
            // code unit whenever failPos > 0; kept as in the original port.
            throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                     ParseException.STD3_ASCII_RULES_ERROR,
                                     processOut.toString(),
                                     (failPos > 0) ? (failPos - 1) : failPos);
        }
        // An empty prepared label (e.g. produced by a trailing separator in a
        // domain name) has no first/last code unit to inspect; guarding here
        // avoids a StringIndexOutOfBoundsException from charAt().
        if (poLen > 0) {
            if (processOut.charAt(0) == HYPHEN) {
                throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                         ParseException.STD3_ASCII_RULES_ERROR,
                                         processOut.toString(), 0);
            }
            if (processOut.charAt(poLen - 1) == HYPHEN) {
                throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                         ParseException.STD3_ASCII_RULES_ERROR,
                                         processOut.toString(),
                                         poLen - 1);
            }
        }
    }

    StringBuffer dest;
    if (srcIsASCII) {
        // nothing to encode: the prepared label is the result
        dest = processOut;
    } else {
        // step 5 : verify the sequence does not begin with ACE prefix
        if (startsWithPrefix(processOut)) {
            // The original message claimed the input did NOT start with the
            // prefix, which is the opposite of the condition detected here.
            throw new ParseException("The input starts with the ACE Prefix.",
                                     ParseException.ACE_PREFIX_ERROR,
                                     processOut.toString(), 0);
        }
        // step 6: encode the sequence with punycode
        boolean[] caseFlags = new boolean[poLen];
        StringBuffer punyout = Punycode.encode(processOut, caseFlags);
        // convert all codepoints to lower case ASCII
        StringBuffer lowerOut = toASCIILower(punyout);
        // step 7: prepend the ACE prefix, then copy the encoded label
        dest = new StringBuffer();
        dest.append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH);
        dest.append(lowerOut);
    }

    // step 8: verify the length of the resulting label
    if (dest.length() > MAX_LABEL_LENGTH) {
        throw new ParseException("The labels in the input are too long. Length > 64.",
                                 ParseException.LABEL_TOO_LONG_ERROR, dest.toString(), 0);
    }
    return dest;
}
/**
* Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param iter The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertIDNtoASCII(UCharacterIterator iter, int options)
        throws ParseException, IOException {
    // The String overload does the label splitting; hand it the iterator's text.
    String domainName = iter.getText();
    return convertIDNToASCII(domainName, options);
}
/**
* Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param str The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertIDNtoASCII(StringBuffer str, int options)
        throws ParseException, IOException {
    // The String overload does the label splitting; hand it the buffer's contents.
    String domainName = str.toString();
    return convertIDNToASCII(domainName, options);
}
/**
* Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertIDNToASCII(String src, int options)
        throws ParseException, IOException {
    // load the data
    loadInstance();

    char[] chars = src.toCharArray();
    StringBuffer converted = new StringBuffer();
    int labelStart = 0;   // first code unit of the label being converted
    int cursor = 0;       // where the search for the next separator resumes
    boolean moreLabels = true;

    while (moreLabels) {
        // cursor now marks the end of the current label
        cursor = getSeparatorIndex(chars, cursor, chars.length);
        String label = new String(chars, labelStart, cursor - labelStart);
        converted.append(convertToASCII(UCharacterIterator.getInstance(label), options));

        moreLabels = (cursor != chars.length);
        if (moreLabels) {
            // step over the separator and emit a plain full stop in its place
            cursor++;
            labelStart = cursor;
            converted.append((char) FULL_STOP);
        }
    }
    return converted;
}
/**
* This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g. "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertToUnicode(String src, int options)
        throws ParseException, IOException {
    // Wrap the string in a character iterator and defer to the
    // iterator-based overload, which does the actual ToUnicode work.
    return convertToUnicode(UCharacterIterator.getInstance(src), options);
}
/**
* This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g. "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertToUnicode(StringBuffer src, int options)
        throws ParseException, IOException {
    // Wrap the buffer in a character iterator and defer to the
    // iterator-based overload, which does the actual ToUnicode work.
    return convertToUnicode(UCharacterIterator.getInstance(src), options);
}
/**
* This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g. "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param iter The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
        throws ParseException, IOException {
    // Performs ToUnicode on a single label: prepare the label if it is not
    // pure ASCII, and if it carries the ACE prefix decode it with Punycode
    // and verify the round trip through ToASCII. Labels without the prefix
    // are returned unchanged (after the optional STD3 check).

    // load the data
    loadInstance();

    // passed through to Punycode.decode; no case flags are requested
    boolean[] caseFlags = null;
    // get the options
    boolean useSTD3ASCIIRules = (options & USE_STD3_RULES) != 0;
    // cleared as soon as a code unit above 0x7F is seen
    boolean srcIsASCII = true;
    // cleared as soon as an ASCII code unit is rejected by isLDHChar
    boolean srcIsLDH = true;
    // iterator index just past the last rejected ASCII code unit, -1 if none
    int failPos = -1;
    int ch;
    int saveIndex = iter.getIndex();

    // step 1: find out if all the codepoints in src are ASCII
    while ((ch = iter.next()) != UCharacterIterator.DONE) {
        if (ch > 0x7F) {
            // The STD3 check concerns non-LDH *ASCII* code points only.
            srcIsASCII = false;
        } else if (!isLDHChar(ch)) {
            // Sticky: the original assigned isLDHChar(ch) to srcIsLDH on every
            // iteration, so only the final character decided the outcome.
            srcIsLDH = false;
            failPos = iter.getIndex();
        }
    }

    StringBuffer processOut;
    if (!srcIsASCII) {
        // step 2: process the string (rewind first, the scan above consumed it)
        iter.setIndex(saveIndex);
        processOut = prep.prepare(iter, options);
    } else {
        // just point to source
        processOut = new StringBuffer(iter.getText());
    }

    // TODO:
    // The RFC states that
    // <quote>
    // ToUnicode never fails. If any step fails, then the original input
    // is returned immediately in that step.
    // </quote>

    // step 3: verify ACE Prefix
    if (startsWithPrefix(processOut)) {
        // step 4: Remove the ACE Prefix
        String temp = processOut.substring(ACE_PREFIX_LENGTH, processOut.length());
        // step 5: Decode using punycode
        StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), caseFlags);
        // step 6: Apply toASCII
        StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
        // step 7: verify
        if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) {
            throw new ParseException("The verification step prescribed by the RFC 3491 failed",
                                     ParseException.VERIFICATION_ERROR);
        }
        // step 8: return output of step 5
        return decodeOut;
    }

    // verify that STD3 ASCII rules are satisfied
    if (useSTD3ASCIIRules) {
        if (!srcIsLDH) {
            throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                     ParseException.STD3_ASCII_RULES_ERROR, processOut.toString(),
                                     (failPos > 0) ? (failPos - 1) : failPos);
        }
        int poLen = processOut.length();
        // An empty label has no first/last code unit; guarding here avoids a
        // StringIndexOutOfBoundsException from charAt().
        if (poLen > 0) {
            if (processOut.charAt(0) == HYPHEN) {
                throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                         ParseException.STD3_ASCII_RULES_ERROR,
                                         processOut.toString(), 0);
            }
            if (processOut.charAt(poLen - 1) == HYPHEN) {
                // Report the index of the trailing hyphen itself (as the
                // ToASCII path does) rather than one past the end of the text.
                throw new ParseException("The input does not conform to the STD 3 ASCII rules",
                                         ParseException.STD3_ASCII_RULES_ERROR,
                                         processOut.toString(),
                                         poLen - 1);
            }
        }
    }
    // just return the source
    return new StringBuffer(iter.getText());
}
/**
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param iter The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
        throws ParseException, IOException {
    // The String overload does the label splitting; hand it the iterator's text.
    String domainName = iter.getText();
    return convertIDNToUnicode(domainName, options);
}
/**
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param str The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
        throws ParseException, IOException {
    // The String overload does the label splitting; hand it the buffer's contents.
    String domainName = str.toString();
    return convertIDNToUnicode(domainName, options);
}
/**
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
public static StringBuffer convertIDNToUnicode(String src, int options)
        throws ParseException, IOException {
    char[] chars = src.toCharArray();
    StringBuffer converted = new StringBuffer();
    int labelStart = 0;   // first code unit of the label being converted
    int cursor = 0;       // where the search for the next separator resumes
    boolean moreLabels = true;

    while (moreLabels) {
        // cursor now marks the end of the current label
        cursor = getSeparatorIndex(chars, cursor, chars.length);
        String label = new String(chars, labelStart, cursor - labelStart);
        converted.append(convertToUnicode(UCharacterIterator.getInstance(label), options));

        moreLabels = (cursor != chars.length);
        if (moreLabels) {
            // step over the separator and emit a plain full stop in its place
            cursor++;
            labelStart = cursor;
            converted.append((char) FULL_STOP);
        }
    }
    return converted;
}
/**
* Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using a case-insensitive ASCII comparison.
* Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
*
* @param s1 First IDN string as StringBuffer
* @param s2 Second IDN string as StringBuffer
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
// TODO: optimize
public static int compare(StringBuffer s1, StringBuffer s2, int options)
        throws ParseException, IOException {
    boolean missingInput = (s1 == null) || (s2 == null);
    if (missingInput) {
        throw new IllegalArgumentException("One of the source buffers is null");
    }
    // Both names are reduced to their ASCII form (first s1, then s2) and the
    // results are compared ignoring ASCII case.
    return compareCaseInsensitiveASCII(convertIDNToASCII(s1.toString(), options),
                                       convertIDNToASCII(s2.toString(), options));
}
/**
* Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using a case-insensitive ASCII comparison.
* Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
*
* @param s1 First IDN string
* @param s2 Second IDN string
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
// TODO: optimize
public static int compare(String s1, String s2, int options)
        throws ParseException, IOException {
    boolean missingInput = (s1 == null) || (s2 == null);
    if (missingInput) {
        throw new IllegalArgumentException("One of the source buffers is null");
    }
    // Both names are reduced to their ASCII form (first s1, then s2) and the
    // results are compared ignoring ASCII case.
    return compareCaseInsensitiveASCII(convertIDNToASCII(s1, options),
                                       convertIDNToASCII(s2, options));
}
/**
* Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using a case-insensitive ASCII comparison.
* Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
*
* @param i1 First IDN string as UCharacterIterator
* @param i2 Second IDN string as UCharacterIterator
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2
* @throws ParseException
* @throws IOException
* @draft ICU 2.8
*/
// TODO: optimize
public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
        throws ParseException, IOException {
    boolean missingInput = (i1 == null) || (i2 == null);
    if (missingInput) {
        throw new IllegalArgumentException("One of the source buffers is null");
    }
    // Both names are reduced to their ASCII form (first i1, then i2) and the
    // results are compared ignoring ASCII case.
    return compareCaseInsensitiveASCII(convertIDNToASCII(i1.getText(), options),
                                       convertIDNToASCII(i2.getText(), options));
}
}

View File

@ -0,0 +1,143 @@
/*
*******************************************************************************
* Copyright (C) 2003-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/ParseException.java,v $
* $Date: 2003/08/21 23:40:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.stringprep;
/**
* @author ram
*
* Exception thrown by the IDNA and Punycode code in this package. It carries an
* error code plus the text surrounding the position at which processing failed.
*/
public class ParseException extends Exception {
    /* Error codes; one of these is passed to the constructors. */
    public static final int INVALID_CHAR_FOUND = 0;
    public static final int ILLEGAL_CHAR_FOUND = 1;
    public static final int PROHIBITED_ERROR = 2;
    public static final int UNASSIGNED_ERROR = 3;
    public static final int CHECK_BIDI_ERROR = 4;
    public static final int STD3_ASCII_RULES_ERROR = 5;
    public static final int ACE_PREFIX_ERROR = 6;
    public static final int VERIFICATION_ERROR = 7;
    public static final int LABEL_TOO_LONG_ERROR = 8;
    public static final int BUFFER_OVERFLOW_ERROR = 9;

    /** Maximum number of characters kept as context before the error position. */
    public static final int PARSE_CONTEXT_LEN = 16;

    /** One of the error code constants declared above. */
    private int error;

    /**
     * The line on which the error occurred. Stays 0 unless
     * {@link #setLineNumber(int)} is called.
     */
    private int line;

    /**
     * The character offset of the error. Stays 0 unless
     * {@link #setOffset(int)} is called; the constructors do not set it.
     */
    private int offset;

    /** Textual context before the error; empty if no context was supplied. */
    private StringBuffer preContext = new StringBuffer();

    /** Textual context from the error position onwards; empty if no context was supplied. */
    private StringBuffer postContext = new StringBuffer();

    /**
     * Creates an exception without any textual context.
     *
     * @param message the detail message
     * @param error   one of the error code constants of this class
     */
    public ParseException(String message, int error) {
        super(message);
        this.error = error;
    }

    /**
     * Creates an exception and records the text around the failure position.
     *
     * @param message the detail message
     * @param error   one of the error code constants of this class
     * @param rules   the text being processed; if null no context is recorded
     * @param pos     index into <code>rules</code> at which the error was detected
     */
    public ParseException(String message, int error, String rules, int pos) {
        super(message);
        this.error = error;
        if (rules != null) {
            // Use the private helpers so that construction never runs code
            // from an overriding subclass.
            char[] text = rules.toCharArray();
            fillPreContext(text, pos);
            fillPostContext(text, pos);
        }
    }

    /**
     * Returns the error code this exception was created with.
     *
     * @return one of the error code constants of this class
     */
    public int getError() {
        return error;
    }

    /**
     * Two ParseExceptions are equal when they carry the same error code;
     * message and context are not compared.
     */
    public boolean equals(Object other) {
        if (!(other instanceof ParseException)) {
            return false;
        }
        return ((ParseException) other).error == this.error;
    }

    /** Hash code consistent with {@link #equals(Object)}: derived from the error code only. */
    public int hashCode() {
        return error;
    }

    /**
     * Returns the message followed by the recorded pre- and post-context.
     */
    public String toString() {
        StringBuffer buf = new StringBuffer();
        buf.append(super.getMessage());
        buf.append(". preContext: ");
        buf.append(preContext);
        buf.append(". postContext: ");
        buf.append(postContext);
        buf.append("\n");
        return buf.toString();
    }

    public void setOffset(int offset) {
        this.offset = offset;
    }

    public int getOffset() {
        return offset;
    }

    public int getLineNumber() {
        return line;
    }

    /**
     * Stores the line number of the error.
     *
     * @param lineNumber the line on which the error occurred
     * @return the line number now stored
     */
    public int setLineNumber(int lineNumber) {
        // The original returned the old value without ever storing the argument.
        this.line = lineNumber;
        return line;
    }

    public String getPreContext() {
        return preContext.toString();
    }

    public String getPostContext() {
        return postContext.toString();
    }

    /**
     * Replaces the pre-context with the (at most PARSE_CONTEXT_LEN) characters
     * of <code>str</code> that immediately precede <code>pos</code>.
     */
    public void setPreContext(String str, int pos) {
        setPreContext(str.toCharArray(), pos);
    }

    /**
     * Replaces the pre-context with the (at most PARSE_CONTEXT_LEN) characters
     * of <code>str</code> that immediately precede <code>pos</code>.
     */
    public void setPreContext(char[] str, int pos) {
        fillPreContext(str, pos);
    }

    /**
     * Replaces the post-context with the characters of <code>str</code> from
     * <code>pos</code> to the end of the text.
     */
    public void setPostContext(String str, int pos) {
        setPostContext(str.toCharArray(), pos);
    }

    /**
     * Replaces the post-context with the characters of <code>str</code> from
     * <code>pos</code> to the end of the text.
     */
    public void setPostContext(char[] str, int pos) {
        fillPostContext(str, pos);
    }

    /** Sets both pre- and post-context around <code>pos</code>. */
    public void setContext(char[] str, int pos) {
        setPreContext(str, pos);
        setPostContext(str, pos);
    }

    /** Sets both pre- and post-context around <code>pos</code>. */
    public void setContext(String str, int pos) {
        setPreContext(str, pos);
        setPostContext(str, pos);
    }

    /* Keeps a position inside [0, length] so building context can never throw. */
    private static int clamp(int pos, int length) {
        if (pos < 0) {
            return 0;
        }
        return (pos > length) ? length : pos;
    }

    /* Stores the window [max(0, pos - PARSE_CONTEXT_LEN), pos) as pre-context. */
    private void fillPreContext(char[] str, int pos) {
        int end = clamp(pos, str.length);
        int start = Math.max(0, end - PARSE_CONTEXT_LEN);
        preContext.setLength(0);
        preContext.append(str, start, end - start);
    }

    /* Stores everything from pos to the end of the text as post-context. */
    private void fillPostContext(char[] str, int pos) {
        int start = clamp(pos, str.length);
        postContext.setLength(0);
        postContext.append(str, start, str.length - start);
    }
}

View File

@ -0,0 +1,467 @@
/*
*******************************************************************************
* Copyright (C) 2003-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/Punycode.java,v $
* $Date: 2003/08/21 23:40:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.stringprep;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
/**
* @author ram
*
* Punycode encoder/decoder used by the IDNA implementation in this package
* (see the Bootstring parameters declared below).
*/
/* Package Private class */
final class Punycode {
/* Punycode parameters for Bootstring */
// NOTE(review): these values look like the Punycode parameter set of
// RFC 3492 section 5 — confirm against the RFC before changing any of them.
private static final int BASE = 36;
private static final int TMIN = 1;
private static final int TMAX = 26;
private static final int SKEW = 38;
private static final int DAMP = 700;
private static final int INITIAL_BIAS = 72;
private static final int INITIAL_N = 0x80;
/* "Basic" Unicode/ASCII code points */
private static final int HYPHEN = 0x2d;
// the delimiter is the hyphen itself
private static final int DELIMITER = HYPHEN;
private static final int ZERO = 0x30;
private static final int NINE = 0x39;
private static final int SMALL_A = 0x61;
private static final int SMALL_Z = 0x7a;
private static final int CAPITAL_A = 0x41;
private static final int CAPITAL_Z = 0x5a;
// Size of the code point buffer and of the output buffer allocated by encode();
// encode() throws IndexOutOfBoundsException once this many input code points
// have been counted.
private static final int MAX_CP_COUNT = 200;
// NOTE(review): the two *_MAGIC constants are not referenced in the part of the
// class reviewed here; presumably sign-bit masks — verify their use further down.
private static final int UINT_MAGIC = 0x80000000;
private static final long ULONG_MAGIC = 0x8000000000000000L;
private static int adaptBias(int delta, int length, boolean firstTime){
    // Scale delta down: by DAMP on the first call, by two on later calls,
    // then add the share contributed by each of the `length` code points.
    int scaled = firstTime ? (delta / DAMP) : (delta / 2);
    scaled += scaled / length;

    // Repeatedly divide until the value drops to the threshold, counting
    // BASE for every division performed.
    final int threshold = ((BASE - TMIN) * TMAX) / 2;
    int count = 0;
    while (scaled > threshold) {
        scaled /= (BASE - TMIN);
        count += BASE;
    }
    return count + (((BASE - TMIN + 1) * scaled) / (scaled + SKEW));
}
/**
* basicToDigit[] maps a basic code point b (used as the array index, 0..255)
* to its numeric value for representing integers, in the range 0 to BASE-1,
* or to -1 if b does not represent a value.
* Each row below covers 16 consecutive code points: 'A'..'Z' and 'a'..'z'
* both map to 0..25, and '0'..'9' map to 26..35.
*/
static final int[] basicToDigit= new int[]{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
// 0x30..0x3F: digits '0'..'9'
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
// 0x40..0x5F: upper case 'A'..'Z'
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
// 0x60..0x7F: lower case 'a'..'z'
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
// 0x80..0xFF: no digit values
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/**
 * Forces the case of an ASCII letter; any other character is returned unchanged.
 *
 * @param b         the character to map
 * @param uppercase true to map a..z to A..Z, false to map A..Z to a..z
 * @return the case-mapped character
 */
private static char asciiCaseMap(char b, boolean uppercase) {
    final int caseOffset = SMALL_A-CAPITAL_A;
    if(uppercase && SMALL_A<=b && b<=SMALL_Z) {
        return (char)(b-caseOffset);
    }
    if(!uppercase && CAPITAL_A<=b && b<=CAPITAL_Z) {
        return (char)(b+caseOffset);
    }
    return b;
}
/**
 * Returns the basic code point that represents the digit value d,
 * which must be in the range 0 to BASE-1.
 * Values 0..25 map to ASCII letters, values 26..35 map to ASCII 0..9.
 *
 * @param digit     the digit value to represent
 * @param uppercase true to produce A..Z instead of a..z for the letter digits;
 *                  has no effect on the decimal digits
 * @return the basic code point for the digit
 */
private static char digitToBasic(int digit, boolean uppercase) {
    if(digit>=26) {
        /* 26..35 map to ASCII 0..9 */
        return (char)((ZERO-26)+digit);
    }
    /* 0..25 map to ASCII a..z or A..Z */
    int firstLetter = uppercase ? CAPITAL_A : SMALL_A;
    return (char)(firstLetter+digit);
}
/**
 * Converts a Unicode string to Punycode.
 *
 * The output is collected in a growable buffer, so an encoding longer than
 * MAX_CP_COUNT chars is returned completely instead of being cut off.
 *
 * @param src       the UTF-16 text to encode
 * @param caseFlags optional array, indexed by the char index in src;
 *                  a true entry requests the uppercase form for that
 *                  character. May be null.
 * @return a new StringBuffer containing the Punycode form of src
 * @throws ParseException if src contains an unmatched surrogate
 * @throws IndexOutOfBoundsException if src has more than MAX_CP_COUNT code points
 */
public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
    int[] cpBuffer = new int[MAX_CP_COUNT];
    int n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, srcCPCount;
    char c, c2;
    int srcLength = src.length();
    StringBuffer dest = new StringBuffer(srcLength);
    /*
     * Handle the basic code points and
     * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
     */
    srcCPCount=0;
    for(j=0; j<srcLength; ++j) {
        if(srcCPCount==MAX_CP_COUNT) {
            /* too many input code points */
            throw new IndexOutOfBoundsException();
        }
        c=src.charAt(j);
        if(isBasic(c)) {
            cpBuffer[srcCPCount++]=0;
            dest.append(caseFlags!=null ? asciiCaseMap(c, caseFlags[j]) : c);
        } else {
            /* the sign bit carries the case flag */
            n=(caseFlags!=null && caseFlags[j]) ? 0x80000000 : 0;
            if(!UTF16.isSurrogate(c)) {
                n|=c;
            } else if(UTF16.isLeadSurrogate(c) && (j+1)<srcLength && UTF16.isTrailSurrogate(c2=src.charAt(j+1))) {
                ++j;
                n|=UCharacter.getCodePoint(c, c2);
            } else {
                /* error: unmatched surrogate */
                throw new ParseException("Illegal char found",ParseException.ILLEGAL_CHAR_FOUND);
            }
            cpBuffer[srcCPCount++]=n;
        }
    }
    /* Finish the basic string - if it is not empty - with a delimiter. */
    basicLength=dest.length();
    if(basicLength>0) {
        dest.append((char)DELIMITER);
    }
    /*
     * handledCPCount is the number of code points that have been handled
     * basicLength is the number of basic code points
     */
    /* Initialize the state: */
    n=INITIAL_N;
    delta=0;
    bias=INITIAL_BIAS;
    /* Main encoding loop: */
    for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
        /*
         * All non-basic code points < n have been handled already.
         * Find the next larger one:
         */
        for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(n<=q && q<m) {
                m=q;
            }
        }
        /*
         * Increase delta enough to advance the decoder's
         * <n,i> state to <m,0>, but guard against overflow:
         */
        if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
            throw new RuntimeException("Internal program error");
        }
        delta+=(m-n)*(handledCPCount+1);
        n=m;
        /* Encode a sequence of same code points n */
        for(j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(q<n) {
                ++delta;
            } else if(q==n) {
                /* Represent delta as a generalized variable-length integer: */
                for(q=delta, k=BASE; /* no condition */; k+=BASE) {
                    /*
                     * The threshold is clamped as in
                     * draft-ietf-idn-punycode-03.txt: the upper clamp
                     * compares k with bias+TMAX rather than t with TMAX.
                     */
                    t=k-bias;
                    if(t<TMIN) {
                        t=TMIN;
                    } else if(k>=(bias+TMAX)) {
                        t=TMAX;
                    }
                    if(q<t) {
                        break;
                    }
                    dest.append(digitToBasic(t+(q-t)%(BASE-t), false));
                    q=(q-t)/(BASE-t);
                }
                /* the case of the last digit records the case flag */
                dest.append(digitToBasic(q, (cpBuffer[j]<0)));
                bias=adaptBias(delta, handledCPCount+1,(handledCPCount==basicLength));
                delta=0;
                ++handledCPCount;
            }
        }
        ++delta;
        ++n;
    }
    return dest;
}
/**
 * Tells whether a code point is below INITIAL_N, that is,
 * whether it is copied literally instead of being encoded.
 *
 * @param ch the code point to test
 * @return true if ch is less than INITIAL_N
 */
private static boolean isBasic(int ch){
    return INITIAL_N > ch;
}
/**
 * Tells whether a code point is an ASCII uppercase letter A..Z.
 *
 * @param ch the code point to test
 * @return true if CAPITAL_A <= ch <= CAPITAL_Z
 */
private static boolean isBasicUpperCase(int ch){
    /* the upper bound must be tested with <=, not >= */
    return( CAPITAL_A<=ch && ch <= CAPITAL_Z);
}
/**
 * Tells whether a code point lies in the UTF-16 surrogate range.
 *
 * @param ch the code point to test
 * @return true if ch is in 0xd800..0xdfff
 */
private static boolean isSurrogate(int ch){
    return 0xd800<=ch && ch<=0xdfff;
}
/**
 * Converts Punycode to a Unicode string.
 *
 * @param src       the Punycode text to decode
 * @param caseFlags optional output array; if not null, the entry for each
 *                  output code unit is set to true when the input
 *                  character that determined it was an ASCII uppercase
 *                  letter. The caller must supply an array that is large
 *                  enough for the output.
 * @return a new StringBuffer containing the decoded UTF-16 text
 * @throws ParseException if src contains a character that is not valid at
 *         its position, ends inside a variable-length integer, or encodes
 *         a value that overflows or is not a valid code point
 * @throws IndexOutOfBoundsException if the decoded text does not fit into
 *         MAX_CP_COUNT code units
 */
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
        throws ParseException{
    int srcLength = src.length();
    StringBuffer result = new StringBuffer();
    int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
        destCPCount, firstSupplementaryIndex, cpLength;
    char b;
    int destCapacity = MAX_CP_COUNT;
    char[] dest = new char[destCapacity];
    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src.charAt(--j)==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;
    while(j>0) {
        b=src.charAt(--j);
        if(!isBasic(b)) {
            throw new ParseException("Illegal char found", ParseException.INVALID_CHAR_FOUND);
        }
        if(j<destCapacity) {
            dest[j]= b;
            if(caseFlags!=null) {
                caseFlags[j]=isBasicUpperCase(b);
            }
        }
    }
    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;
    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i. The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
            }
            /*
             * Look the digit up by the full char value. Characters beyond
             * the table are not digits; narrowing them to byte would give
             * a negative index or alias them onto ASCII digits.
             */
            char digitChar=src.charAt(in++);
            digit=digitChar<basicToDigit.length ? basicToDigit[digitChar] : -1;
            if(digit<0) {
                throw new ParseException("Invalid char found", ParseException.INVALID_CHAR_FOUND);
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
            }
            i+=digit*w;
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }
            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
            }
            w*=BASE-t;
        }
        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (oldi==0));
        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
        }
        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
        if(n>0x10ffff || isSurrogate(n)) {
            /* Unicode code point overflow */
            throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
        }
        /* Insert n at position i of the output: */
        cpLength=UTF16.getCharCount(n);
        /* <= so that an output of exactly destCapacity code units still fits */
        if((destLength+cpLength)<=destCapacity) {
            int codeUnitIndex;
            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex);
            }
            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                System.arraycopy(dest, codeUnitIndex,
                                 dest, codeUnitIndex+cpLength,
                                 (destLength-codeUnitIndex));
                if(caseFlags!=null) {
                    System.arraycopy(caseFlags, codeUnitIndex,
                                     caseFlags, codeUnitIndex+cpLength,
                                     destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(char)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=UTF16.getLeadSurrogate(n);
                dest[codeUnitIndex+1]=UTF16.getTrailSurrogate(n);
            }
            if(caseFlags!=null) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=isBasicUpperCase(src.charAt(in-1));
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=false;
                }
            }
        }
        destLength+=cpLength;
        ++i;
    }
    if(destLength>destCapacity) {
        /* report the overflow explicitly instead of failing inside append() */
        throw new IndexOutOfBoundsException("Decoded text needs "+destLength
                +" code units but only "+destCapacity+" are supported");
    }
    result.append(dest, 0, destLength);
    return result;
}
}

View File

@ -0,0 +1,409 @@
/*
*******************************************************************************
* Copyright (C) 2003-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/StringPrep.java,v $
* $Date: 2003/08/21 23:40:41 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.stringprep;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.StringPrepDataReader;
import com.ibm.icu.impl.Trie;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterDirection;
/**
 * StringPrep prepares Unicode text according to a profile that is read from
 * a binary profile stream. Preparation maps, optionally normalizes (NFKC),
 * checks for prohibited code points and optionally checks BiDi code points,
 * in the order defined by RFC 3454.
 *
 * All profile data (trie, index table and mapping table) is held per
 * instance, so several instances created from different profiles do not
 * affect each other.
 *
 * @author ram
 * @draft ICU 2.8
 */
public class StringPrep {
    /**
     * Option to prohibit processing of unassigned code points in the input
     *
     * @see #prepare
     * @draft ICU 2.8
     */
    public static final int NONE = 0x0000;
    /**
     * Option to allow processing of unassigned code points in the input
     *
     * @see #prepare
     * @draft ICU 2.8
     */
    public static final int ALLOW_UNASSIGNED = 0x0001;

    /* types of the values stored in the trie */
    private static final int UNASSIGNED = 0x0000;
    private static final int MAP = 0x0001;
    private static final int PROHIBITED = 0x0002;
    private static final int LABEL_SEPARATOR = 0x0003;
    private static final int DELETE = 0x0004;
    private static final int TYPE_LIMIT = 0x0005;

    /* bits of indexes[OPTIONS] */
    private static final int NORMALIZATION_ON = 0x0001;
    private static final int CHECK_BIDI_ON = 0x0002;

    /* trie words at or above this threshold carry a type, not a mapping */
    private static final int TYPE_THRESHOLD = 0xFFF0;
    private static final int MAX_INDEX_VALUE = 0x3FBF; /*16139*/
    private static final int MAX_INDEX_TOP_LENGTH = 0x0003;

    /* indexes[] value names */
    private static final int INDEX_TRIE_SIZE = 0; /* number of bytes in normalization trie */
    private static final int INDEX_MAPPING_DATA_SIZE = 1; /* The array that contains the mapping */
    private static final int NORM_CORRECTNS_LAST_UNI_VERSION = 2; /* The index of Unicode version of last entry in NormalizationCorrections.txt */
    private static final int ONE_UCHAR_MAPPING_INDEX_START = 3; /* The starting index of 1 UChar mapping index in the mapping data array */
    private static final int TWO_UCHARS_MAPPING_INDEX_START = 4; /* The starting index of 2 UChars mapping index in the mapping data array */
    private static final int THREE_UCHARS_MAPPING_INDEX_START = 5;
    private static final int FOUR_UCHARS_MAPPING_INDEX_START = 6;
    private static final int OPTIONS = 7; /* Bit set of options to turn on in the profile */
    private static final int INDEX_TOP = 16; /* changing this requires a new formatVersion */

    /**
     * Default buffer size of datafile
     */
    private static final int DATA_BUFFER_SIZE = 25000;

    /* Wrappers for Trie implementations */
    private static final class StringPrepTrieImpl implements Trie.DataManipulate{
        /* the trie of the profile that owns this wrapper */
        CharTrie sprepTrie = null;
        /**
         * Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
         * data the index array offset of the indexes for that lead surrogate.
         * @param value data value for a surrogate from the trie, including
         *        the folding offset
         * @return data offset or 0 if there is no data for the lead surrogate
         */
        public int getFoldingOffset(int value){
            return value;
        }
    }

    /*
     * Profile data. These are instance fields on purpose: keeping them
     * static would let every newly loaded profile overwrite the data of
     * all previously created instances.
     */
    private StringPrepTrieImpl sprepTrieImpl;
    private int[] indexes;
    private char[] mappingData;
    private byte[] formatVersion;

    /* options read from the profile */
    private boolean doNFKC = false;
    private boolean checkBiDi = false;
    private VersionInfo unicodeVersion;
    private VersionInfo normVersion;

    /* Returns the trie word stored for the code point ch. */
    private char getCodePointValue(int ch){
        return sprepTrieImpl.sprepTrie.getCodePointValue(ch);
    }

    /* Unpacks a version stored as four bytes of an int, major in the top byte. */
    private static VersionInfo getVersionInfo(int comp){
        int micro = comp & 0xFF;
        int milli =(comp >> 8) & 0xFF;
        int minor =(comp >> 16) & 0xFF;
        int major =(comp >> 24) & 0xFF;
        return VersionInfo.getInstance(major,minor,milli,micro);
    }

    /*
     * Converts a four byte version array; returns null for any other length.
     * The bytes are treated as unsigned values.
     */
    private static VersionInfo getVersionInfo(byte[] version){
        if(version.length != 4){
            return null;
        }
        return VersionInfo.getInstance(version[0] & 0xFF, version[1] & 0xFF,
                                       version[2] & 0xFF, version[3] & 0xFF);
    }

    /*
     * Reads a profile from the stream. The stream is closed when loading
     * finishes, whether it succeeds or fails.
     */
    private StringPrep(InputStream inputStream) throws IOException{
        BufferedInputStream b = new BufferedInputStream(inputStream,DATA_BUFFER_SIZE);
        try{
            StringPrepDataReader reader = new StringPrepDataReader(b);
            // read the indexes
            indexes = reader.readIndexes(INDEX_TOP);
            byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
            sprepTrieImpl = new StringPrepTrieImpl();
            //indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes
            mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2];
            // load the rest of the data and initialize the data members
            reader.read(sprepBytes,mappingData);
            sprepTrieImpl.sprepTrie = new CharTrie( new ByteArrayInputStream(sprepBytes),sprepTrieImpl );
            // get the data format version
            formatVersion = reader.getDataFormatVersion();
            // get the options
            doNFKC = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
            checkBiDi = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
            unicodeVersion = getVersionInfo(reader.getUnicodeVersion());
            normVersion = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
            if(normVersion.compareTo(UCharacter.getUnicodeVersion())>0){
                throw new IOException("Normalization Correction version not supported");
            }
        }finally{
            b.close();
        }
    }

    /**
     * Returns the StringPrep instance created after reading the input stream.
     * The object does not hold a reference to the input stream; the stream
     * is closed by this method once the profile has been read.
     *
     * @param inputStream The stream for reading the StringPrep profile binary
     * @return StringPrep object created from the input stream
     * @throws IOException if the profile cannot be read or is not supported
     * @draft ICU 2.8
     */
    public static final StringPrep getInstance(InputStream inputStream)
            throws IOException{
        // load the file and create the object
        return new StringPrep(inputStream);
    }

    /* Decoded form of a trie word, filled in by getValues(). */
    private static final class Values{
        boolean isIndex;
        int value;
        int type;
    }

    /*
     * Decodes a trie word into values. Only values.type is set for words
     * that do not describe a mapping; isIndex and value then keep whatever
     * they held before.
     */
    private static final void getValues(char trieWord,Values values){
        if(trieWord == 0){
            /*
             * Initial value stored in the mapping table
             * just return USPREP_TYPE_LIMIT .. so that
             * the source codepoint is copied to the destination
             */
            values.type = TYPE_LIMIT;
        }else if(trieWord >= TYPE_THRESHOLD){
            values.type = (trieWord - TYPE_THRESHOLD);
        }else{
            /* get the type */
            values.type = MAP;
            /* ascertain if the value is index or delta */
            if((trieWord & 0x02)>0){
                values.isIndex = true;
                values.value = trieWord >> 2; //mask off the lower 2 bits and shift
            }else{
                values.isIndex = false;
                /* sign-extend the 16 bit word before dropping the lower 2 bits */
                values.value = ((int)(trieWord<<16))>>16;
                values.value = (values.value >> 2);
            }
            if((trieWord>>2) == MAX_INDEX_VALUE){
                values.type = DELETE;
                values.isIndex = false;
                values.value = 0;
            }
        }
    }

    /*
     * Mapping step: copies the input to a new buffer, replacing mapped
     * code points and dropping deleted ones.
     */
    private StringBuffer map( UCharacterIterator iter, int options)
            throws ParseException{
        Values val = new Values();
        char result = 0;
        int ch = UCharacterIterator.DONE;
        StringBuffer dest = new StringBuffer();
        boolean allowUnassigned = ((options & ALLOW_UNASSIGNED)>0);
        while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
            result = getCodePointValue(ch);
            getValues(result,val);
            // check if the source codepoint is unassigned
            if(val.type == UNASSIGNED && !allowUnassigned){
                throw new ParseException("An unassigned code point was found in the input",
                                         ParseException.UNASSIGNED_ERROR,
                                         iter.getText(),iter.getIndex());
            }else if(val.type == MAP){
                int index, length;
                if(val.isIndex){
                    index = val.value;
                    /* the index range tells the length of the mapping */
                    if(index >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
                       index < indexes[TWO_UCHARS_MAPPING_INDEX_START]){
                        length = 1;
                    }else if(index >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
                             index < indexes[THREE_UCHARS_MAPPING_INDEX_START]){
                        length = 2;
                    }else if(index >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
                             index < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){
                        length = 3;
                    }else{
                        /* the length is stored in front of the mapping */
                        length = mappingData[index++];
                    }
                    /* copy mapping to destination */
                    dest.append(mappingData,index,length);
                    continue;
                }else{
                    /* the value is a delta to the source code point */
                    ch -= val.value;
                }
            }else if(val.type == DELETE){
                // just consume the codepoint and continue
                continue;
            }
            //copy the source into destination
            UTF16.append(dest,ch);
        }
        return dest;
    }

    /* Normalization step: NFKC with the UNICODE_3_2 option. */
    private StringBuffer normalize(StringBuffer src){
        return new StringBuffer(Normalizer.normalize(src.toString(),Normalizer.NFKC,Normalizer.UNICODE_3_2));
    }

    /* Tests the low three bits of the trie word of ch against LABEL_SEPARATOR. */
    protected boolean isLabelSeparator(int ch){
        int result = getCodePointValue(ch);
        return (result & 0x07) == LABEL_SEPARATOR;
    }

    /*
      1) Map -- For each character in the input, check if it has a mapping
         and, if so, replace it with its mapping.
      2) Normalize -- Possibly normalize the result of step 1 using Unicode
         normalization.
      3) Prohibit -- Check for any characters that are not allowed in the
         output. If any are found, return an error.
      4) Check bidi -- Possibly check for right-to-left characters, and if
         any are found, make sure that the whole string satisfies the
         requirements for bidirectional strings. If the string does not
         satisfy the requirements for bidirectional strings, return an
         error.
         [Unicode3.2] defines several bidirectional categories; each character
         has one bidirectional category assigned to it. For the purposes of
         the requirements below, an "RandALCat character" is a character that
         has Unicode bidirectional categories "R" or "AL"; an "LCat character"
         is a character that has Unicode bidirectional category "L". Note
         that there are many characters which fall in neither of the above
         definitions; Latin digits (<U+0030> through <U+0039>) are examples of
         this because they have bidirectional category "EN".
         In any profile that specifies bidirectional character handling, all
         three of the following requirements MUST be met:
         1) The characters in section 5.8 MUST be prohibited.
         2) If a string contains any RandALCat character, the string MUST NOT
            contain any LCat character.
         3) If a string contains any RandALCat character, a RandALCat
            character MUST be the first character of the string, and a
            RandALCat character MUST be the last character of the string.
    */
    /**
     * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
     * checks for prohibited and BiDi characters in the order defined by RFC 3454
     * depending on the options specified in the profile.
     *
     * @param src     A UCharacterIterator object containing the source string
     * @param options A bit set of options:
     *
     *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
     *
     *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
     *                                  as normal Unicode code points.
     *
     * @return StringBuffer A StringBuffer containing the output
     * @throws ParseException if the input contains an unassigned code point
     *         (unless allowed by the options), a prohibited code point, or
     *         violates the BiDi rules of a profile that checks them
     * @draft ICU 2.8
     */
    public StringBuffer prepare(UCharacterIterator src, int options)
            throws ParseException{
        // map
        StringBuffer mapOut = map(src,options);
        StringBuffer normOut = mapOut;// initialize
        if(doNFKC){
            // normalize
            normOut = normalize(mapOut);
        }
        int ch;
        char result;
        UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
        Values val = new Values();
        int direction=UCharacterDirection.CHAR_DIRECTION_COUNT,
            firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT;
        int rtlPos=-1, ltrPos=-1;
        boolean rightToLeft=false, leftToRight=false;
        while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
            result = getCodePointValue(ch);
            getValues(result,val);
            if(val.type == PROHIBITED ){
                /*
                 * Report the iterator position, as map() does. val.value is
                 * not set for prohibited code points and would be stale.
                 */
                throw new ParseException("A prohibited code point was found in the input",
                                         ParseException.PROHIBITED_ERROR,iter.getText(),iter.getIndex());
            }
            direction = UCharacter.getDirection(ch);
            if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
                firstCharDir = direction;
            }
            if(direction == UCharacterDirection.LEFT_TO_RIGHT){
                leftToRight = true;
                ltrPos = iter.getIndex()-1;
            }
            if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
                rightToLeft = true;
                rtlPos = iter.getIndex()-1;
            }
        }
        if(checkBiDi){
            // satisfy 2
            if(leftToRight && rightToLeft){
                throw new ParseException("The input does not conform to the rules for BiDi code points.",
                                         ParseException.CHECK_BIDI_ERROR,iter.getText(),
                                         (rtlPos>ltrPos) ? rtlPos : ltrPos);
            }
            //satisfy 3
            /* after the loop, direction holds the direction of the last code point */
            if(rightToLeft &&
               !((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) &&
                 (direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))
              ){
                throw new ParseException("The input does not conform to the rules for BiDi code points.",
                                         ParseException.CHECK_BIDI_ERROR,iter.getText(),
                                         (rtlPos>ltrPos) ? rtlPos : ltrPos);
            }
        }
        return normOut;
    }
}