ICU-3064 StringPrep port take 1
X-SVN-Rev: 12908
This commit is contained in:
parent
869bbeb681
commit
dacfe88ad2
6
.gitattributes
vendored
6
.gitattributes
vendored
@ -56,6 +56,11 @@ icu4c/source/test/testdata/iscii.bin -text
|
||||
icu4c/source/test/testdata/uni-text.bin -text
|
||||
icu4j/src/com/ibm/icu/dev/data/ThaiWordFreq.xls -text
|
||||
icu4j/src/com/ibm/icu/dev/data/holidays_jp.ucs -text
|
||||
icu4j/src/com/ibm/icu/dev/data/nfscis.spp -text
|
||||
icu4j/src/com/ibm/icu/dev/data/nfscsi.spp -text
|
||||
icu4j/src/com/ibm/icu/dev/data/nfscss.spp -text
|
||||
icu4j/src/com/ibm/icu/dev/data/nfsmxp.spp -text
|
||||
icu4j/src/com/ibm/icu/dev/data/nfsmxs.spp -text
|
||||
icu4j/src/com/ibm/icu/dev/data/rbbi/english.dict -text
|
||||
icu4j/src/com/ibm/icu/dev/data/thai6.ucs -text
|
||||
icu4j/src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Asian.txt -text
|
||||
@ -74,6 +79,7 @@ icu4j/src/com/ibm/icu/impl/data/ICULocaleData.jar -text
|
||||
icu4j/src/com/ibm/icu/impl/data/invuca.icu -text
|
||||
icu4j/src/com/ibm/icu/impl/data/pnames.icu -text
|
||||
icu4j/src/com/ibm/icu/impl/data/ucadata.icu -text
|
||||
icu4j/src/com/ibm/icu/impl/data/uidna.spp -text
|
||||
icu4j/src/com/ibm/icu/impl/data/unames.icu -text
|
||||
icu4j/src/com/ibm/icu/impl/data/unorm.icu -text
|
||||
icu4j/src/com/ibm/icu/impl/data/uprops.icu -text
|
||||
|
@ -6,8 +6,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/build.xml,v $
|
||||
* $Date: 2003/07/03 20:48:58 $
|
||||
* $Revision: 1.75 $
|
||||
* $Date: 2003/08/21 23:44:28 $
|
||||
* $Revision: 1.76 $
|
||||
*
|
||||
*******************************************************************************
|
||||
* This is the ant build file for ICU4J. See readme.html for more information.
|
||||
@ -16,34 +16,44 @@
|
||||
|
||||
<project name="ICU4J" default="core" basedir=".">
|
||||
|
||||
<!-- ### begin clover setup ### -->
|
||||
<property name="icu4j.tempdir" value="tmp"/>
|
||||
<property name="clover.initstring" value="${icu4j.tempdir}/icu4j.db"/>
|
||||
<path id="clover.classpath">
|
||||
<!-- ***** you will need to change the paths below to point
|
||||
to clover.jar and velocity.jar on your filesystem ***** -->
|
||||
<pathelement path="${java.class.path}/"/>
|
||||
<pathelement location="clover.jar"/>
|
||||
<pathelement location="velocity.jar"/>
|
||||
</path>
|
||||
<!-- ### Begin Clover 1.2 setup ### -->
|
||||
<typedef resource="clovertypes"/>
|
||||
<taskdef resource="clovertasks"/>
|
||||
|
||||
<!-- target to switch Clover on -->
|
||||
<target name="with.clover">
|
||||
<property name="build.compiler" value="org.apache.tools.ant.taskdefs.CloverCompilerAdapter"/>
|
||||
|
||||
<!-- instruct clover to use a class-based instrumentation strategy when instrumenting SimpleTimeZone.java -->
|
||||
<property name="clover.useclass.includes" value="**/SimpleTimeZone.java"/>
|
||||
<clover-setup initString="tmp/icu4j.db">
|
||||
<files>
|
||||
<exclude name="**/dev/**/*.java"/>
|
||||
</files>
|
||||
</clover-setup>
|
||||
</target>
|
||||
<target name="clover.html" depends="with.clover">
|
||||
<clover-report >
|
||||
<current outfile="icu4j_html" >
|
||||
<format type="html" />
|
||||
</current>
|
||||
</clover-report>
|
||||
</target>
|
||||
<target name="clover.log" depends="with.clover">
|
||||
<clover-log level="method">
|
||||
<package name="com.ibm.icu.text"/>
|
||||
</clover-log>
|
||||
<clover-log level="method">
|
||||
<package name="com.ibm.icu.lang"/>
|
||||
</clover-log>
|
||||
<clover-log level="method">
|
||||
<package name="com.ibm.icu.math"/>
|
||||
</clover-log>
|
||||
<clover-log level="method">
|
||||
<package name="com.ibm.icu.util"/>
|
||||
</clover-log>
|
||||
<clover-log level="method">
|
||||
<package name="com.ibm.icu.impl"/>
|
||||
</clover-log>
|
||||
</target>
|
||||
|
||||
<!-- target to generate a Clover html report -->
|
||||
<target name="clover.report">
|
||||
<java classname="com.cortexeb.tools.clover.reporters.html.HtmlReporter" fork="yes">
|
||||
<arg line="--outputdir ${icu4j.tempdir}/icu4j_html --showSrc --initstring ${clover.initstring} --title 'icu4j core'"/>
|
||||
<classpath refid="clover.classpath"/>
|
||||
</java>
|
||||
</target>
|
||||
<!-- ### end clover setup ### -->
|
||||
|
||||
<!-- ### End Clover 1.2 setup ### -->
|
||||
|
||||
<target name="anthack1">
|
||||
<!-- It's a real pain to set properties conditionally.
|
||||
Ant 1.5 isn't really any better than 1.4, in fact 1.5 enforces that
|
||||
@ -78,7 +88,6 @@
|
||||
|
||||
<path id="build.classpath">
|
||||
<pathelement path="${build.dir}"/>
|
||||
<path refid="clover.classpath"/>
|
||||
</path>
|
||||
|
||||
<property name="richedit.dir" value="richedit"/>
|
||||
@ -121,7 +130,8 @@
|
||||
srcdir="${src.dir}"
|
||||
destdir="${build.dir}"
|
||||
classpathref="build.classpath"
|
||||
debug="on" deprecation="off"/>
|
||||
debug="on" deprecation="off"
|
||||
encoding="ascii"/>
|
||||
</target>
|
||||
|
||||
<target name="tests" depends="core,testData">
|
||||
@ -182,7 +192,7 @@
|
||||
<target name ="coreData" depends="init">
|
||||
<copy todir="${build.dir}/com/ibm/icu/impl/data">
|
||||
<fileset dir="${src.dir}/com/ibm/icu/impl/data"
|
||||
includes="Transliterator_*.txt,*.icu"
|
||||
includes="Transliterator_*.txt,*.icu,*.spp"
|
||||
excludes="**/CVS/**/*,Transliterator_Han_Latin_*.txt"/>
|
||||
</copy>
|
||||
</target>
|
||||
@ -196,6 +206,10 @@
|
||||
destdir="${build.dir}"
|
||||
classpathref="build.classpath"
|
||||
debug="on" deprecation="off"/>
|
||||
<copy todir="${build.dir}/com/ibm/icu/dev/data">
|
||||
<fileset dir="${src.dir}/com/ibm/icu/dev/data"
|
||||
includes="*.spp"/>
|
||||
</copy>
|
||||
</target>
|
||||
|
||||
<!-- builds richedit and richedit tests -->
|
||||
|
BIN
icu4j/src/com/ibm/icu/dev/data/nfscis.spp
Normal file
BIN
icu4j/src/com/ibm/icu/dev/data/nfscis.spp
Normal file
Binary file not shown.
BIN
icu4j/src/com/ibm/icu/dev/data/nfscsi.spp
Normal file
BIN
icu4j/src/com/ibm/icu/dev/data/nfscsi.spp
Normal file
Binary file not shown.
BIN
icu4j/src/com/ibm/icu/dev/data/nfscss.spp
Normal file
BIN
icu4j/src/com/ibm/icu/dev/data/nfscss.spp
Normal file
Binary file not shown.
BIN
icu4j/src/com/ibm/icu/dev/data/nfsmxp.spp
Normal file
BIN
icu4j/src/com/ibm/icu/dev/data/nfsmxp.spp
Normal file
Binary file not shown.
BIN
icu4j/src/com/ibm/icu/dev/data/nfsmxs.spp
Normal file
BIN
icu4j/src/com/ibm/icu/dev/data/nfsmxs.spp
Normal file
Binary file not shown.
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestFmwk.java,v $
|
||||
* $Date: 2003/06/11 18:27:08 $
|
||||
* $Revision: 1.47 $
|
||||
* $Date: 2003/08/21 23:42:03 $
|
||||
* $Revision: 1.48 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
@ -820,7 +820,23 @@ public class TestFmwk extends AbstractTestLog {
|
||||
public static String hex(StringBuffer s) {
|
||||
return hex(s.toString());
|
||||
}
|
||||
|
||||
public static String prettify(String s) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
char ch =s.charAt(i);
|
||||
if(ch > 0x7f){
|
||||
result.append("\\u");
|
||||
result.append(hex(ch));
|
||||
}else{
|
||||
result.append(ch);
|
||||
}
|
||||
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
public static String prettify(StringBuffer s) {
|
||||
return prettify(s.toString());
|
||||
}
|
||||
private static class ASCIIWriter extends PrintWriter {
|
||||
private Writer w;
|
||||
private StringBuffer buffer = new StringBuffer();
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestUtil.java,v $
|
||||
* $Date: 2003/06/03 18:49:28 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2003/08/21 23:42:03 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -14,8 +14,10 @@ package com.ibm.icu.dev.test;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
public class TestUtil {
|
||||
/**
|
||||
@ -86,6 +88,14 @@ public class TestUtil {
|
||||
public static final BufferedReader getDataReader(String name) throws IOException {
|
||||
return getDataReader(name, 1024);
|
||||
}
|
||||
/**
|
||||
* Return an input stream on the data file at path 'name' rooted at the data path
|
||||
*/
|
||||
public static final InputStream getDataStream(String name) throws IOException{
|
||||
File file = getDataFile(name);
|
||||
FileInputStream st = new FileInputStream(file);
|
||||
return st;
|
||||
}
|
||||
static final char DIGITS[] = {
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
|
||||
|
408
icu4j/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java
Normal file
408
icu4j/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java
Normal file
@ -0,0 +1,408 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java,v $
|
||||
* $Date: 2003/08/21 23:42:25 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class IDNAReference {
|
||||
|
||||
private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
|
||||
private static final int ACE_PREFIX_LENGTH = 4;
|
||||
|
||||
private static final int MAX_LABEL_LENGTH = 63;
|
||||
private static final int HYPHEN = 0x002D;
|
||||
private static final int CAPITAL_A = 0x0041;
|
||||
private static final int CAPITAL_Z = 0x005A;
|
||||
private static final int LOWER_CASE_DELTA = 0x0020;
|
||||
private static final int FULL_STOP = 0x002E;
|
||||
|
||||
|
||||
public static final int DEFAULT = 0x0000;
|
||||
public static final int ALLOW_UNASSIGNED = 0x0001;
|
||||
public static final int USE_STD3_RULES = 0x0002;
|
||||
public static final NamePrepTransform transform = NamePrepTransform.getInstance();
|
||||
|
||||
private static boolean startsWithPrefix(StringBuffer src){
|
||||
boolean startsWithPrefix = true;
|
||||
|
||||
if(src.length() < ACE_PREFIX_LENGTH){
|
||||
return false;
|
||||
}
|
||||
for(int i=0; i<ACE_PREFIX_LENGTH;i++){
|
||||
if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
|
||||
startsWithPrefix = false;
|
||||
}
|
||||
}
|
||||
return startsWithPrefix;
|
||||
}
|
||||
|
||||
private static char toASCIILower(char ch){
|
||||
if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
|
||||
return (char)(ch + LOWER_CASE_DELTA);
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
private static StringBuffer toASCIILower(StringBuffer src){
|
||||
StringBuffer dest = new StringBuffer();
|
||||
for(int i=0; i<src.length();i++){
|
||||
dest.append(toASCIILower(src.charAt(i)));
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
|
||||
char c1,c2;
|
||||
int rc;
|
||||
for(int i =0;/* no condition */;i++) {
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(i == s1.length()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
c1 = s1.charAt(i);
|
||||
c2 = s2.charAt(i);
|
||||
|
||||
/* Case-insensitive comparison */
|
||||
if(c1!=c2) {
|
||||
rc=(int)toASCIILower(c1)-(int)toASCIILower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int getSeparatorIndex(char[] src,int start, int limit){
|
||||
for(; start<limit;start++){
|
||||
if(NamePrepTransform.isLabelSeparator(src[start])){
|
||||
return start;
|
||||
}
|
||||
}
|
||||
// we have not found the separator just return length
|
||||
return start;
|
||||
}
|
||||
|
||||
private static boolean isLDHChar(int ch){
|
||||
// high runner case
|
||||
if(ch>0x007A){
|
||||
return false;
|
||||
}
|
||||
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
|
||||
if( (ch==0x002D) ||
|
||||
(0x0030 <= ch && ch <= 0x0039) ||
|
||||
(0x0041 <= ch && ch <= 0x005A) ||
|
||||
(0x0061 <= ch && ch <= 0x007A)
|
||||
){
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static StringBuffer convertToASCII(String src, int options)
|
||||
throws ParseException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToASCII(iter,options);
|
||||
}
|
||||
public static StringBuffer convertToASCII(StringBuffer src, int options)
|
||||
throws ParseException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToASCII(iter,options);
|
||||
}
|
||||
public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
|
||||
throws ParseException{
|
||||
|
||||
char[] caseFlags = null;
|
||||
|
||||
// the source contains all ascii codepoints
|
||||
boolean srcIsASCII = true;
|
||||
// assume the source contains all LDH codepoints
|
||||
boolean srcIsLDH = true;
|
||||
|
||||
//get the options
|
||||
boolean useSTD3ASCIIRules = (boolean)((options & USE_STD3_RULES) != 0);
|
||||
|
||||
int failPos = -1;
|
||||
// step 2
|
||||
//StringPrep prep = StringPrep.getNameprepInstance();
|
||||
StringBuffer processOut = transform.prepare(srcIter,options);
|
||||
int poLen = processOut.length();
|
||||
StringBuffer dest = new StringBuffer();
|
||||
// step 3 & 4
|
||||
for(int j=0;j<poLen;j++ ){
|
||||
char ch=processOut.charAt(j);
|
||||
if(ch > 0x7F){
|
||||
srcIsASCII = false;
|
||||
}
|
||||
// here we do not assemble surrogates
|
||||
// since we know that LDH code points
|
||||
// are in the ASCII range only
|
||||
if(isLDHChar(ch)==false){
|
||||
srcIsLDH = false;
|
||||
failPos = j;
|
||||
}
|
||||
}
|
||||
|
||||
if(useSTD3ASCIIRules == true){
|
||||
// verify 3a and 3b
|
||||
if( srcIsLDH == false /* source contains some non-LDH characters */
|
||||
|| processOut.charAt(0) == HYPHEN
|
||||
|| processOut.charAt(processOut.length()-1) == HYPHEN){
|
||||
|
||||
/* populate the parseError struct */
|
||||
if(srcIsLDH==false){
|
||||
throw new ParseException( "The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),
|
||||
(failPos>0) ? (failPos-1) : failPos);
|
||||
}else if(processOut.charAt(0) == HYPHEN){
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
|
||||
|
||||
}else{
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),
|
||||
(poLen>0) ? poLen-1 : poLen);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if(srcIsASCII){
|
||||
dest = processOut;
|
||||
}else{
|
||||
// step 5 : verify the sequence does not begin with ACE prefix
|
||||
if(!startsWithPrefix(processOut)){
|
||||
|
||||
//step 6: encode the sequence with punycode
|
||||
StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);
|
||||
|
||||
// convert all codepoints to lower case ASCII
|
||||
StringBuffer lowerOut = toASCIILower(punyout);
|
||||
|
||||
//Step 7: prepend the ACE prefix
|
||||
dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
|
||||
//Step 6: copy the contents in b2 into dest
|
||||
dest.append(lowerOut);
|
||||
}else{
|
||||
throw new ParseException("The input does not start with the ACE Prefix.",
|
||||
ParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
|
||||
}
|
||||
}
|
||||
if(dest.length() > MAX_LABEL_LENGTH){
|
||||
throw new ParseException("The labels in the input are too long. Length > 64.",
|
||||
ParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
|
||||
throws ParseException{
|
||||
return convertIDNToASCII(iter.getText(), options);
|
||||
}
|
||||
public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
|
||||
throws ParseException{
|
||||
return convertIDNToASCII(str.toString(), options);
|
||||
}
|
||||
public static StringBuffer convertIDNToASCII(String src,int options)
|
||||
throws ParseException{
|
||||
char[] srcArr = src.toCharArray();
|
||||
StringBuffer result = new StringBuffer();
|
||||
int sepIndex=0;
|
||||
int oldSepIndex = 0;
|
||||
for(;;){
|
||||
sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(new String(srcArr,oldSepIndex,sepIndex-oldSepIndex));
|
||||
result.append(convertToASCII(iter,options));
|
||||
if(sepIndex==srcArr.length){
|
||||
break;
|
||||
}
|
||||
// increment the sepIndex to skip past the separator
|
||||
sepIndex++;
|
||||
oldSepIndex = sepIndex;
|
||||
result.append((char)FULL_STOP);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static StringBuffer convertToUnicode(String src, int options)
|
||||
throws ParseException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToUnicode(iter,options);
|
||||
}
|
||||
public static StringBuffer convertToUnicode(StringBuffer src, int options)
|
||||
throws ParseException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToUnicode(iter,options);
|
||||
}
|
||||
public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
|
||||
throws ParseException{
|
||||
|
||||
char[] caseFlags = null;
|
||||
|
||||
//get the options
|
||||
boolean useSTD3ASCIIRules = (boolean)((options & USE_STD3_RULES) != 0);
|
||||
|
||||
// the source contains all ascii codepoints
|
||||
boolean srcIsASCII = true;
|
||||
// assume the source contains all LDH codepoints
|
||||
boolean srcIsLDH = true;
|
||||
|
||||
int failPos = -1;
|
||||
int ch;
|
||||
int saveIndex = iter.getIndex();
|
||||
// step 1: find out if all the codepoints in src are ASCII
|
||||
while((ch=iter.next())!= UCharacterIterator.DONE){
|
||||
if(ch>0x7F){
|
||||
srcIsASCII = false;
|
||||
}
|
||||
if((srcIsLDH = isLDHChar(ch))==false){
|
||||
failPos = iter.getIndex();
|
||||
}
|
||||
}
|
||||
StringBuffer processOut;
|
||||
|
||||
if(srcIsASCII == false){
|
||||
// step 2: process the string
|
||||
iter.setIndex(saveIndex);
|
||||
processOut = transform.prepare(iter,options);
|
||||
|
||||
}else{
|
||||
//just point to source
|
||||
processOut = new StringBuffer(iter.getText());
|
||||
}
|
||||
// TODO:
|
||||
// The RFC states that
|
||||
// <quote>
|
||||
// ToUnicode never fails. If any step fails, then the original input
|
||||
// is returned immediately in that step.
|
||||
// </quote>
|
||||
|
||||
//step 3: verify ACE Prefix
|
||||
if(startsWithPrefix(processOut)){
|
||||
|
||||
//step 4: Remove the ACE Prefix
|
||||
String temp = processOut.substring(ACE_PREFIX_LENGTH,processOut.length());
|
||||
|
||||
//step 5: Decode using punycode
|
||||
StringBuffer decodeOut = PunycodeReference.decode(new StringBuffer(temp),caseFlags);
|
||||
|
||||
//step 6:Apply toASCII
|
||||
StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
|
||||
|
||||
//step 7: verify
|
||||
if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
|
||||
throw new ParseException("The verification step prescribed by the RFC 3491 failed",
|
||||
ParseException.VERIFICATION_ERROR);
|
||||
}
|
||||
|
||||
//step 8: return output of step 5
|
||||
return decodeOut;
|
||||
|
||||
}else{
|
||||
// verify that STD3 ASCII rules are satisfied
|
||||
if(useSTD3ASCIIRules == true){
|
||||
if( srcIsLDH == false /* source contains some non-LDH characters */
|
||||
|| processOut.charAt(0) == HYPHEN
|
||||
|| processOut.charAt(processOut.length()-1) == HYPHEN){
|
||||
|
||||
if(srcIsLDH==false){
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
|
||||
(failPos>0) ? (failPos-1) : failPos);
|
||||
}else if(processOut.charAt(0) == HYPHEN){
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),0);
|
||||
|
||||
}else{
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),
|
||||
processOut.length());
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
// just return the source
|
||||
return new StringBuffer(iter.getText());
|
||||
}
|
||||
}
|
||||
public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
|
||||
throws ParseException{
|
||||
return convertIDNToUnicode(iter.getText(), options);
|
||||
}
|
||||
public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
|
||||
throws ParseException{
|
||||
return convertIDNToUnicode(str.toString(), options);
|
||||
}
|
||||
public static StringBuffer convertIDNToUnicode(String src, int options)
|
||||
throws ParseException{
|
||||
|
||||
char[] srcArr = src.toCharArray();
|
||||
StringBuffer result = new StringBuffer();
|
||||
int sepIndex=0;
|
||||
int oldSepIndex=0;
|
||||
for(;;){
|
||||
sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(new String(srcArr,oldSepIndex,sepIndex-oldSepIndex));
|
||||
result.append(convertToUnicode(iter,options));
|
||||
if(sepIndex==srcArr.length){
|
||||
break;
|
||||
}
|
||||
// increment the sepIndex to skip past the separator
|
||||
sepIndex++;
|
||||
oldSepIndex = sepIndex;
|
||||
result.append((char)FULL_STOP);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// TODO: optimize
|
||||
public static int compare(StringBuffer s1, StringBuffer s2, int options)
|
||||
throws ParseException{
|
||||
if(s1==null || s2 == null){
|
||||
throw new IllegalArgumentException("One of the source buffers is null");
|
||||
}
|
||||
StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
|
||||
StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
|
||||
return compareCaseInsensitiveASCII(s1Out,s2Out);
|
||||
}
|
||||
// TODO: optimize
|
||||
public static int compare(String s1, String s2, int options)
|
||||
throws ParseException{
|
||||
if(s1==null || s2 == null){
|
||||
throw new IllegalArgumentException("One of the source buffers is null");
|
||||
}
|
||||
StringBuffer s1Out = convertIDNToASCII(s1, options);
|
||||
StringBuffer s2Out = convertIDNToASCII(s2, options);
|
||||
return compareCaseInsensitiveASCII(s1Out,s2Out);
|
||||
}
|
||||
// TODO: optimize
|
||||
public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
|
||||
throws ParseException{
|
||||
if(i1==null || i2 == null){
|
||||
throw new IllegalArgumentException("One of the source buffers is null");
|
||||
}
|
||||
StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
|
||||
StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
|
||||
return compareCaseInsensitiveASCII(s1Out,s2Out);
|
||||
}
|
||||
|
||||
}
|
1538
icu4j/src/com/ibm/icu/dev/test/stringprep/IDNA_rules.java
Normal file
1538
icu4j/src/com/ibm/icu/dev/test/stringprep/IDNA_rules.java
Normal file
File diff suppressed because it is too large
Load Diff
173
icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java
Normal file
173
icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java
Normal file
@ -0,0 +1,173 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java,v $
|
||||
* $Date: 2003/08/21 23:42:25 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import com.ibm.icu.dev.test.TestUtil;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
import com.ibm.icu.stringprep.StringPrep;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* This is a dumb implementation of NFS4 profiles. It is a direct port of
|
||||
* C code, does not use Object Oriented principles. Quick and Dirty implementation
|
||||
* for testing.
|
||||
*/
|
||||
public final class NFS4StringPrep {
|
||||
private static final String[] NFS4DataFileNames ={
|
||||
"nfscss.spp",
|
||||
"nfscsi.spp",
|
||||
"nfscis.spp",
|
||||
"nfsmxp.spp",
|
||||
"nfsmxs.spp"
|
||||
};
|
||||
private StringPrep nfscss = null;
|
||||
private StringPrep nfscsi = null;
|
||||
private StringPrep nfscis = null;
|
||||
private StringPrep nfsmxp = null;
|
||||
private StringPrep nfsmxs = null;
|
||||
//singleton instance
|
||||
private static NFS4StringPrep prep = null;
|
||||
|
||||
// we donot synchronize the constructor because we
|
||||
// know that the constructor is only called from
|
||||
// getInstance method if and only if the the singleton
|
||||
// intance is null, which means this constructor is called
|
||||
// only once
|
||||
private NFS4StringPrep ()throws IOException{
|
||||
|
||||
InputStream nfscssFile = TestUtil.getDataStream(NFS4DataFileNames[0]);
|
||||
nfscss = StringPrep.getInstance(nfscssFile);
|
||||
nfscssFile.close();
|
||||
|
||||
InputStream nfscsiFile = TestUtil.getDataStream(NFS4DataFileNames[1]);
|
||||
nfscsi = StringPrep.getInstance(nfscsiFile);
|
||||
nfscsiFile.close();
|
||||
|
||||
InputStream nfscisFile = TestUtil.getDataStream(NFS4DataFileNames[2]);
|
||||
nfscis = StringPrep.getInstance(nfscisFile);
|
||||
nfscsiFile.close();
|
||||
|
||||
InputStream nfsmxpFile = TestUtil.getDataStream(NFS4DataFileNames[3]);
|
||||
nfsmxp = StringPrep.getInstance(nfsmxpFile);
|
||||
nfscsiFile.close();
|
||||
|
||||
InputStream nfsmxsFile = TestUtil.getDataStream(NFS4DataFileNames[4]);
|
||||
nfsmxs = StringPrep.getInstance(nfsmxsFile);
|
||||
nfsmxsFile.close();
|
||||
|
||||
}
|
||||
|
||||
public static synchronized final NFS4StringPrep getInstance()
|
||||
throws IOException{
|
||||
if(prep==null){
|
||||
prep = new NFS4StringPrep();
|
||||
}
|
||||
return prep;
|
||||
}
|
||||
|
||||
private static byte[] prepare(byte[] src, StringPrep prep)
|
||||
throws ParseException, UnsupportedEncodingException{
|
||||
String s = new String(src, "UTF-8");
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(s);
|
||||
StringBuffer out = prep.prepare(iter,StringPrep.NONE);
|
||||
return out.toString().getBytes("UTF-8");
|
||||
}
|
||||
|
||||
public static byte[] cs_prepare(byte[] src, boolean caseInsensitive)
|
||||
throws IOException, ParseException, UnsupportedEncodingException{
|
||||
NFS4StringPrep prep = getInstance();
|
||||
if(caseInsensitive){
|
||||
return prepare(src, prep.nfscsi);
|
||||
}else{
|
||||
return prepare(src,prep.nfscsi);
|
||||
}
|
||||
}
|
||||
|
||||
public static byte[] cis_prepare(byte[] src)
|
||||
throws IOException, ParseException, UnsupportedEncodingException{
|
||||
NFS4StringPrep prep = getInstance();
|
||||
return prepare(src, prep.nfscis);
|
||||
}
|
||||
|
||||
/* sorted array for binary search*/
|
||||
private static final String[] special_prefixes={
|
||||
"ANONYMOUS",
|
||||
"AUTHENTICATED",
|
||||
"BATCH",
|
||||
"DIALUP",
|
||||
"EVERYONE",
|
||||
"GROUP",
|
||||
"INTERACTIVE",
|
||||
"NETWORK",
|
||||
"OWNER",
|
||||
};
|
||||
|
||||
|
||||
/* binary search the sorted array */
|
||||
private static final int findStringIndex(String[] sortedArr,String target){
|
||||
|
||||
int left, middle, right,rc;
|
||||
|
||||
left =0;
|
||||
right= sortedArr.length-1;
|
||||
|
||||
while(left <= right){
|
||||
middle = (left+right)/2;
|
||||
rc= sortedArr[middle].compareTo(target);
|
||||
|
||||
if(rc<0){
|
||||
left = middle+1;
|
||||
}else if(rc >0){
|
||||
right = middle -1;
|
||||
}else{
|
||||
return middle;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
private static final char AT_SIGN = '@';
|
||||
|
||||
public static byte[] mixed_prepare(byte[] src)
|
||||
throws IOException, ParseException, UnsupportedEncodingException{
|
||||
String s = new String(src, "UTF-8");
|
||||
int index = s.indexOf(AT_SIGN);
|
||||
StringBuffer out = new StringBuffer();
|
||||
NFS4StringPrep prep = getInstance();
|
||||
if(index > -1){
|
||||
/* special prefixes must not be followed by suffixes! */
|
||||
String prefixString = s.substring(0,index);
|
||||
int i= findStringIndex(special_prefixes, prefixString);
|
||||
String suffixString = s.substring(index+1, s.length());
|
||||
if(i>-1 && !suffixString.equals("")){
|
||||
throw new ParseException("Suffix following a special index", ParseException.INVALID_CHAR_FOUND);
|
||||
}
|
||||
UCharacterIterator prefix = UCharacterIterator.getInstance(prefixString);
|
||||
UCharacterIterator suffix = UCharacterIterator.getInstance(suffixString);
|
||||
out.append(prep.nfsmxp.prepare(prefix,StringPrep.NONE));
|
||||
out.append(AT_SIGN); // add the delimiter
|
||||
out.append(prep.nfsmxs.prepare(suffix, StringPrep.NONE));
|
||||
}else{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(s);
|
||||
out.append(prep.nfsmxp.prepare(iter,StringPrep.NONE));
|
||||
|
||||
}
|
||||
return out.toString().getBytes("UTF-8");
|
||||
}
|
||||
|
||||
}
|
172
icu4j/src/com/ibm/icu/dev/test/stringprep/NamePrepTransform.java
Normal file
172
icu4j/src/com/ibm/icu/dev/test/stringprep/NamePrepTransform.java
Normal file
@ -0,0 +1,172 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NamePrepTransform.java,v $
|
||||
* $Date: 2003/08/21 23:42:21 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import java.util.ResourceBundle;
|
||||
|
||||
import com.ibm.icu.impl.ICULocaleData;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UCharacterDirection;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class NamePrepTransform {
|
||||
|
||||
private static final NamePrepTransform transform = new NamePrepTransform();
|
||||
|
||||
private UnicodeSet labelSeparatorSet;
|
||||
private UnicodeSet prohibitedSet;
|
||||
private UnicodeSet unassignedSet;
|
||||
private Transliterator mapTransform;
|
||||
public static final int NONE = 0;
|
||||
public static final int ALLOW_UNASSIGNED = 1;
|
||||
|
||||
private NamePrepTransform(){
|
||||
// load the resource bundle
|
||||
ResourceBundle bundle = ICULocaleData.getResourceBundle("com.ibm.icu.dev.test.stringprep","IDNA","rules");
|
||||
String mapRules = bundle.getString("Map");
|
||||
mapRules += bundle.getString("CaseMap");
|
||||
mapTransform = Transliterator.createFromRules("CaseMap",mapRules,Transliterator.FORWARD);
|
||||
labelSeparatorSet = new UnicodeSet(bundle.getString("LabelSeparatorSet"));
|
||||
prohibitedSet = new UnicodeSet(bundle.getString("ProhibitedSet"));
|
||||
unassignedSet = new UnicodeSet(bundle.getString("UnassignedSet"));
|
||||
}
|
||||
|
||||
public static final NamePrepTransform getInstance(){
|
||||
return transform;
|
||||
}
|
||||
public static boolean isLabelSeparator(int ch){
|
||||
return transform.labelSeparatorSet.contains(ch);
|
||||
}
|
||||
|
||||
/*
|
||||
1) Map -- For each character in the input, check if it has a mapping
|
||||
and, if so, replace it with its mapping.
|
||||
|
||||
2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
normalization.
|
||||
|
||||
3) Prohibit -- Check for any characters that are not allowed in the
|
||||
output. If any are found, return an error.
|
||||
|
||||
4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
any are found, make sure that the whole string satisfies the
|
||||
requirements for bidirectional strings. If the string does not
|
||||
satisfy the requirements for bidirectional strings, return an
|
||||
error.
|
||||
[Unicode3.2] defines several bidirectional categories; each character
|
||||
has one bidirectional category assigned to it. For the purposes of
|
||||
the requirements below, an "RandALCat character" is a character that
|
||||
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
|
||||
is a character that has Unicode bidirectional category "L". Note
|
||||
|
||||
|
||||
that there are many characters which fall in neither of the above
|
||||
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
|
||||
this because they have bidirectional category "EN".
|
||||
|
||||
In any profile that specifies bidirectional character handling, all
|
||||
three of the following requirements MUST be met:
|
||||
|
||||
1) The characters in section 5.8 MUST be prohibited.
|
||||
|
||||
2) If a string contains any RandALCat character, the string MUST NOT
|
||||
contain any LCat character.
|
||||
|
||||
3) If a string contains any RandALCat character, a RandALCat
|
||||
character MUST be the first character of the string, and a
|
||||
RandALCat character MUST be the last character of the string.
|
||||
*/
|
||||
public StringBuffer prepare(UCharacterIterator src,
|
||||
int options)
|
||||
throws ParseException{
|
||||
return prepare(src.getText(),options);
|
||||
}
|
||||
private String map ( String src, int options)
|
||||
throws ParseException{
|
||||
// map
|
||||
boolean allowUnassigned = (boolean) ((options & ALLOW_UNASSIGNED)>0);
|
||||
String caseMapOut = transform.mapTransform.transliterate(src);
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(caseMapOut);
|
||||
int ch;
|
||||
while((ch=iter.nextCodePoint())!=UCharacterIterator.DONE){
|
||||
if(transform.unassignedSet.contains(ch)==true && allowUnassigned ==false){
|
||||
throw new ParseException("An unassigned code point was found in the input",
|
||||
ParseException.UNASSIGNED_ERROR);
|
||||
}
|
||||
}
|
||||
return caseMapOut;
|
||||
}
|
||||
public StringBuffer prepare(String src,int options)
|
||||
throws ParseException{
|
||||
|
||||
int ch;
|
||||
String mapOut = map(src,options);
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(mapOut);
|
||||
|
||||
int direction=UCharacterDirection.CHAR_DIRECTION_COUNT,
|
||||
firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT;
|
||||
int rtlPos=-1, ltrPos=-1;
|
||||
boolean rightToLeft=false, leftToRight=false;
|
||||
|
||||
while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
|
||||
|
||||
|
||||
if(transform.prohibitedSet.contains(ch)==true){
|
||||
throw new ParseException("A prohibited code point was found in the input",
|
||||
ParseException.PROHIBITED_ERROR,
|
||||
iter.getText(),iter.getIndex());
|
||||
}
|
||||
|
||||
direction = UCharacter.getDirection(ch);
|
||||
if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
|
||||
firstCharDir = direction;
|
||||
}
|
||||
if(direction == UCharacterDirection.LEFT_TO_RIGHT){
|
||||
leftToRight = true;
|
||||
ltrPos = iter.getIndex()-1;
|
||||
}
|
||||
if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
|
||||
rightToLeft = true;
|
||||
rtlPos = iter.getIndex()-1;
|
||||
}
|
||||
}
|
||||
|
||||
// satisfy 2
|
||||
if( leftToRight == true && rightToLeft == true){
|
||||
throw new ParseException("The input does not conform to the rules for BiDi code points.",
|
||||
ParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
|
||||
}
|
||||
|
||||
//satisfy 3
|
||||
if( rightToLeft == true &&
|
||||
!((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) &&
|
||||
(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))
|
||||
){
|
||||
throw new ParseException("The input does not conform to the rules for BiDi code points.",
|
||||
ParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
|
||||
}
|
||||
|
||||
return new StringBuffer(mapOut);
|
||||
|
||||
}
|
||||
|
||||
}
|
388
icu4j/src/com/ibm/icu/dev/test/stringprep/PunycodeReference.java
Normal file
388
icu4j/src/com/ibm/icu/dev/test/stringprep/PunycodeReference.java
Normal file
@ -0,0 +1,388 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/PunycodeReference.java,v $
|
||||
* $Date: 2003/08/21 23:42:25 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
Disclaimer and license
|
||||
|
||||
Regarding this entire document or any portion of it (including
|
||||
the pseudocode and C code), the author makes no guarantees and
|
||||
is not responsible for any damage resulting from its use. The
|
||||
author grants irrevocable permission to anyone to use, modify,
|
||||
and distribute it in any way that does not diminish the rights
|
||||
of anyone else to use, modify, and distribute it, provided that
|
||||
redistributed derivative works do not contain misleading author or
|
||||
version information. Derivative works need not be licensed under
|
||||
similar terms.
|
||||
|
||||
punycode.c 0.4.0 (2001-Nov-17-Sat)
|
||||
http://www.cs.berkeley.edu/~amc/idn/
|
||||
Adam M. Costello
|
||||
http://www.nicemice.net/amc/
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* The implementation is direct port of C code in the RFC
|
||||
*/
|
||||
|
||||
public final class PunycodeReference {
|
||||
/*** punycode status codes */
|
||||
public static final int punycode_success=0;
|
||||
public static final int punycode_bad_input=1; /* Input is invalid. */
|
||||
public static final int punycode_big_output=2; /* Output would exceed the space provided. */
|
||||
public static final int punycode_overflow =3; /* Input needs wider integers to process. */
|
||||
|
||||
/*** Bootstring parameters for Punycode ***/
|
||||
private static final int base = 36;
|
||||
private static final int tmin = 1;
|
||||
private static final int tmax = 26;
|
||||
private static final int skew = 38;
|
||||
private static final int damp = 700;
|
||||
private static final int initial_bias = 72;
|
||||
private static final int initial_n = 0x80;
|
||||
private static final int delimiter = 0x2D;
|
||||
|
||||
|
||||
private static final long UNSIGNED_INT_MASK = 0xffffffffL;
|
||||
|
||||
/* basic(cp) tests whether cp is a basic code point: */
|
||||
private static boolean basic(int cp){
|
||||
return (char)(cp) < 0x80;
|
||||
}
|
||||
|
||||
/* delim(cp) tests whether cp is a delimiter: */
|
||||
private static boolean delim(int cp){
|
||||
return ((cp) == delimiter);
|
||||
}
|
||||
|
||||
/* decode_digit(cp) returns the numeric value of a basic code */
|
||||
/* point (for use in representing integers) in the range 0 to */
|
||||
/* base-1, or base if cp is does not represent a value. */
|
||||
|
||||
private static int decode_digit(int cp)
|
||||
{
|
||||
return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 :
|
||||
cp - 97 < 26 ? cp - 97 : base;
|
||||
}
|
||||
|
||||
/* encode_digit(d,flag) returns the basic code point whose value */
|
||||
/* (when used for representing integers) is d, which needs to be in */
|
||||
/* the range 0 to base-1. The lowercase form is used unless flag is */
|
||||
/* nonzero, in which case the uppercase form is used. The behavior */
|
||||
/* is undefined if flag is nonzero and digit d has no uppercase form. */
|
||||
|
||||
private static char encode_digit(int d, int flag)
|
||||
{
|
||||
return (char) (d + 22 + (75 * ((d < 26) ? 1 : 0) - (((flag != 0) ? 1 :0) << 5)));
|
||||
/* 0..25 map to ASCII a..z or A..Z */
|
||||
/* 26..35 map to ASCII 0..9 */
|
||||
}
|
||||
|
||||
/* flagged(bcp) tests whether a basic code point is flagged */
|
||||
/* (uppercase). The behavior is undefined if bcp is not a */
|
||||
/* basic code point. */
|
||||
|
||||
private static boolean flagged(int bcp){
|
||||
return ((bcp) - 65 < 26);
|
||||
}
|
||||
|
||||
/* encode_basic(bcp,flag) forces a basic code point to lowercase */
|
||||
/* if flag is zero, uppercase if flag is nonzero, and returns */
|
||||
/* the resulting code point. The code point is unchanged if it */
|
||||
/* is caseless. The behavior is undefined if bcp is not a basic */
|
||||
/* code point. */
|
||||
|
||||
private static char encode_basic(int bcp, int flag)
|
||||
{
|
||||
bcp -= (((bcp - 97) < 26) ? 1 :0 ) << 5;
|
||||
boolean mybcp = (bcp - 65 < 26);
|
||||
return (char) (bcp + (((flag==0) && mybcp ) ? 1 : 0 ) << 5);
|
||||
}
|
||||
|
||||
/*** Platform-specific constants ***/
|
||||
|
||||
/* maxint is the maximum value of a punycode_uint variable: */
|
||||
private static long maxint = 0xFFFFFFFFL;
|
||||
/* Because maxint is unsigned, -1 becomes the maximum value. */
|
||||
|
||||
/*** Bias adaptation function ***/
|
||||
|
||||
private static int adapt(int delta, int numpoints, boolean firsttime ){
|
||||
int k;
|
||||
|
||||
delta = (firsttime==true) ? delta / damp : delta >> 1;
|
||||
/* delta >> 1 is a faster way of doing delta / 2 */
|
||||
delta += delta / numpoints;
|
||||
|
||||
for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) {
|
||||
delta /= base - tmin;
|
||||
}
|
||||
|
||||
return k + (base - tmin + 1) * delta / (delta + skew);
|
||||
}
|
||||
|
||||
/*** Main encode function ***/
|
||||
|
||||
public static final int encode( int input_length,
|
||||
int input[],
|
||||
char[] case_flags,
|
||||
int[] output_length,
|
||||
char output[] ){
|
||||
int delta, h, b, out, max_out, bias, j, q, k, t;
|
||||
long m,n;
|
||||
/* Initialize the state: */
|
||||
|
||||
n = initial_n;
|
||||
delta = out = 0;
|
||||
max_out = output_length[0];
|
||||
bias = initial_bias;
|
||||
|
||||
/* Handle the basic code points: */
|
||||
|
||||
for (j = 0; j < input_length; ++j) {
|
||||
if (basic(input[j])) {
|
||||
if (max_out - out < 2) return punycode_big_output;
|
||||
output[out++] = (char)
|
||||
(case_flags!=null ? encode_basic(input[j], case_flags[j]) : input[j]);
|
||||
}
|
||||
/* else if (input[j] < n) return punycode_bad_input; */
|
||||
/* (not needed for Punycode with unsigned code points) */
|
||||
}
|
||||
|
||||
h = b = out;
|
||||
|
||||
/* h is the number of code points that have been handled, b is the */
|
||||
/* number of basic code points, and out is the number of characters */
|
||||
/* that have been output. */
|
||||
|
||||
if (b > 0) output[out++] = delimiter;
|
||||
|
||||
/* Main encoding loop: */
|
||||
|
||||
while (h < input_length) {
|
||||
/* All non-basic code points < n have been */
|
||||
/* handled already. Find the next larger one: */
|
||||
|
||||
for (m = maxint, j = 0; j < input_length; ++j) {
|
||||
/* if (basic(input[j])) continue; */
|
||||
/* (not needed for Punycode) */
|
||||
if (input[j] >= n && input[j] < m) m = input[j];
|
||||
}
|
||||
|
||||
/* Increase delta enough to advance the decoder's */
|
||||
/* <n,i> state to <m,0>, but guard against overflow: */
|
||||
|
||||
if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow;
|
||||
delta += (m - n) * (h + 1);
|
||||
n = m;
|
||||
|
||||
for (j = 0; j < input_length; ++j) {
|
||||
/* Punycode does not need to check whether input[j] is basic: */
|
||||
if (input[j] < n /* || basic(input[j]) */ ) {
|
||||
if (++delta == 0) return punycode_overflow;
|
||||
}
|
||||
|
||||
if (input[j] == n) {
|
||||
/* Represent delta as a generalized variable-length integer: */
|
||||
|
||||
for (q = delta, k = base; ; k += base) {
|
||||
if (out >= max_out) return punycode_big_output;
|
||||
t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
|
||||
k >= bias + tmax ? tmax : k - bias;
|
||||
if (q < t) break;
|
||||
output[out++] = encode_digit(t + (q - t) % (base - t), 0);
|
||||
q = (q - t) / (base - t);
|
||||
}
|
||||
|
||||
output[out++] = encode_digit(q, (case_flags !=null) ? case_flags[j] : 0);
|
||||
bias = adapt(delta, h + 1, (h == b));
|
||||
delta = 0;
|
||||
++h;
|
||||
}
|
||||
}
|
||||
|
||||
++delta;
|
||||
++n;
|
||||
}
|
||||
|
||||
output_length[0] = out;
|
||||
return punycode_success;
|
||||
}
|
||||
|
||||
public static final StringBuffer encode(StringBuffer input,char[] case_flags)
|
||||
throws ParseException{
|
||||
int[] in = new int[input.length()];
|
||||
int inLen = 0;
|
||||
int ch;
|
||||
StringBuffer result = new StringBuffer();
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(input);
|
||||
while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
|
||||
in[inLen++]=ch;
|
||||
}
|
||||
|
||||
int[] outLen = new int[1];
|
||||
outLen[0] = input.length()*4;
|
||||
char[] output = new char[outLen[0]];
|
||||
int rc = punycode_success;
|
||||
for(;;){
|
||||
rc = encode(inLen,in,case_flags, outLen, output);
|
||||
if(rc==punycode_big_output){
|
||||
outLen[0] = outLen[0]*4;
|
||||
output = new char[outLen[0]];
|
||||
// continue to convert
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if(rc==punycode_success){
|
||||
return result.append(output,0,outLen[0]);
|
||||
}
|
||||
getException(rc);
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void getException(int rc)
|
||||
throws ParseException{
|
||||
switch(rc){
|
||||
case punycode_big_output:
|
||||
throw new ParseException("The output capacity was not sufficient.",ParseException.BUFFER_OVERFLOW_ERROR);
|
||||
case punycode_bad_input:
|
||||
throw new ParseException("Illegal char found in the input",ParseException.ILLEGAL_CHAR_FOUND);
|
||||
case punycode_overflow:
|
||||
throw new ParseException("Invalid char found in the input",ParseException.INVALID_CHAR_FOUND);
|
||||
}
|
||||
|
||||
}
|
||||
private static final int MAX_BUFFER_SIZE = 100;
|
||||
|
||||
public static final StringBuffer decode(StringBuffer input,char[] case_flags)
|
||||
throws ParseException{
|
||||
char[] in = input.toString().toCharArray();
|
||||
int[] outLen = new int[1];
|
||||
outLen[0] = MAX_BUFFER_SIZE;
|
||||
int[] output = new int[outLen[0]];
|
||||
int rc = punycode_success;
|
||||
StringBuffer result = new StringBuffer();
|
||||
for(;;){
|
||||
rc = decode(input.length(),in, outLen, output,case_flags);
|
||||
if(rc==punycode_big_output){
|
||||
outLen[0] = output.length * 4;
|
||||
output = new int[outLen[0]];
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if(rc==punycode_success){
|
||||
for(int i=0; i < outLen[0]; i++ ){
|
||||
UTF16.append(result,output[i]);
|
||||
}
|
||||
}else{
|
||||
getException(rc);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*** Main decode function ***/
|
||||
public static final int decode(int input_length,
|
||||
char[] input,
|
||||
int[] output_length,
|
||||
int[] output,
|
||||
char[] case_flags ){
|
||||
int n, out, i, max_out, bias,
|
||||
b, j, in, oldi, w, k, digit, t;
|
||||
|
||||
/* Initialize the state: */
|
||||
|
||||
n = initial_n;
|
||||
out = i = 0;
|
||||
max_out = output_length[0];
|
||||
bias = initial_bias;
|
||||
|
||||
/* Handle the basic code points: Let b be the number of input code */
|
||||
/* points before the last delimiter, or 0 if there is none, then */
|
||||
/* copy the first b code points to the output. */
|
||||
|
||||
for (b = j = 0; j < input_length; ++j){
|
||||
if (delim(input[j])==true){
|
||||
b = j;
|
||||
}
|
||||
}
|
||||
if (b > max_out) return punycode_big_output;
|
||||
|
||||
for (j = 0; j < b; ++j) {
|
||||
if (case_flags != null) case_flags[out] = (char)(flagged(input[j]) ? 1 : 0);
|
||||
if (!basic(input[j])) return punycode_bad_input;
|
||||
output[out++] = input[j];
|
||||
}
|
||||
|
||||
/* Main decoding loop: Start just after the last delimiter if any */
|
||||
/* basic code points were copied; start at the beginning otherwise. */
|
||||
|
||||
for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {
|
||||
|
||||
/* in is the index of the next character to be consumed, and */
|
||||
/* out is the number of code points in the output array. */
|
||||
|
||||
/* Decode a generalized variable-length integer into delta, */
|
||||
/* which gets added to i. The overflow checking is easier */
|
||||
/* if we increase i as we go, then subtract off its starting */
|
||||
/* value at the end to obtain delta. */
|
||||
|
||||
for (oldi = i, w = 1, k = base; ; k += base) {
|
||||
if (in >= input_length) return punycode_bad_input;
|
||||
digit = decode_digit(input[in++]);
|
||||
if (digit >= base) return punycode_bad_input;
|
||||
if (digit > (maxint - i) / w) return punycode_overflow;
|
||||
i += digit * w;
|
||||
t = (k <= bias) /* + tmin */ ? tmin : /* +tmin not needed */
|
||||
(k >= (bias + tmax)) ? tmax : k - bias;
|
||||
if (digit < t) break;
|
||||
if (w > maxint / (base - t)) return punycode_overflow;
|
||||
w *= (base - t);
|
||||
}
|
||||
|
||||
bias = adapt(i - oldi, out + 1, (oldi == 0));
|
||||
|
||||
/* i was supposed to wrap around from out+1 to 0, */
|
||||
/* incrementing n each time, so we'll fix that now: */
|
||||
|
||||
if (i / (out + 1) > maxint - n) return punycode_overflow;
|
||||
n += i / (out + 1);
|
||||
i %= (out + 1);
|
||||
|
||||
/* Insert n at position i of the output: */
|
||||
|
||||
/* not needed for Punycode: */
|
||||
/* if (decode_digit(n) <= base) return punycode_invalid_input; */
|
||||
if (out >= max_out) return punycode_big_output;
|
||||
|
||||
if (case_flags != null) {
|
||||
System.arraycopy(case_flags, i, case_flags, i + 1, out - i);
|
||||
/* Case of last character determines uppercase flag: */
|
||||
case_flags[i] = (char)(flagged(input[in - 1]) ? 0 :1);
|
||||
}
|
||||
|
||||
System.arraycopy(output, i, output, i + 1, (out - i));
|
||||
output[i++] = n;
|
||||
}
|
||||
|
||||
output_length[0] = out;
|
||||
return punycode_success;
|
||||
}
|
||||
|
||||
}
|
42
icu4j/src/com/ibm/icu/dev/test/stringprep/TestAll.java
Normal file
42
icu4j/src/com/ibm/icu/dev/test/stringprep/TestAll.java
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestAll.java,v $
|
||||
* $Date: 2003/08/21 23:42:25 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk.TestGroup;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class TestAll extends TestGroup {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestAll().run(args);
|
||||
}
|
||||
|
||||
public TestAll() {
|
||||
super(
|
||||
new String[] {
|
||||
"TestIDNA",
|
||||
"TestStringPrep",
|
||||
"TestIDNARef"
|
||||
},
|
||||
"StringPrep and IDNA test");
|
||||
}
|
||||
|
||||
public static final String CLASS_TARGET_NAME = "StringPrep";
|
||||
|
||||
|
||||
}
|
631
icu4j/src/com/ibm/icu/dev/test/stringprep/TestData.java
Normal file
631
icu4j/src/com/ibm/icu/dev/test/stringprep/TestData.java
Normal file
@ -0,0 +1,631 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestData.java,v $
|
||||
* $Date: 2003/08/21 23:42:25 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import com.ibm.icu.stringprep.IDNA;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
|
||||
/**
 * Static test data shared by the StringPrep and IDNA tests: Unicode inputs,
 * their expected ASCII forms, sample domain names and error cases.
 *
 * @author ram
 */
|
||||
public class TestData {
|
||||
public static final char[][] unicodeIn ={
|
||||
{
|
||||
0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
|
||||
0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F
|
||||
},
|
||||
{
|
||||
0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587,
|
||||
|
||||
},
|
||||
{
|
||||
0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
|
||||
0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
|
||||
0x0065, 0x0073, 0x006B, 0x0079,
|
||||
},
|
||||
{
|
||||
0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
|
||||
0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
|
||||
0x05D1, 0x05E8, 0x05D9, 0x05EA,
|
||||
},
|
||||
{
|
||||
0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
|
||||
0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
|
||||
0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
|
||||
0x0939, 0x0948, 0x0902,
|
||||
},
|
||||
{
|
||||
0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
|
||||
0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B,
|
||||
|
||||
},
|
||||
/*
|
||||
{
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C,
|
||||
},
|
||||
*/
|
||||
{
|
||||
0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
|
||||
0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
|
||||
0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
|
||||
0x0438,
|
||||
},
|
||||
{
|
||||
0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
|
||||
0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
|
||||
0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
|
||||
0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
|
||||
0x0061, 0x00F1, 0x006F, 0x006C,
|
||||
},
|
||||
{
|
||||
0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587,
|
||||
|
||||
},
|
||||
{
|
||||
0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
|
||||
0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
|
||||
0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
|
||||
0x0056, 0x0069, 0x1EC7, 0x0074,
|
||||
},
|
||||
{
|
||||
0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F,
|
||||
},
|
||||
{
|
||||
0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
|
||||
0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
|
||||
0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053,
|
||||
},
|
||||
{
|
||||
0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
|
||||
0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
|
||||
0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240,
|
||||
},
|
||||
{
|
||||
0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032,
|
||||
},
|
||||
{
|
||||
0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
|
||||
0x308B, 0x0035, 0x79D2, 0x524D,
|
||||
},
|
||||
{
|
||||
0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0,
|
||||
|
||||
},
|
||||
{
|
||||
0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067,
|
||||
},
|
||||
// test non-BMP code points
|
||||
{
|
||||
0xD800, 0xDF00, 0xD800, 0xDF01, 0xD800, 0xDF02, 0xD800, 0xDF03, 0xD800, 0xDF05,
|
||||
0xD800, 0xDF06, 0xD800, 0xDF07, 0xD800, 0xDF09, 0xD800, 0xDF0A, 0xD800, 0xDF0B,
|
||||
|
||||
},
|
||||
{
|
||||
0xD800, 0xDF0D, 0xD800, 0xDF0C, 0xD800, 0xDF1E, 0xD800, 0xDF0F, 0xD800, 0xDF16,
|
||||
0xD800, 0xDF15, 0xD800, 0xDF14, 0xD800, 0xDF12, 0xD800, 0xDF10, 0xD800, 0xDF20,
|
||||
0xD800, 0xDF21,
|
||||
|
||||
},
|
||||
// Greek
|
||||
{
|
||||
0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac
|
||||
},
|
||||
// Maltese
|
||||
{
|
||||
0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
|
||||
0x0127, 0x0061
|
||||
},
|
||||
// Russian
|
||||
{
|
||||
0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
|
||||
0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
|
||||
0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
|
||||
0x0441, 0x0441, 0x043a, 0x0438
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
public static final String[] asciiIn = {
|
||||
"xn--egbpdaj6bu4bxfgehfvwxn",
|
||||
"xn--ihqwcrb4cv8a8dqg056pqjye",
|
||||
"xn--Proprostnemluvesky-uyb24dma41a",
|
||||
"xn--4dbcagdahymbxekheh6e0a7fei0b",
|
||||
"xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
|
||||
"xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
|
||||
/* "xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",*/
|
||||
"xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l",
|
||||
"xn--PorqunopuedensimplementehablarenEspaol-fmd56a",
|
||||
"xn--ihqwctvzc91f659drss3x8bo0yb",
|
||||
"xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
|
||||
"xn--3B-ww4c5e180e575a65lsy2b",
|
||||
"xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
|
||||
"xn--Hello-Another-Way--fc4qua05auwb3674vfr0b",
|
||||
"xn--2-u9tlzr9756bt3uc0v",
|
||||
"xn--MajiKoi5-783gue6qz075azm5e",
|
||||
"xn--de-jg4avhby1noc0d",
|
||||
"xn--d9juau41awczczp",
|
||||
"XN--097CCDEKGHQJK",
|
||||
"XN--db8CBHEJLGH4E0AL",
|
||||
"xn--hxargifdar", // Greek
|
||||
"xn--bonusaa-5bb1da", // Maltese
|
||||
"xn--b1abfaaepdrnnbgefbadotcwatmq2g4l", // Russian (Cyrillic)
|
||||
};
|
||||
|
||||
public static final String[] domainNames = {
|
||||
"slip129-37-118-146.nc.us.ibm.net",
|
||||
"saratoga.pe.utexas.edu",
|
||||
"dial-120-45.ots.utexas.edu",
|
||||
"woo-085.dorms.waller.net",
|
||||
"hd30-049.hil.compuserve.com",
|
||||
"pem203-31.pe.ttu.edu",
|
||||
"56K-227.MaxTNT3.pdq.net",
|
||||
"dial-36-2.ots.utexas.edu",
|
||||
"slip129-37-23-152.ga.us.ibm.net",
|
||||
"ts45ip119.cadvision.com",
|
||||
"sdn-ts-004txaustP05.dialsprint.net",
|
||||
"bar-tnt1s66.erols.com",
|
||||
"101.st-louis-15.mo.dial-access.att.net",
|
||||
"h92-245.Arco.COM",
|
||||
"dial-13-2.ots.utexas.edu",
|
||||
"net-redynet29.datamarkets.com.ar",
|
||||
"ccs-shiva28.reacciun.net.ve",
|
||||
"7.houston-11.tx.dial-access.att.net",
|
||||
"ingw129-37-120-26.mo.us.ibm.net",
|
||||
"dialup6.austintx.com",
|
||||
"dns2.tpao.gov.tr",
|
||||
"slip129-37-119-194.nc.us.ibm.net",
|
||||
"cs7.dillons.co.uk.203.119.193.in-addr.arpa",
|
||||
"swprd1.innovplace.saskatoon.sk.ca",
|
||||
"bikini.bologna.maraut.it",
|
||||
"node91.subnet159-198-79.baxter.com",
|
||||
"cust19.max5.new-york.ny.ms.uu.net",
|
||||
"balexander.slip.andrew.cmu.edu",
|
||||
"pool029.max2.denver.co.dynip.alter.net",
|
||||
"cust49.max9.new-york.ny.ms.uu.net",
|
||||
"s61.abq-dialin2.hollyberry.com",
|
||||
|
||||
};
|
||||
|
||||
public static final String[] domainNames1Uni = {
|
||||
"http://\u0917\u0928\u0947\u0936.sanjose.ibm.com",
|
||||
"www.\u0121.com",
|
||||
"www.\u00E0\u00B3\u00AF.com",
|
||||
"www.\u00C2\u00A4.com",
|
||||
"www.\u00C2\u00A3.com",
|
||||
"\u0025",
|
||||
"\u005C\u005C",
|
||||
"@",
|
||||
"\u002F",
|
||||
"www.\u0021.com",
|
||||
"www.\u0024.com",
|
||||
"\u003f",
|
||||
// These yield U_IDNA_PROHIBITED_ERROR
|
||||
//"\\u00CF\\u0082.com",
|
||||
//"\\u00CE\\u00B2\\u00C3\\u009Fss.com",
|
||||
//"\\u00E2\\u0098\\u00BA.com",
|
||||
"\u00C3\u00BC.com"
|
||||
};
|
||||
public static final String[] domainNamesToASCIIOut = {
|
||||
"xn--http://-3mo7iufsh.sanjose.ibm.com",
|
||||
"www.xn--vea.com",
|
||||
"www.xn--3 -iia80t.com",
|
||||
"www.xn--bba7j.com",
|
||||
"www.xn--9a9j.com",
|
||||
"\u0025",
|
||||
"\u005C\u005C",
|
||||
"@",
|
||||
"\u002F",
|
||||
"www.\u0021.com",
|
||||
"www.\u0024.com",
|
||||
"\u003f",
|
||||
"xn--14-ria7423a.com"
|
||||
|
||||
};
|
||||
|
||||
public static final String[] domainNamesToUnicodeOut = {
|
||||
"http://\u0917\u0928\u0947\u0936.sanjose.ibm.com",
|
||||
"www.\u0121.com",
|
||||
"www.\u00E0\u0033\u0020\u0304.com",
|
||||
"www.\u00E2\u00A4.com",
|
||||
"www.\u00E2\u00A3.com",
|
||||
"\u0025",
|
||||
"\u005C\u005C",
|
||||
"@",
|
||||
"\u002F",
|
||||
"www.\u0021.com",
|
||||
"www.\u0024.com",
|
||||
"\u003f",
|
||||
"\u00E3\u0031\u2044\u0034.com"
|
||||
|
||||
};
|
||||
|
||||
|
||||
public static class ErrorCase{
|
||||
|
||||
public char[] unicode;
|
||||
public String ascii;
|
||||
public Exception expected;
|
||||
public boolean useSTD3ASCIIRules;
|
||||
public boolean testToUnicode;
|
||||
public boolean testLabel;
|
||||
ErrorCase(char[] uniIn, String asciiIn, Exception ex,
|
||||
boolean std3, boolean testToUni, boolean testlabel){
|
||||
unicode = uniIn;
|
||||
ascii = asciiIn;
|
||||
expected = ex;
|
||||
useSTD3ASCIIRules = std3;
|
||||
testToUnicode = testToUni;
|
||||
testLabel = testlabel;
|
||||
|
||||
}
|
||||
};
|
||||
public static final ErrorCase[] errorCases = {
|
||||
|
||||
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x070F,/*prohibited*/
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
},
|
||||
"www.XN--8mb5595fsoa28orucya378bqre2tcwop06c5qbw82a1rffmae0361dea96b.com",
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR),
|
||||
false, true, true),
|
||||
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x0221, 0x0234/*Unassigned code points*/,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
},
|
||||
"www.XN--6lA2Bz548Fj1GuA391Bf1Gb1N59Ab29A7iA.com",
|
||||
|
||||
new ParseException("",ParseException.UNASSIGNED_ERROR),
|
||||
false, true, true
|
||||
),
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x0644, 0x064A, 0x0647,/*Arabic code points. Cannot mix RTL with LTR*/
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
},
|
||||
"www.xn--ghBGI4851OiyA33VqrD6Az86C4qF83CtRv93D5xBk15AzfG0nAgA0578DeA71C.com",
|
||||
new ParseException("",ParseException.CHECK_BIDI_ERROR),
|
||||
false, true, true
|
||||
),
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
/* labels cannot begin with an HYPHEN */
|
||||
0x002D, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x002E,
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
|
||||
},
|
||||
"www.xn----b95Ew8SqA315Ao5FbuMlnNmhA.com",
|
||||
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
|
||||
true, true, false
|
||||
),
|
||||
new ErrorCase( new char[]{
|
||||
/* correct ACE-prefix followed by unicode */
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0x0078, 0x006e, 0x002d,0x002d, /* ACE Prefix */
|
||||
0x002D, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x002D,
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
|
||||
},
|
||||
/* wrong ACE-prefix followed by valid ACE-encoded ASCII */
|
||||
"www.XY-----b91I0V65S96C2A355Cw1E5yCeQr19CsnP1mFfmAE0361DeA96B.com",
|
||||
new ParseException("",ParseException.ACE_PREFIX_ERROR),
|
||||
false, false, false
|
||||
),
|
||||
/* cannot verify U_IDNA_VERIFICATION_ERROR */
|
||||
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
},
|
||||
"www.xn--989AoMsVi5E83Db1D2A355Cv1E0vAk1DwRv93D5xBh15A0Dt30A5JpSD879Ccm6FeA98C.com",
|
||||
new ParseException("",ParseException.LABEL_TOO_LONG_ERROR),
|
||||
false, true, true
|
||||
),
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0x0030, 0x0644, 0x064A, 0x0647, 0x0031, /* Arabic code points squashed between EN codepoints */
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
|
||||
},
|
||||
"www.xn--01-tvdmo.com",
|
||||
new ParseException("",ParseException.CHECK_BIDI_ERROR),
|
||||
false, true, true
|
||||
),
|
||||
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, // www.
|
||||
0x206C, 0x0644, 0x064A, 0x0647, 0x206D, // Arabic code points squashed between BN codepoints
|
||||
0x002e, 0x0063, 0x006f, 0x006d, // com.
|
||||
|
||||
},
|
||||
"www.XN--ghbgi278xia.com",
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR),
|
||||
false, true, true
|
||||
),
|
||||
new ErrorCase( new char[] {
|
||||
0x0077, 0x0077, 0x0077, 0x002e, // www.
|
||||
0x002D, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, // HYPHEN at the start of label
|
||||
0x002e, 0x0063, 0x006f, 0x006d, // com.
|
||||
|
||||
},
|
||||
"www.-abcde.com",
|
||||
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
|
||||
true, true, false
|
||||
),
|
||||
new ErrorCase( new char[] {
|
||||
0x0077, 0x0077, 0x0077, 0x002e, // www.
|
||||
0x0041, 0x0042, 0x0043, 0x0044, 0x0045,0x002D, // HYPHEN at the end of the label
|
||||
0x002e, 0x0063, 0x006f, 0x006d, // com.
|
||||
|
||||
},
|
||||
"www.abcde-.com",
|
||||
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
|
||||
true, true, false
|
||||
),
|
||||
new ErrorCase( new char[]{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, // www.
|
||||
0x0041, 0x0042, 0x0043, 0x0044, 0x0045,0x0040, // Containing non LDH code point
|
||||
0x002e, 0x0063, 0x006f, 0x006d, // com.
|
||||
|
||||
},
|
||||
"www.abcde@.com",
|
||||
new ParseException("",ParseException.STD3_ASCII_RULES_ERROR),
|
||||
true, true, false
|
||||
),
|
||||
|
||||
};
|
||||
|
||||
|
||||
public static final class ConformanceTestCase{
|
||||
String comment;
|
||||
String input;
|
||||
String output;
|
||||
String profile;
|
||||
int flags;
|
||||
Exception expected;
|
||||
private static byte[] getBytes(String in){
|
||||
if(in==null){
|
||||
return null;
|
||||
}
|
||||
byte[] bytes = new byte[in.length()];
|
||||
for(int i=0; i < in.length();i++){
|
||||
bytes[i] = (byte)in.charAt(i);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
ConformanceTestCase(String comt, String in, String out,
|
||||
String prof, int flg, Exception ex)
|
||||
{
|
||||
|
||||
try{
|
||||
comment = comt;
|
||||
byte[] bytes = getBytes(in);
|
||||
input = new String(bytes,"UTF-8");
|
||||
bytes = getBytes(out);
|
||||
output = (bytes==null)? null : new String(bytes,"UTF-8");
|
||||
profile = prof;
|
||||
flags = flg;
|
||||
expected = ex;
|
||||
}catch (Exception e){
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static final ConformanceTestCase[] conformanceTestCases =
|
||||
{
|
||||
|
||||
new ConformanceTestCase(
|
||||
"Case folding ASCII U+0043 U+0041 U+0046 U+0045",
|
||||
"\u0043\u0041\u0046\u0045", "\u0063\u0061\u0066\u0065",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
null
|
||||
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Case folding 8bit U+00DF (german sharp s)",
|
||||
"\u00C3\u009F", "\u0073\u0073",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
null
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Non-ASCII multibyte space character U+1680",
|
||||
"\u00E1\u009A\u0080", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Non-ASCII 8bit control character U+0085",
|
||||
"\u00C2\u0085", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Non-ASCII multibyte control character U+180E",
|
||||
"\u00E1\u00A0\u008E", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Non-ASCII control character U+1D175",
|
||||
"\u00F0\u009D\u0085\u00B5", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Plane 0 private use character U+F123",
|
||||
"\u00EF\u0084\u00A3", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Plane 15 private use character U+F1234",
|
||||
"\u00F3\u00B1\u0088\u00B4", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Plane 16 private use character U+10F234",
|
||||
"\u00F4\u008F\u0088\u00B4", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Non-character code point U+8FFFE",
|
||||
"\u00F2\u008F\u00BF\u00BE", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Non-character code point U+10FFFF",
|
||||
"\u00F4\u008F\u00BF\u00BF", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
/*
|
||||
{
|
||||
"Surrogate code U+DF42",
|
||||
"\u00ED\u00BD\u0082", null, "Nameprep", InternationalizedDomainNames.DEFAULT,
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
*/
|
||||
new ConformanceTestCase(
|
||||
"Non-plain text character U+FFFD",
|
||||
"\u00EF\u00BF\u00BD", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Ideographic description character U+2FF5",
|
||||
"\u00E2\u00BF\u00B5", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Display property character U+0341",
|
||||
"\u00CD\u0081", "\u00CC\u0081",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
null
|
||||
|
||||
),
|
||||
|
||||
new ConformanceTestCase(
|
||||
"Left-to-right mark U+200E",
|
||||
"\u00E2\u0080\u008E", "\u00CC\u0081",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
|
||||
"Deprecated U+202A",
|
||||
"\u00E2\u0080\u00AA", "\u00CC\u0081",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Language tagging character U+E0001",
|
||||
"\u00F3\u00A0\u0080\u0081", "\u00CC\u0081",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Language tagging character U+E0042",
|
||||
"\u00F3\u00A0\u0081\u0082", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.PROHIBITED_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Bidi: RandALCat character U+05BE and LCat characters",
|
||||
"\u0066\u006F\u006F\u00D6\u00BE\u0062\u0061\u0072", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.CHECK_BIDI_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Bidi: RandALCat character U+FD50 and LCat characters",
|
||||
"\u0066\u006F\u006F\u00EF\u00B5\u0090\u0062\u0061\u0072", null,
|
||||
"Nameprep",IDNA.DEFAULT ,
|
||||
new ParseException("",ParseException.CHECK_BIDI_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Bidi: RandALCat character U+FB38 and LCat characters",
|
||||
"\u0066\u006F\u006F\u00EF\u00B9\u00B6\u0062\u0061\u0072", "\u0066\u006F\u006F \u00d9\u008e\u0062\u0061\u0072",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
null
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
|
||||
"\u00D8\u00A7\u0031", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.CHECK_BIDI_ERROR)
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Bidi: RandALCat character U+0627 U+0031 U+0628",
|
||||
"\u00D8\u00A7\u0031\u00D8\u00A8", "\u00D8\u00A7\u0031\u00D8\u00A8",
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
null
|
||||
),
|
||||
new ConformanceTestCase(
|
||||
"Unassigned code point U+E0002",
|
||||
"\u00F3\u00A0\u0080\u0082", null,
|
||||
"Nameprep", IDNA.DEFAULT,
|
||||
new ParseException("",ParseException.UNASSIGNED_ERROR)
|
||||
),
|
||||
|
||||
/* // Invalid UTF-8
|
||||
{
|
||||
"Larger test (shrinking)",
|
||||
"X\u00C2\u00AD\u00C3\u00DF\u00C4\u00B0\u00E2\u0084\u00A1\u006a\u00cc\u008c\u00c2\u00a0\u00c2"
|
||||
"\u00aa\u00ce\u00b0\u00e2\u0080\u0080", "xssi\u00cc\u0087""tel\u00c7\u00b0 a\u00ce\u00b0 ",
|
||||
"Nameprep",
|
||||
InternationalizedDomainNames.DEFAULT, U_ZERO_ERROR
|
||||
},
|
||||
{
|
||||
|
||||
"Larger test (expanding)",
|
||||
"X\u00C3\u00DF\u00e3\u008c\u0096\u00C4\u00B0\u00E2\u0084\u00A1\u00E2\u0092\u009F\u00E3\u008c\u0080",
|
||||
"xss\u00e3\u0082\u00ad\u00e3\u0083\u00ad\u00e3\u0083\u00a1\u00e3\u0083\u00bc\u00e3\u0083\u0088"
|
||||
"\u00e3\u0083\u00ab""i\u00cc\u0087""tel\u0028""d\u0029\u00e3\u0082\u00a2\u00e3\u0083\u0091"
|
||||
"\u00e3\u0083\u00bc\u00e3\u0083\u0088"
|
||||
"Nameprep",
|
||||
InternationalizedDomainNames.DEFAULT, U_ZERO_ERROR
|
||||
},
|
||||
*/
|
||||
};
|
||||
}
|
700
icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java
Normal file
700
icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java
Normal file
@ -0,0 +1,700 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java,v $
|
||||
* $Date: 2003/08/21 23:42:21 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Random;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.stringprep.IDNA;
|
||||
import com.ibm.icu.stringprep.StringPrep;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.impl.LocaleUtility;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*/
|
||||
public class TestIDNA extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestIDNA().run(args);
|
||||
}
|
||||
// Reference exception carrying UNASSIGNED_ERROR. The doTest* helpers and
// TestNamePrepConformance compare the expected exception against it: when
// the two are equal, a conversion that succeeds is not reported as a failure.
private ParseException unassignedException = new ParseException("",ParseException.UNASSIGNED_ERROR);
|
||||
public void TestToUnicode() throws Exception{
|
||||
for(int i=0; i<TestData.asciiIn.length; i++){
|
||||
// test StringBuffer toUnicode
|
||||
doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNA.DEFAULT, null);
|
||||
doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNA.ALLOW_UNASSIGNED, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNA.USE_STD3_RULES, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNA.USE_STD3_RULES|IDNA.ALLOW_UNASSIGNED, null);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void TestToASCII() throws Exception{
|
||||
for(int i=0; i<TestData.asciiIn.length; i++){
|
||||
// test StringBuffer toUnicode
|
||||
doTestToASCII(new String(TestData.unicodeIn[i]),TestData.asciiIn[i],IDNA.DEFAULT, null);
|
||||
doTestToASCII(new String(TestData.unicodeIn[i]),TestData.asciiIn[i],IDNA.ALLOW_UNASSIGNED, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNA.USE_STD3_RULES, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNA.USE_STD3_RULES|IDNA.ALLOW_UNASSIGNED, null);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void TestIDNToASCII() throws Exception{
|
||||
for(int i=0; i<TestData.domainNames.length; i++){
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNA.DEFAULT, null);
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNA.ALLOW_UNASSIGNED, null);
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNA.USE_STD3_RULES, null);
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNA.ALLOW_UNASSIGNED|IDNA.USE_STD3_RULES, null);
|
||||
}
|
||||
|
||||
for(int i=0; i<TestData.domainNames1Uni.length; i++){
|
||||
doTestIDNToASCII(TestData.domainNames1Uni[i],TestData.domainNamesToASCIIOut[i],IDNA.DEFAULT, null);
|
||||
doTestIDNToASCII(TestData.domainNames1Uni[i],TestData.domainNamesToASCIIOut[i],IDNA.ALLOW_UNASSIGNED, null);
|
||||
}
|
||||
}
|
||||
public void TestIDNToUnicode() throws Exception{
|
||||
for(int i=0; i<TestData.domainNames.length; i++){
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNA.DEFAULT, null);
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNA.ALLOW_UNASSIGNED, null);
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNA.USE_STD3_RULES, null);
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNA.ALLOW_UNASSIGNED|IDNA.USE_STD3_RULES, null);
|
||||
}
|
||||
for(int i=0; i<TestData.domainNamesToASCIIOut.length; i++){
|
||||
doTestIDNToUnicode(TestData.domainNamesToASCIIOut[i],TestData.domainNamesToUnicodeOut[i],IDNA.DEFAULT, null);
|
||||
doTestIDNToUnicode(TestData.domainNamesToASCIIOut[i],TestData.domainNamesToUnicodeOut[i],IDNA.ALLOW_UNASSIGNED, null);
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestToUnicode(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNA.convertToUnicode(src,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNA.convertToUnicode(inBuf,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNA.convertToUnicode(inIter,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("Did not get the expected exception for source: " + prettify(src) +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestIDNToUnicode(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNA.convertIDNToUnicode(src,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
try{
|
||||
StringBuffer out = IDNA.convertIDNToUnicode(inBuf,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNA.convertIDNToUnicode(inIter,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("Did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
private void doTestToASCII(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNA.convertToASCII(src,options);
|
||||
if(!unassignedException.equals(expectedException) && expected!=null && out != null && expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToASCII did not get the expected exception for source: " +src +"\n Got: "+ ex.toString() +"\n Expected: " +ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNA.convertToASCII(inBuf,options);
|
||||
if(!unassignedException.equals(expectedException) && expected!=null && out != null && expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNA.convertToASCII(inIter,options);
|
||||
if(!unassignedException.equals(expectedException) && expected!=null && out != null && expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+ out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
private void doTestIDNToASCII(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNA.convertIDNToASCII(src,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToIDNASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToIDNASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToIDNASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
try{
|
||||
StringBuffer out = IDNA.convertIDNtoASCII(inBuf,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToIDNASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToIDNASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToIDNASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNA.convertIDNtoASCII(inIter,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertIDNToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+ out);
|
||||
}
|
||||
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertIDNToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertIDNToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
public void TestConformance()throws Exception{
|
||||
for(int i=0; i<TestData.conformanceTestCases.length;i++){
|
||||
|
||||
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
|
||||
if(testCase.expected != null){
|
||||
//Test toASCII
|
||||
doTestToASCII(testCase.input,testCase.output,IDNA.DEFAULT,testCase.expected);
|
||||
doTestToASCII(testCase.input,testCase.output,IDNA.ALLOW_UNASSIGNED,testCase.expected);
|
||||
}
|
||||
//Test toUnicode
|
||||
//doTestToUnicode(testCase.input,testCase.output,IDNA.DEFAULT,testCase.expected);
|
||||
}
|
||||
}
|
||||
public void TestNamePrepConformance() throws Exception{
|
||||
InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
|
||||
StringPrep namePrep = StringPrep.getInstance(stream);
|
||||
for(int i=0; i<TestData.conformanceTestCases.length;i++){
|
||||
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
|
||||
try{
|
||||
StringBuffer output = namePrep.prepare(iter,StringPrep.NONE);
|
||||
if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
|
||||
errln("Did not get the expected output. Expected: " + prettify(testCase.output)+
|
||||
" Got: "+ prettify(output) );
|
||||
}
|
||||
if(testCase.expected!=null && !unassignedException.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(testCase.expected == null || !ex.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception for source: " +testCase.input +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
iter.setToStart();
|
||||
StringBuffer output = namePrep.prepare(iter,StringPrep.ALLOW_UNASSIGNED);
|
||||
if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
|
||||
errln("Did not get the expected output. Expected: " + prettify(testCase.output)+
|
||||
" Got: "+ prettify(output) );
|
||||
}
|
||||
if(testCase.expected!=null && !unassignedException.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(testCase.expected == null || !ex.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception for source: " +testCase.input +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
public void TestErrorCases() throws Exception{
|
||||
for(int i=0; i < TestData.errorCases.length; i++){
|
||||
TestData.ErrorCase errCase = TestData.errorCases[i];
|
||||
if(errCase.testLabel==true){
|
||||
// Test ToASCII
|
||||
doTestToASCII(new String(errCase.unicode),errCase.ascii,IDNA.DEFAULT,errCase.expected);
|
||||
doTestToASCII(new String(errCase.unicode),errCase.ascii,IDNA.ALLOW_UNASSIGNED,errCase.expected);
|
||||
if(errCase.useSTD3ASCIIRules){
|
||||
doTestToASCII(new String(errCase.unicode),errCase.ascii,IDNA.USE_STD3_RULES,errCase.expected);
|
||||
}
|
||||
}
|
||||
if(errCase.useSTD3ASCIIRules!=true){
|
||||
|
||||
// Test IDNToASCII
|
||||
doTestIDNToASCII(new String(errCase.unicode),errCase.ascii,IDNA.DEFAULT,errCase.expected);
|
||||
doTestIDNToASCII(new String(errCase.unicode),errCase.ascii,IDNA.ALLOW_UNASSIGNED,errCase.expected);
|
||||
|
||||
}else{
|
||||
doTestIDNToASCII(new String(errCase.unicode),errCase.ascii,IDNA.USE_STD3_RULES,errCase.expected);
|
||||
}
|
||||
|
||||
//TestToUnicode
|
||||
if(errCase.testToUnicode==true){
|
||||
if(errCase.useSTD3ASCIIRules!=true){
|
||||
// Test IDNToUnicode
|
||||
doTestIDNToUnicode(errCase.ascii,new String(errCase.unicode),IDNA.DEFAULT,errCase.expected);
|
||||
doTestIDNToUnicode(errCase.ascii,new String(errCase.unicode),IDNA.ALLOW_UNASSIGNED,errCase.expected);
|
||||
|
||||
}else{
|
||||
doTestIDNToUnicode(errCase.ascii,new String(errCase.unicode),IDNA.USE_STD3_RULES,errCase.expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private void doTestCompare(String s1, String s2, boolean isEqual){
|
||||
try{
|
||||
int retVal = IDNA.compare(s1,s2,IDNA.DEFAULT);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNA.compare(new StringBuffer(s1), new StringBuffer(s2), IDNA.DEFAULT);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNA.compare(UCharacterIterator.getInstance(s1), UCharacterIterator.getInstance(s2), IDNA.DEFAULT);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
}catch(Exception e){
|
||||
e.printStackTrace();
|
||||
errln("Unexpected exception thrown by IDNA.compare");
|
||||
}
|
||||
|
||||
try{
|
||||
int retVal = IDNA.compare(s1,s2,IDNA.ALLOW_UNASSIGNED);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNA.compare(new StringBuffer(s1), new StringBuffer(s2), IDNA.ALLOW_UNASSIGNED);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNA.compare(UCharacterIterator.getInstance(s1), UCharacterIterator.getInstance(s2), IDNA.ALLOW_UNASSIGNED);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
}catch(Exception e){
|
||||
errln("Unexpected exception thrown by IDNA.compare");
|
||||
}
|
||||
}
|
||||
public void TestCompare() throws Exception{
|
||||
String www = "www.";
|
||||
String com = ".com";
|
||||
StringBuffer source = new StringBuffer(www);
|
||||
StringBuffer uni0 = new StringBuffer(www);
|
||||
StringBuffer uni1 = new StringBuffer(www);
|
||||
StringBuffer ascii0 = new StringBuffer(www);
|
||||
StringBuffer ascii1 = new StringBuffer(www);
|
||||
|
||||
uni0.append(TestData.unicodeIn[0]);
|
||||
uni0.append(com);
|
||||
|
||||
uni1.append(TestData.unicodeIn[1]);
|
||||
uni1.append(com);
|
||||
|
||||
ascii0.append(TestData.asciiIn[0]);
|
||||
ascii0.append(com);
|
||||
|
||||
ascii1.append(TestData.asciiIn[1]);
|
||||
ascii1.append(com);
|
||||
|
||||
for(int i=0;i< TestData.unicodeIn.length; i++){
|
||||
|
||||
// for every entry in unicodeIn array
|
||||
// prepend www. and append .com
|
||||
source.setLength(4);
|
||||
source.append(TestData.unicodeIn[i]);
|
||||
source.append(com);
|
||||
|
||||
// a) compare it with itself
|
||||
doTestCompare(source.toString(),source.toString(),true);
|
||||
|
||||
// b) compare it with asciiIn equivalent
|
||||
doTestCompare(source.toString(),www+TestData.asciiIn[i]+com,true);
|
||||
|
||||
// c) compare it with unicodeIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(), uni1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),uni0.toString(), false);
|
||||
}
|
||||
// d) compare it with asciiIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(),ascii1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),ascii0.toString(), false);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// test and ascertain
|
||||
// func(func(func(src))) == func(src)
|
||||
public void doTestChainingToASCII(String source)throws Exception{
|
||||
StringBuffer expected;
|
||||
StringBuffer chained;
|
||||
|
||||
// test convertIDNToASCII
|
||||
expected = IDNA.convertIDNToASCII(source,IDNA.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNA.convertIDNtoASCII(chained,IDNA.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertIDNToASCII");
|
||||
}
|
||||
// test convertIDNToA
|
||||
expected = IDNA.convertToASCII(source,IDNA.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNA.convertToASCII(chained,IDNA.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertToASCII");
|
||||
}
|
||||
}
|
||||
// test and ascertain
|
||||
// func(func(func(src))) == func(src)
|
||||
public void doTestChainingToUnicode(String source)throws Exception{
|
||||
StringBuffer expected;
|
||||
StringBuffer chained;
|
||||
|
||||
// test convertIDNToUnicode
|
||||
expected = IDNA.convertIDNToUnicode(source,IDNA.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNA.convertIDNToUnicode(chained,IDNA.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertIDNToUnicode");
|
||||
}
|
||||
// test convertIDNToA
|
||||
expected = IDNA.convertToUnicode(source,IDNA.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNA.convertToUnicode(chained,IDNA.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertToUnicode");
|
||||
}
|
||||
}
|
||||
public void TestChaining() throws Exception{
|
||||
for(int i=0; i< TestData.asciiIn.length; i++){
|
||||
doTestChainingToUnicode(TestData.asciiIn[i]);
|
||||
}
|
||||
for(int i=0; i< TestData.unicodeIn.length; i++){
|
||||
doTestChainingToASCII(new String(TestData.unicodeIn[i]));
|
||||
}
|
||||
}
|
||||
public void TestRootLabelSeparator() throws Exception{
|
||||
String www = "www.";
|
||||
String com = ".com."; /*root label separator*/
|
||||
StringBuffer source = new StringBuffer(www);
|
||||
StringBuffer uni0 = new StringBuffer(www);
|
||||
StringBuffer uni1 = new StringBuffer(www);
|
||||
StringBuffer ascii0 = new StringBuffer(www);
|
||||
StringBuffer ascii1 = new StringBuffer(www);
|
||||
|
||||
uni0.append(TestData.unicodeIn[0]);
|
||||
uni0.append(com);
|
||||
|
||||
uni1.append(TestData.unicodeIn[1]);
|
||||
uni1.append(com);
|
||||
|
||||
ascii0.append(TestData.asciiIn[0]);
|
||||
ascii0.append(com);
|
||||
|
||||
ascii1.append(TestData.asciiIn[1]);
|
||||
ascii1.append(com);
|
||||
|
||||
for(int i=0;i< TestData.unicodeIn.length; i++){
|
||||
|
||||
// for every entry in unicodeIn array
|
||||
// prepend www. and append .com
|
||||
source.setLength(4);
|
||||
source.append(TestData.unicodeIn[i]);
|
||||
source.append(com);
|
||||
|
||||
// a) compare it with itself
|
||||
doTestCompare(source.toString(),source.toString(),true);
|
||||
|
||||
// b) compare it with asciiIn equivalent
|
||||
doTestCompare(source.toString(),www+TestData.asciiIn[i]+com,true);
|
||||
|
||||
// c) compare it with unicodeIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(), uni1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),uni0.toString(), false);
|
||||
}
|
||||
// d) compare it with asciiIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(),ascii1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),ascii0.toString(), false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Number of random inputs generated and checked by MonkeyTest. */
private static final int loopCount = 100;
|
||||
/** Bound passed to randi() when getTestSource picks how many code points to generate. */
private static final int maxCharCount = 15;
|
||||
/**
 * Highest Unicode code point (U+10FFFF).
 * NOTE(review): not referenced in the visible part of this class - confirm it is still needed.
 */
private static final int maxCodePoint = 0x10ffff;
|
||||
/** Random source for rand_uni() and randi(); created on first use by getTestSource() via createRandom(). */
private Random random = null;
|
||||
|
||||
/**
|
||||
* Return a random integer i where 0 <= i < n.
|
||||
* A special function that gets random codepoints from planes 0,1,2 and 14
|
||||
*/
|
||||
private int rand_uni()
|
||||
{
|
||||
int retVal = (int)(random.nextLong()& 0x3FFFF);
|
||||
if(retVal >= 0x30000){
|
||||
retVal+=0xB0000;
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
private int randi(int n){
|
||||
return (int) (random.nextInt(0x7fff) % (n+1));
|
||||
}
|
||||
|
||||
private StringBuffer getTestSource(StringBuffer fillIn) {
|
||||
// use uniform seed value from the framework
|
||||
if(random==null){
|
||||
random = createRandom();
|
||||
}
|
||||
int i = 0;
|
||||
int charCount = (randi(maxCharCount) + 1);
|
||||
while (i <charCount ) {
|
||||
int codepoint = rand_uni();
|
||||
if(codepoint == 0x0000){
|
||||
continue;
|
||||
}
|
||||
UTF16.append(fillIn, (int)codepoint);
|
||||
i++;
|
||||
}
|
||||
return fillIn;
|
||||
|
||||
}
|
||||
public void MonkeyTest() throws Exception{
|
||||
StringBuffer source = new StringBuffer();
|
||||
/* do the monkey test */
|
||||
for(int i=0; i<loopCount; i++){
|
||||
source.setLength(0);
|
||||
getTestSource(source);
|
||||
doTestCompareReferenceImpl(source);
|
||||
}
|
||||
|
||||
// test string with embedded null
|
||||
source.append( "\\u0000\\u2109\\u3E1B\\U000E65CA\\U0001CAC5" );
|
||||
|
||||
source = new StringBuffer(Utility.unescape(source.toString()));
|
||||
doTestCompareReferenceImpl(source);
|
||||
|
||||
//StringBuffer src = new StringBuffer(Utility.unescape("\\uDEE8\\U000E228C\\U0002EE8E\\U000E6350\\U00024DD9\u4049\\U000E0DE4\\U000E448C\\U0001869B\\U000E3380\\U00016A8E\\U000172D5\\U0001C408\\U000E9FB5"));
|
||||
//doTestCompareReferenceImpl(src);
|
||||
}
|
||||
private void doTestCompareReferenceImpl(StringBuffer src) throws Exception{
|
||||
|
||||
StringBuffer label = src;
|
||||
|
||||
ParseException expected = null;
|
||||
StringBuffer ascii = null;
|
||||
int options = IDNA.DEFAULT;
|
||||
logln("Comparing idnaref_toASCII with uidna_toASCII for input: " + prettify(label));
|
||||
try{
|
||||
ascii = IDNAReference.convertToASCII(label, options);
|
||||
}catch( ParseException e){
|
||||
expected = e;
|
||||
if(e.equals(unassignedException)){
|
||||
options = IDNA.ALLOW_UNASSIGNED;
|
||||
expected = null;
|
||||
try{
|
||||
ascii = IDNAReference.convertToASCII(label, options);
|
||||
}catch( ParseException ex){
|
||||
expected = ex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
doTestToASCII(label.toString(),
|
||||
(ascii == null) ? null : ascii.toString(),
|
||||
options,
|
||||
expected);
|
||||
|
||||
logln("Comparing idnaref_toUnicode with uidna_toUnicode for input: " + prettify(label));
|
||||
StringBuffer uni =null;
|
||||
|
||||
if(expected == null){
|
||||
options = IDNA.DEFAULT;
|
||||
try{
|
||||
uni = IDNAReference.convertToUnicode(ascii, options);
|
||||
}catch( ParseException e ){
|
||||
expected = e;
|
||||
if(expected.equals(unassignedException)){
|
||||
options = IDNA.ALLOW_UNASSIGNED;
|
||||
expected = null;
|
||||
try{
|
||||
uni = IDNAReference.convertToUnicode(ascii, options);
|
||||
}catch(ParseException ex){
|
||||
expected = ex;
|
||||
}
|
||||
}
|
||||
}
|
||||
doTestToUnicode(ascii.toString(),
|
||||
(uni==null)? null : uni.toString(),
|
||||
options,
|
||||
expected);
|
||||
}
|
||||
|
||||
}
|
||||
public void TestCompareRefImpl() throws Exception{
|
||||
|
||||
StringBuffer src = new StringBuffer();
|
||||
|
||||
for(int i = 0x40000 ; i< 0x10ffff; i++){
|
||||
src.setLength(0);
|
||||
if(isQuick()==true && i> 0x1FFFF){
|
||||
return;
|
||||
}
|
||||
if(i >= 0x30000){
|
||||
i+=0xB0000;
|
||||
}
|
||||
UTF16.append(src,i);
|
||||
doTestCompareReferenceImpl(src);
|
||||
}
|
||||
}
|
||||
}
|
565
icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNARef.java
Normal file
565
icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNARef.java
Normal file
@ -0,0 +1,565 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNARef.java,v $
|
||||
* $Date: 2003/08/21 23:42:27 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.stringprep.ParseException;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class TestIDNARef extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestIDNARef().run(args);
|
||||
}
|
||||
// Exception built with ParseException.UNASSIGNED_ERROR; the helpers below compare
// expected exceptions against it with equals() to recognise the "unassigned code
// point" case. (presumably equals() compares the error code - confirm in ParseException)
private ParseException unassignedException = new ParseException("",ParseException.UNASSIGNED_ERROR);
|
||||
public void TestToUnicode() throws Exception{
|
||||
for(int i=0; i<TestData.asciiIn.length; i++){
|
||||
// test StringBuffer toUnicode
|
||||
doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNAReference.DEFAULT, null);
|
||||
doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNAReference.USE_STD3_RULES, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNAReference.USE_STD3_RULES|IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void TestToASCII() throws Exception{
|
||||
for(int i=0; i<TestData.asciiIn.length; i++){
|
||||
// test StringBuffer toUnicode
|
||||
doTestToASCII(new String(TestData.unicodeIn[i]),TestData.asciiIn[i],IDNAReference.DEFAULT, null);
|
||||
doTestToASCII(new String(TestData.unicodeIn[i]),TestData.asciiIn[i],IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNAReference.USE_STD3_RULES, null);
|
||||
//doTestToUnicode(TestData.asciiIn[i],new String(TestData.unicodeIn[i]),IDNAReference.USE_STD3_RULES|IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void TestIDNToASCII() throws Exception{
|
||||
for(int i=0; i<TestData.domainNames.length; i++){
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.DEFAULT, null);
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.USE_STD3_RULES, null);
|
||||
doTestIDNToASCII(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.ALLOW_UNASSIGNED|IDNAReference.USE_STD3_RULES, null);
|
||||
}
|
||||
|
||||
for(int i=0; i<TestData.domainNames1Uni.length; i++){
|
||||
doTestIDNToASCII(TestData.domainNames1Uni[i],TestData.domainNamesToASCIIOut[i],IDNAReference.DEFAULT, null);
|
||||
doTestIDNToASCII(TestData.domainNames1Uni[i],TestData.domainNamesToASCIIOut[i],IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
}
|
||||
}
|
||||
public void TestIDNToUnicode() throws Exception{
|
||||
for(int i=0; i<TestData.domainNames.length; i++){
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.DEFAULT, null);
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.USE_STD3_RULES, null);
|
||||
doTestIDNToUnicode(TestData.domainNames[i],TestData.domainNames[i],IDNAReference.ALLOW_UNASSIGNED|IDNAReference.USE_STD3_RULES, null);
|
||||
}
|
||||
for(int i=0; i<TestData.domainNamesToASCIIOut.length; i++){
|
||||
doTestIDNToUnicode(TestData.domainNamesToASCIIOut[i],TestData.domainNamesToUnicodeOut[i],IDNAReference.DEFAULT, null);
|
||||
doTestIDNToUnicode(TestData.domainNamesToASCIIOut[i],TestData.domainNamesToUnicodeOut[i],IDNAReference.ALLOW_UNASSIGNED, null);
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestToUnicode(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNAReference.convertToUnicode(src,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNAReference.convertToUnicode(inBuf,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " + prettify(src) +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertToUnicode(inIter,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("Did not get the expected exception for source: " + prettify(src) +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestIDNToUnicode(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNAReference.convertIDNToUnicode(src,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertIDNToUnicode(inBuf,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToUnicode did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToUnicode did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertIDNToUnicode(inIter,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected)){
|
||||
errln("convertToUnicode did not return expected result with options : "+ options +
|
||||
" Expected: " + prettify(expected)+" Got: "+prettify(out));
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("Did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
private void doTestToASCII(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNAReference.convertToASCII(src,options);
|
||||
if(!unassignedException.equals(expectedException) && expected!=null && out != null && expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertToASCII(inBuf,options);
|
||||
if(!unassignedException.equals(expectedException) && expected!=null && out != null && expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertToASCII(inIter,options);
|
||||
if(!unassignedException.equals(expectedException) && expected!=null && out != null && expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+ out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !expectedException.equals(ex)){
|
||||
errln("convertToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
private void doTestIDNToASCII(String src, String expected, int options, Object expectedException)
|
||||
throws Exception{
|
||||
StringBuffer inBuf = new StringBuffer(src);
|
||||
UCharacterIterator inIter = UCharacterIterator.getInstance(src);
|
||||
try{
|
||||
|
||||
StringBuffer out = IDNAReference.convertIDNToASCII(src,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToIDNAReferenceSCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToIDNAReferenceSCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToIDNAReferenceSCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertIDNtoASCII(inBuf,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertToIDNAReferenceSCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+out);
|
||||
}
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertToIDNAReferenceSCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertToIDNAReferenceSCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
StringBuffer out = IDNAReference.convertIDNtoASCII(inIter,options);
|
||||
if(expected!=null && out != null && !out.toString().equals(expected.toLowerCase())){
|
||||
errln("convertIDNToASCII did not return expected result with options : "+ options +
|
||||
" Expected: " + expected+" Got: "+ out);
|
||||
}
|
||||
|
||||
if(expectedException!=null && !unassignedException.equals(expectedException)){
|
||||
errln("convertIDNToASCII did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(expectedException == null || !ex.equals(expectedException)){
|
||||
errln("convertIDNToASCII did not get the expected exception for source: " +src +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
public void TestConformance()throws Exception{
|
||||
for(int i=0; i<TestData.conformanceTestCases.length;i++){
|
||||
|
||||
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
|
||||
if(testCase.expected != null){
|
||||
//Test toASCII
|
||||
doTestToASCII(testCase.input,testCase.output,IDNAReference.DEFAULT,testCase.expected);
|
||||
doTestToASCII(testCase.input,testCase.output,IDNAReference.ALLOW_UNASSIGNED,testCase.expected);
|
||||
}
|
||||
//Test toUnicode
|
||||
//doTestToUnicode(testCase.input,testCase.output,IDNAReference.DEFAULT,testCase.expected);
|
||||
}
|
||||
}
|
||||
public void TestNamePrepConformance() throws Exception{
|
||||
NamePrepTransform namePrep = NamePrepTransform.getInstance();
|
||||
for(int i=0; i<TestData.conformanceTestCases.length;i++){
|
||||
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
|
||||
try{
|
||||
StringBuffer output = namePrep.prepare(iter,NamePrepTransform.NONE);
|
||||
if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
|
||||
errln("Did not get the expected output. Expected: " + prettify(testCase.output)+
|
||||
" Got: "+ prettify(output) );
|
||||
}
|
||||
if(testCase.expected!=null && !unassignedException.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(testCase.expected == null || !ex.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception for source: " +testCase.input +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
iter.setToStart();
|
||||
StringBuffer output = namePrep.prepare(iter,NamePrepTransform.ALLOW_UNASSIGNED);
|
||||
if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
|
||||
errln("Did not get the expected output. Expected: " + prettify(testCase.output)+
|
||||
" Got: "+ prettify(output) );
|
||||
}
|
||||
if(testCase.expected!=null && !unassignedException.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception. The operation succeeded!");
|
||||
}
|
||||
}catch(ParseException ex){
|
||||
if(testCase.expected == null || !ex.equals(testCase.expected)){
|
||||
errln("Did not get the expected exception for source: " +testCase.input +" Got: "+ ex.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
public void TestErrorCases() throws Exception{
|
||||
for(int i=0; i < TestData.errorCases.length; i++){
|
||||
TestData.ErrorCase errCase = TestData.errorCases[i];
|
||||
if(errCase.testLabel==true){
|
||||
// Test ToASCII
|
||||
doTestToASCII(new String(errCase.unicode),errCase.ascii,IDNAReference.DEFAULT,errCase.expected);
|
||||
doTestToASCII(new String(errCase.unicode),errCase.ascii,IDNAReference.ALLOW_UNASSIGNED,errCase.expected);
|
||||
if(errCase.useSTD3ASCIIRules){
|
||||
doTestToASCII(new String(errCase.unicode),errCase.ascii,IDNAReference.USE_STD3_RULES,errCase.expected);
|
||||
}
|
||||
}
|
||||
if(errCase.useSTD3ASCIIRules!=true){
|
||||
|
||||
// Test IDNToASCII
|
||||
doTestIDNToASCII(new String(errCase.unicode),errCase.ascii,IDNAReference.DEFAULT,errCase.expected);
|
||||
doTestIDNToASCII(new String(errCase.unicode),errCase.ascii,IDNAReference.ALLOW_UNASSIGNED,errCase.expected);
|
||||
|
||||
}else{
|
||||
doTestIDNToASCII(new String(errCase.unicode),errCase.ascii,IDNAReference.USE_STD3_RULES,errCase.expected);
|
||||
}
|
||||
|
||||
//TestToUnicode
|
||||
if(errCase.testToUnicode==true){
|
||||
if(errCase.useSTD3ASCIIRules!=true){
|
||||
// Test IDNToUnicode
|
||||
doTestIDNToUnicode(errCase.ascii,new String(errCase.unicode),IDNAReference.DEFAULT,errCase.expected);
|
||||
doTestIDNToUnicode(errCase.ascii,new String(errCase.unicode),IDNAReference.ALLOW_UNASSIGNED,errCase.expected);
|
||||
|
||||
}else{
|
||||
doTestIDNToUnicode(errCase.ascii,new String(errCase.unicode),IDNAReference.USE_STD3_RULES,errCase.expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private void doTestCompare(String s1, String s2, boolean isEqual){
|
||||
try{
|
||||
int retVal = IDNAReference.compare(s1,s2,IDNAReference.DEFAULT);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNAReference.compare(new StringBuffer(s1), new StringBuffer(s2), IDNAReference.DEFAULT);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNAReference.compare(UCharacterIterator.getInstance(s1), UCharacterIterator.getInstance(s2), IDNAReference.DEFAULT);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
}catch(Exception e){
|
||||
e.printStackTrace();
|
||||
errln("Unexpected exception thrown by IDNAReference.compare");
|
||||
}
|
||||
|
||||
try{
|
||||
int retVal = IDNAReference.compare(s1,s2,IDNAReference.ALLOW_UNASSIGNED);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNAReference.compare(new StringBuffer(s1), new StringBuffer(s2), IDNAReference.ALLOW_UNASSIGNED);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
retVal = IDNAReference.compare(UCharacterIterator.getInstance(s1), UCharacterIterator.getInstance(s2), IDNAReference.ALLOW_UNASSIGNED);
|
||||
if(isEqual==true && retVal != 0){
|
||||
errln("Did not get the expected result for s1: "+ prettify(s1)+
|
||||
" s2: "+prettify(s2));
|
||||
}
|
||||
}catch(Exception e){
|
||||
errln("Unexpected exception thrown by IDNAReference.compare");
|
||||
}
|
||||
}
|
||||
public void TestCompare() throws Exception{
|
||||
String www = "www.";
|
||||
String com = ".com";
|
||||
StringBuffer source = new StringBuffer(www);
|
||||
StringBuffer uni0 = new StringBuffer(www);
|
||||
StringBuffer uni1 = new StringBuffer(www);
|
||||
StringBuffer ascii0 = new StringBuffer(www);
|
||||
StringBuffer ascii1 = new StringBuffer(www);
|
||||
|
||||
uni0.append(TestData.unicodeIn[0]);
|
||||
uni0.append(com);
|
||||
|
||||
uni1.append(TestData.unicodeIn[1]);
|
||||
uni1.append(com);
|
||||
|
||||
ascii0.append(TestData.asciiIn[0]);
|
||||
ascii0.append(com);
|
||||
|
||||
ascii1.append(TestData.asciiIn[1]);
|
||||
ascii1.append(com);
|
||||
|
||||
for(int i=0;i< TestData.unicodeIn.length; i++){
|
||||
|
||||
// for every entry in unicodeIn array
|
||||
// prepend www. and append .com
|
||||
source.setLength(4);
|
||||
source.append(TestData.unicodeIn[i]);
|
||||
source.append(com);
|
||||
|
||||
// a) compare it with itself
|
||||
doTestCompare(source.toString(),source.toString(),true);
|
||||
|
||||
// b) compare it with asciiIn equivalent
|
||||
doTestCompare(source.toString(),www+TestData.asciiIn[i]+com,true);
|
||||
|
||||
// c) compare it with unicodeIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(), uni1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),uni0.toString(), false);
|
||||
}
|
||||
// d) compare it with asciiIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(),ascii1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),ascii0.toString(), false);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// test and ascertain
|
||||
// func(func(func(src))) == func(src)
|
||||
public void doTestChainingToASCII(String source)throws Exception{
|
||||
StringBuffer expected;
|
||||
StringBuffer chained;
|
||||
|
||||
// test convertIDNToASCII
|
||||
expected = IDNAReference.convertIDNToASCII(source,IDNAReference.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNAReference.convertIDNtoASCII(chained,IDNAReference.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertIDNToASCII");
|
||||
}
|
||||
// test convertIDNToA
|
||||
expected = IDNAReference.convertToASCII(source,IDNAReference.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNAReference.convertToASCII(chained,IDNAReference.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertToASCII");
|
||||
}
|
||||
}
|
||||
// test and ascertain
|
||||
// func(func(func(src))) == func(src)
|
||||
public void doTestChainingToUnicode(String source)throws Exception{
|
||||
StringBuffer expected;
|
||||
StringBuffer chained;
|
||||
|
||||
// test convertIDNToUnicode
|
||||
expected = IDNAReference.convertIDNToUnicode(source,IDNAReference.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNAReference.convertIDNToUnicode(chained,IDNAReference.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertIDNToUnicode");
|
||||
}
|
||||
// test convertIDNToA
|
||||
expected = IDNAReference.convertToUnicode(source,IDNAReference.DEFAULT);
|
||||
chained = expected;
|
||||
for(int i=0; i< 4; i++){
|
||||
chained = IDNAReference.convertToUnicode(chained,IDNAReference.DEFAULT);
|
||||
}
|
||||
if(!expected.toString().equals(chained.toString())){
|
||||
errln("Chaining test failed for convertToUnicode");
|
||||
}
|
||||
}
|
||||
public void TestChaining() throws Exception{
|
||||
for(int i=0; i< TestData.unicodeIn.length; i++){
|
||||
doTestChainingToASCII(new String(TestData.unicodeIn[i]));
|
||||
}
|
||||
for(int i=0; i< TestData.asciiIn.length; i++){
|
||||
doTestChainingToUnicode(TestData.asciiIn[i]);
|
||||
}
|
||||
}
|
||||
public void TestRootLabelSeparator() throws Exception{
|
||||
String www = "www.";
|
||||
String com = ".com."; /*root label separator*/
|
||||
StringBuffer source = new StringBuffer(www);
|
||||
StringBuffer uni0 = new StringBuffer(www);
|
||||
StringBuffer uni1 = new StringBuffer(www);
|
||||
StringBuffer ascii0 = new StringBuffer(www);
|
||||
StringBuffer ascii1 = new StringBuffer(www);
|
||||
|
||||
uni0.append(TestData.unicodeIn[0]);
|
||||
uni0.append(com);
|
||||
|
||||
uni1.append(TestData.unicodeIn[1]);
|
||||
uni1.append(com);
|
||||
|
||||
ascii0.append(TestData.asciiIn[0]);
|
||||
ascii0.append(com);
|
||||
|
||||
ascii1.append(TestData.asciiIn[1]);
|
||||
ascii1.append(com);
|
||||
|
||||
for(int i=0;i< TestData.unicodeIn.length; i++){
|
||||
|
||||
// for every entry in unicodeIn array
|
||||
// prepend www. and append .com
|
||||
source.setLength(4);
|
||||
source.append(TestData.unicodeIn[i]);
|
||||
source.append(com);
|
||||
|
||||
// a) compare it with itself
|
||||
doTestCompare(source.toString(),source.toString(),true);
|
||||
|
||||
// b) compare it with asciiIn equivalent
|
||||
doTestCompare(source.toString(),www+TestData.asciiIn[i]+com,true);
|
||||
|
||||
// c) compare it with unicodeIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(), uni1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),uni0.toString(), false);
|
||||
}
|
||||
// d) compare it with asciiIn not equivalent
|
||||
if(i==0){
|
||||
doTestCompare(source.toString(),ascii1.toString(), false);
|
||||
}else{
|
||||
doTestCompare(source.toString(),ascii0.toString(), false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
197
icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java
Normal file
197
icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java
Normal file
@ -0,0 +1,197 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java,v $
|
||||
* $Date: 2003/08/21 23:42:21 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.stringprep;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class TestStringPrep extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
new TestStringPrep().run(args);
|
||||
}
|
||||
/*
|
||||
There are several special identifiers ("who") which need to be
|
||||
understood universally, rather than in the context of a particular
|
||||
DNS domain. Some of these identifiers cannot be understood when an
|
||||
NFS client accesses the server, but have meaning when a local process
|
||||
accesses the file. The ability to display and modify these
|
||||
permissions is permitted over NFS, even if none of the access methods
|
||||
on the server understands the identifiers.
|
||||
|
||||
Who Description
|
||||
_______________________________________________________________
|
||||
|
||||
"OWNER" The owner of the file.
|
||||
"GROUP" The group associated with the file.
|
||||
"EVERYONE" The world.
|
||||
"INTERACTIVE" Accessed from an interactive terminal.
|
||||
"NETWORK" Accessed via the network.
|
||||
"DIALUP" Accessed as a dialup user to the server.
|
||||
"BATCH" Accessed from a batch job.
|
||||
"ANONYMOUS" Accessed without any authentication.
|
||||
"AUTHENTICATED" Any authenticated user (opposite of
|
||||
ANONYMOUS)
|
||||
"SERVICE" Access from a system service.
|
||||
|
||||
To avoid conflict, these special identifiers are distinguished by an
|
||||
appended "@" and should appear in the form "xxxx@" (note: no domain
|
||||
name after the "@"). For example: ANONYMOUS@.
|
||||
*/
|
||||
private String[] mixed_prep_data ={
|
||||
"OWNER@",
|
||||
"GROUP@",
|
||||
"EVERYONE@",
|
||||
"INTERACTIVE@",
|
||||
"NETWORK@",
|
||||
"DIALUP@",
|
||||
"BATCH@",
|
||||
"ANONYMOUS@",
|
||||
"AUTHENTICATED@",
|
||||
"\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D@slip129-37-118-146.nc.us.ibm.net",
|
||||
"\u0936\u094d\u0930\u0940\u092e\u0926\u094d@saratoga.pe.utexas.edu",
|
||||
"\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e@dial-120-45.ots.utexas.edu",
|
||||
"\u0905\u0927\u094d\u092f\u093e\u092f@woo-085.dorms.waller.net",
|
||||
"\u0905\u0930\u094d\u091c\u0941\u0928@hd30-049.hil.compuserve.com",
|
||||
"\u0935\u093f\u0937\u093e\u0926@pem203-31.pe.ttu.edu",
|
||||
"\u092f\u094b\u0917@56K-227.MaxTNT3.pdq.net",
|
||||
"\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930@dial-36-2.ots.utexas.edu",
|
||||
"\u0909\u0935\u093E\u091A\u0943@slip129-37-23-152.ga.us.ibm.net",
|
||||
"\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947@ts45ip119.cadvision.com",
|
||||
"\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947@sdn-ts-004txaustP05.dialsprint.net",
|
||||
"\u0938\u092e\u0935\u0947\u0924\u093e@bar-tnt1s66.erols.com",
|
||||
"\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903@101.st-louis-15.mo.dial-access.att.net",
|
||||
"\u092e\u093e\u092e\u0915\u093e\u0903@h92-245.Arco.COM",
|
||||
"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935@dial-13-2.ots.utexas.edu",
|
||||
"\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924@net-redynet29.datamarkets.com.ar",
|
||||
"\u0938\u0902\u091c\u0935@ccs-shiva28.reacciun.net.ve",
|
||||
"\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d@7.houston-11.tx.dial-access.att.net",
|
||||
"\u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27@ingw129-37-120-26.mo.us.ibm.net",
|
||||
"\u0c06\u0c28\u0c02\u0c26\u0c4d@dialup6.austintx.com",
|
||||
"\u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41@dns2.tpao.gov.tr",
|
||||
"\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d@slip129-37-119-194.nc.us.ibm.net",
|
||||
"\u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26@cs7.dillons.co.uk.203.119.193.in-addr.arpa",
|
||||
"\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d@swprd1.innovplace.saskatoon.sk.ca",
|
||||
"\u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26@bikini.bologna.maraut.it",
|
||||
"\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d@node91.subnet159-198-79.baxter.com",
|
||||
"\u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24@cust19.max5.new-york.ny.ms.uu.net",
|
||||
"\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30@balexander.slip.andrew.cmu.edu",
|
||||
"\u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32@pool029.max2.denver.co.dynip.alter.net",
|
||||
"\u0c30\u0c35\u0c3f@cust49.max9.new-york.ny.ms.uu.net",
|
||||
"\u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d@s61.abq-dialin2.hollyberry.com",
|
||||
"\u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27@\u0917\u0928\u0947\u0936.sanjose.ibm.com",
|
||||
"\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f@www.\u00E0\u00B3\u00AF.com",
|
||||
"\u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32@www.\u00C2\u00A4.com",
|
||||
"\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D@www.\u00C2\u00A3.com",
|
||||
"\u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f@\u0025",
|
||||
"\u0c2e\u0c3e\u0c27\u0c35\u0c4d@\u005C\u005C",
|
||||
"\u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f@www.\u0021.com",
|
||||
"test@www.\u0024.com",
|
||||
"help@\u00C3\u00BC.com",
|
||||
};
|
||||
public void TestNFS4MixedPrep(){
|
||||
for(int i=0; i< mixed_prep_data.length; i++){
|
||||
try{
|
||||
String src = mixed_prep_data[i];
|
||||
byte[] dest = NFS4StringPrep.mixed_prepare(src.getBytes("UTF-8"));
|
||||
String destString = new String(dest, "UTF-8");
|
||||
int destIndex = destString.indexOf('@');
|
||||
if(destIndex < 0){
|
||||
errln("Delimiter @ disappeared from the output!");
|
||||
}
|
||||
}catch(Exception e){
|
||||
errln("mixed_prepare for string: " + mixed_prep_data[i] +" failed with " + e.toString());
|
||||
}
|
||||
}
|
||||
/* test the error condition */
|
||||
{
|
||||
String src = "OWNER@oss.software.ibm.com";
|
||||
try{
|
||||
byte[] dest = NFS4StringPrep.mixed_prepare(src.getBytes("UTF-8"));
|
||||
if(dest!=null){
|
||||
errln("Did not get the expected exception");
|
||||
}
|
||||
}catch(Exception e){
|
||||
logln("mixed_prepare for string: " + src +" passed with " + e.toString());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
public void TestCISPrep(){
|
||||
|
||||
for(int i=0;i< (TestData.conformanceTestCases.length);i++){
|
||||
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
|
||||
String src = testCase.input;
|
||||
Exception expected = testCase.expected;
|
||||
String expectedDest = testCase.output;
|
||||
try{
|
||||
byte[] dest =NFS4StringPrep.cis_prepare(src.getBytes("UTF-8"));
|
||||
String destString = new String(dest, "UTF-8");
|
||||
if(!expectedDest.equalsIgnoreCase(destString)){
|
||||
errln("Did not get the expected output for nfs4_cis_prep at index " + i);
|
||||
}
|
||||
}catch(Exception e){
|
||||
if(!expected.equals(e)){
|
||||
errln("Did not get the expected exception");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
private static String[] cs_prep_data = {
|
||||
//BIDI checking is turned off .. so
|
||||
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74",
|
||||
|
||||
};
|
||||
public void TestCSPrep(){
|
||||
|
||||
// Checking for bidi is turned off
|
||||
String src = "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74";
|
||||
try{
|
||||
NFS4StringPrep.cs_prepare(src.getBytes("UTF-8"), false);
|
||||
}catch(Exception e){
|
||||
errln("Got unexpected exception: " + e.toString());
|
||||
}
|
||||
|
||||
// normalization is turned off
|
||||
try{
|
||||
src = "www.\u00E0\u00B3\u00AF.com";
|
||||
byte[] dest = NFS4StringPrep.cs_prepare(src.getBytes("UTF-8"), false);
|
||||
String destStr = new String(dest, "UTF-8");
|
||||
if(!src.equals(destStr)){
|
||||
errln("Did not get expected output. Expected: "+ prettify(src)+
|
||||
" Got: " + prettify(destStr));
|
||||
}
|
||||
}catch(Exception e){
|
||||
errln("Got unexpected exception: " + e.toString());
|
||||
}
|
||||
|
||||
// test case insensitive string
|
||||
try{
|
||||
src = "THISISATEST";
|
||||
byte[] dest = NFS4StringPrep.cs_prepare(src.getBytes("UTF-8"), true);
|
||||
String destStr = new String(dest, "UTF-8");
|
||||
if(!src.toLowerCase().equals(destStr)){
|
||||
errln("Did not get expected output. Expected: "+ prettify(src)+
|
||||
" Got: " + prettify(destStr));
|
||||
}
|
||||
}catch(Exception e){
|
||||
errln("Got unexpected exception: " + e.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -5,13 +5,14 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/LocaleUtility.java,v $
|
||||
* $Date: 2003/06/03 18:49:32 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2003/08/21 23:41:25 $
|
||||
* $Revision: 1.9 $
|
||||
* *****************************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
@ -131,4 +132,10 @@ public class LocaleUtility {
|
||||
}
|
||||
return new Locale(parts[0], parts[1], parts[2]);
|
||||
}
|
||||
|
||||
public static InputStream getImplDataResourceAsStream(String name){
|
||||
Class myClass = new LocaleUtility().getClass();
|
||||
String fullName = "data/"+name;
|
||||
return myClass.getResourceAsStream(fullName);
|
||||
}
|
||||
}
|
||||
|
96
icu4j/src/com/ibm/icu/impl/StringPrepDataReader.java
Normal file
96
icu4j/src/com/ibm/icu/impl/StringPrepDataReader.java
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Created on May 2, 2003
|
||||
*
|
||||
* To change the template for this generated file go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public final class StringPrepDataReader implements ICUBinary.Authenticate {
|
||||
private final static boolean debug = ICUDebug.enabled("NormalizerDataReader");
|
||||
|
||||
/**
|
||||
* <p>private constructor.</p>
|
||||
* @param inputStream ICU uprop.dat file input stream
|
||||
* @exception IOException throw if data file fails authentication
|
||||
* @draft 2.1
|
||||
*/
|
||||
public StringPrepDataReader(InputStream inputStream)
|
||||
throws IOException{
|
||||
if(debug) System.out.println("Bytes in inputStream " + inputStream.available());
|
||||
|
||||
unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
|
||||
|
||||
if(debug) System.out.println("Bytes left in inputStream " +inputStream.available());
|
||||
|
||||
dataInputStream = new DataInputStream(inputStream);
|
||||
|
||||
if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available());
|
||||
}
|
||||
|
||||
public void read(byte[] idnaBytes,
|
||||
char[] mappingTable)
|
||||
throws IOException{
|
||||
|
||||
//Read the bytes that make up the idnaTrie
|
||||
dataInputStream.read(idnaBytes);
|
||||
|
||||
//Read the extra data
|
||||
for(int i=0;i<mappingTable.length;i++){
|
||||
mappingTable[i]=dataInputStream.readChar();
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] getDataFormatVersion(){
|
||||
return DATA_FORMAT_VERSION;
|
||||
}
|
||||
|
||||
public boolean isDataVersionAcceptable(byte version[]){
|
||||
return version[0] == DATA_FORMAT_VERSION[0]
|
||||
&& version[2] == DATA_FORMAT_VERSION[2]
|
||||
&& version[3] == DATA_FORMAT_VERSION[3];
|
||||
}
|
||||
public int[] readIndexes(int length)throws IOException{
|
||||
int[] indexes = new int[length];
|
||||
//Read the indexes
|
||||
for (int i = 0; i <length ; i++) {
|
||||
indexes[i] = dataInputStream.readInt();
|
||||
}
|
||||
return indexes;
|
||||
}
|
||||
|
||||
public byte[] getUnicodeVersion(){
|
||||
return unicodeVersion;
|
||||
}
|
||||
// private data members -------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* ICU data file input stream
|
||||
*/
|
||||
private DataInputStream dataInputStream;
|
||||
private byte[] unicodeVersion;
|
||||
/**
|
||||
* File format version that this class understands.
|
||||
* No guarantees are made if a older version is used
|
||||
* see store.c of gennorm for more information and values
|
||||
*/
|
||||
///* dataFormat="SPRP" 0x53, 0x50, 0x52, 0x50 */
|
||||
private static final byte DATA_FORMAT_ID[] = {(byte)0x53, (byte)0x50,
|
||||
(byte)0x52, (byte)0x50};
|
||||
private static final byte DATA_FORMAT_VERSION[] = {(byte)0x3, (byte)0x2,
|
||||
(byte)0x5, (byte)0x2};
|
||||
|
||||
}
|
BIN
icu4j/src/com/ibm/icu/impl/data/uidna.spp
Normal file
BIN
icu4j/src/com/ibm/icu/impl/data/uidna.spp
Normal file
Binary file not shown.
918
icu4j/src/com/ibm/icu/stringprep/IDNA.java
Normal file
918
icu4j/src/com/ibm/icu/stringprep/IDNA.java
Normal file
@ -0,0 +1,918 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/IDNA.java,v $
|
||||
* $Date: 2003/08/21 23:40:42 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.stringprep;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import com.ibm.icu.impl.LocaleUtility;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
|
||||
/**
|
||||
*
|
||||
* UIDNA API implements the IDNA protocol as defined in the IDNA draft
|
||||
* (http://www.ietf.org/rfc/rfc3490.txt).
|
||||
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
|
||||
* containing non-ASCII code points are required to be processed by
|
||||
* ToASCII operation before passing it to resolver libraries. Domain names
|
||||
* that are obtained from resolver libraries are required to be processed by
|
||||
* ToUnicode operation before displaying the domain name to the user.
|
||||
* IDNA requires that implementations process input strings with Nameprep
|
||||
* (http://www.ietf.org/rfc/rfc3491.txt),
|
||||
* which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
|
||||
* and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
|
||||
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
|
||||
* neither Nameprep nor Punycode are optional.
|
||||
* The input and output of ToASCII and ToUnicode operations are Unicode
|
||||
* and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
|
||||
* multiple times to an input string will yield the same result as applying the operation
|
||||
* once.
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
|
||||
*
|
||||
*/
|
||||
public final class IDNA {
|
||||
|
||||
/* IDNA ACE Prefix is "xn--" */
|
||||
private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
|
||||
private static final int ACE_PREFIX_LENGTH = 4;
|
||||
|
||||
private static final int MAX_LABEL_LENGTH = 63;
|
||||
private static final int HYPHEN = 0x002D;
|
||||
private static final String NAME_PREP_PROFILE = "uidna";
|
||||
private static final int CAPITAL_A = 0x0041;
|
||||
private static final int CAPITAL_Z = 0x005A;
|
||||
private static final int LOWER_CASE_DELTA = 0x0020;
|
||||
private static final int FULL_STOP = 0x002E;
|
||||
|
||||
/**
|
||||
* Option to prohibit processing of unassigned codepoints in the input and
|
||||
* do not check if the input conforms to STD-3 ASCII rules.
|
||||
*
|
||||
* @see convertToASCII convertToUnicode
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
public static final int DEFAULT = 0x0000;
|
||||
/**
|
||||
* Option to allow processing of unassigned codepoints in the input
|
||||
*
|
||||
* @see convertToASCII convertToUnicode
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
public static final int ALLOW_UNASSIGNED = 0x0001;
|
||||
/**
|
||||
* Option to check if input conforms to STD-3 ASCII rules
|
||||
*
|
||||
* @see convertToASCII convertToUnicode
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
public static final int USE_STD3_RULES = 0x0002;
|
||||
|
||||
private static StringPrep prep = null;
|
||||
|
||||
private static synchronized void loadInstance()
|
||||
throws IOException{
|
||||
if(prep==null){
|
||||
InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
|
||||
prep = StringPrep.getInstance(stream);
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean startsWithPrefix(StringBuffer src){
|
||||
boolean startsWithPrefix = true;
|
||||
|
||||
if(src.length() < ACE_PREFIX_LENGTH){
|
||||
return false;
|
||||
}
|
||||
for(int i=0; i<ACE_PREFIX_LENGTH;i++){
|
||||
if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
|
||||
startsWithPrefix = false;
|
||||
}
|
||||
}
|
||||
return startsWithPrefix;
|
||||
}
|
||||
|
||||
private static char toASCIILower(char ch){
|
||||
if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
|
||||
return (char)(ch + LOWER_CASE_DELTA);
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
private static StringBuffer toASCIILower(StringBuffer src){
|
||||
StringBuffer dest = new StringBuffer();
|
||||
for(int i=0; i<src.length();i++){
|
||||
dest.append(toASCIILower(src.charAt(i)));
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
|
||||
char c1,c2;
|
||||
int rc;
|
||||
for(int i =0;/* no condition */;i++) {
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(i == s1.length()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
c1 = s1.charAt(i);
|
||||
c2 = s2.charAt(i);
|
||||
|
||||
/* Case-insensitive comparison */
|
||||
if(c1!=c2) {
|
||||
rc=(int)toASCIILower(c1)-(int)toASCIILower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private static int compareCaseInsensitiveASCII(String s1, String s2){
|
||||
char c1,c2;
|
||||
int rc;
|
||||
for(int i =0;/* no condition */;i++) {
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(i == s1.length()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
c1 = s1.charAt(i);
|
||||
c2 = s2.charAt(i);
|
||||
|
||||
/* Case-insensitive comparison */
|
||||
if(c1!=c2) {
|
||||
rc=(int)toASCIILower(c1)-(int)toASCIILower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private static int getSeparatorIndex(char[] src,int start, int limit)
|
||||
throws IOException{
|
||||
loadInstance();
|
||||
for(; start<limit;start++){
|
||||
if(prep.isLabelSeparator(src[start])){
|
||||
return start;
|
||||
}
|
||||
}
|
||||
// we have not found the separator just return length
|
||||
return start;
|
||||
}
|
||||
|
||||
private static boolean isLDHChar(int ch){
|
||||
// high runner case
|
||||
if(ch>0x007A){
|
||||
return false;
|
||||
}
|
||||
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
|
||||
if( (ch==0x002D) ||
|
||||
(0x0030 <= ch && ch <= 0x0039) ||
|
||||
(0x0041 <= ch && ch <= 0x005A) ||
|
||||
(0x0061 <= ch && ch <= 0x007A)
|
||||
){
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertToASCII(String src, int options)
|
||||
throws ParseException, IOException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToASCII(iter,options);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertToASCII(StringBuffer src, int options)
|
||||
throws ParseException, IOException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToASCII(iter,options);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
|
||||
throws ParseException, IOException{
|
||||
//load the data
|
||||
loadInstance();
|
||||
|
||||
boolean[] caseFlags = null;
|
||||
|
||||
// the source contains all ascii codepoints
|
||||
boolean srcIsASCII = true;
|
||||
// assume the source contains all LDH codepoints
|
||||
boolean srcIsLDH = true;
|
||||
|
||||
//get the options
|
||||
boolean useSTD3ASCIIRules = (boolean)((options & USE_STD3_RULES) != 0);
|
||||
|
||||
int failPos = -1;
|
||||
// step 2
|
||||
StringBuffer processOut = prep.prepare(srcIter,options);
|
||||
int poLen = processOut.length();
|
||||
StringBuffer dest = new StringBuffer();
|
||||
// step 3 & 4
|
||||
for(int j=0;j<poLen;j++ ){
|
||||
char ch=processOut.charAt(j);
|
||||
if(ch > 0x7F){
|
||||
srcIsASCII = false;
|
||||
}
|
||||
// here we do not assemble surrogates
|
||||
// since we know that LDH code points
|
||||
// are in the ASCII range only
|
||||
if(isLDHChar(ch)==false){
|
||||
srcIsLDH = false;
|
||||
failPos = j;
|
||||
}
|
||||
}
|
||||
|
||||
if(useSTD3ASCIIRules == true){
|
||||
// verify 3a and 3b
|
||||
if( srcIsLDH == false /* source contains some non-LDH characters */
|
||||
|| processOut.charAt(0) == HYPHEN
|
||||
|| processOut.charAt(processOut.length()-1) == HYPHEN){
|
||||
|
||||
/* populate the parseError struct */
|
||||
if(srcIsLDH==false){
|
||||
throw new ParseException( "The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),
|
||||
(failPos>0) ? (failPos-1) : failPos);
|
||||
}else if(processOut.charAt(0) == HYPHEN){
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
|
||||
|
||||
}else{
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),
|
||||
(poLen>0) ? poLen-1 : poLen);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if(srcIsASCII){
|
||||
dest = processOut;
|
||||
}else{
|
||||
// step 5 : verify the sequence does not begin with ACE prefix
|
||||
if(!startsWithPrefix(processOut)){
|
||||
|
||||
//step 6: encode the sequence with punycode
|
||||
caseFlags = new boolean[poLen];
|
||||
|
||||
StringBuffer punyout = Punycode.encode(processOut,caseFlags);
|
||||
|
||||
// convert all codepoints to lower case ASCII
|
||||
StringBuffer lowerOut = toASCIILower(punyout);
|
||||
|
||||
//Step 7: prepend the ACE prefix
|
||||
dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
|
||||
//Step 6: copy the contents in b2 into dest
|
||||
dest.append(lowerOut);
|
||||
}else{
|
||||
|
||||
throw new ParseException("The input does not start with the ACE Prefix.",
|
||||
ParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
|
||||
}
|
||||
}
|
||||
if(dest.length() > MAX_LABEL_LENGTH){
|
||||
throw new ParseException("The labels in the input are too long. Length > 64.",
|
||||
ParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
|
||||
throws ParseException, IOException{
|
||||
return convertIDNToASCII(iter.getText(), options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
|
||||
throws ParseException, IOException{
|
||||
return convertIDNToASCII(str.toString(), options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertIDNToASCII(String src,int options)
|
||||
throws ParseException, IOException{
|
||||
//load the data
|
||||
loadInstance();
|
||||
char[] srcArr = src.toCharArray();
|
||||
StringBuffer result = new StringBuffer();
|
||||
int sepIndex=0;
|
||||
int oldSepIndex=0;
|
||||
for(;;){
|
||||
sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(new String(srcArr,oldSepIndex,sepIndex-oldSepIndex));
|
||||
result.append(convertToASCII(iter,options));
|
||||
if(sepIndex==srcArr.length){
|
||||
break;
|
||||
}
|
||||
// increment the sepIndex to skip past the separator
|
||||
sepIndex++;
|
||||
oldSepIndex = sepIndex;
|
||||
result.append((char)FULL_STOP);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This function implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; for e.g." "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertToUnicode(String src, int options)
|
||||
throws ParseException, IOException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToUnicode(iter,options);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; for e.g." "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertToUnicode(StringBuffer src, int options)
|
||||
throws ParseException, IOException{
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToUnicode(iter,options);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; for e.g." "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
|
||||
throws ParseException, IOException{
|
||||
//load the data
|
||||
loadInstance();
|
||||
|
||||
boolean[] caseFlags = null;
|
||||
|
||||
// the source contains all ascii codepoints
|
||||
boolean srcIsASCII = true;
|
||||
// assume the source contains all LDH codepoints
|
||||
boolean srcIsLDH = true;
|
||||
|
||||
//get the options
|
||||
boolean useSTD3ASCIIRules = (boolean)((options & USE_STD3_RULES) != 0);
|
||||
|
||||
int failPos = -1;
|
||||
int ch;
|
||||
int saveIndex = iter.getIndex();
|
||||
// step 1: find out if all the codepoints in src are ASCII
|
||||
while((ch=iter.next())!= UCharacterIterator.DONE){
|
||||
if(ch>0x7F){
|
||||
srcIsASCII = false;
|
||||
}
|
||||
if((srcIsLDH = isLDHChar(ch))==false){
|
||||
failPos = iter.getIndex();
|
||||
}
|
||||
}
|
||||
StringBuffer processOut;
|
||||
|
||||
if(srcIsASCII == false){
|
||||
// step 2: process the string
|
||||
iter.setIndex(saveIndex);
|
||||
processOut = prep.prepare(iter,options);
|
||||
|
||||
}else{
|
||||
//just point to source
|
||||
processOut = new StringBuffer(iter.getText());
|
||||
}
|
||||
// TODO:
|
||||
// The RFC states that
|
||||
// <quote>
|
||||
// ToUnicode never fails. If any step fails, then the original input
|
||||
// is returned immediately in that step.
|
||||
// </quote>
|
||||
|
||||
//step 3: verify ACE Prefix
|
||||
if(startsWithPrefix(processOut)){
|
||||
|
||||
//step 4: Remove the ACE Prefix
|
||||
String temp = processOut.substring(ACE_PREFIX_LENGTH,processOut.length());
|
||||
|
||||
//step 5: Decode using punycode
|
||||
StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp),caseFlags);
|
||||
|
||||
//step 6:Apply toASCII
|
||||
StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
|
||||
|
||||
//step 7: verify
|
||||
if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
|
||||
throw new ParseException("The verification step prescribed by the RFC 3491 failed",
|
||||
ParseException.VERIFICATION_ERROR);
|
||||
}
|
||||
|
||||
//step 8: return output of step 5
|
||||
return decodeOut;
|
||||
|
||||
}else{
|
||||
// verify that STD3 ASCII rules are satisfied
|
||||
if(useSTD3ASCIIRules == true){
|
||||
if( srcIsLDH == false /* source contains some non-LDH characters */
|
||||
|| processOut.charAt(0) == HYPHEN
|
||||
|| processOut.charAt(processOut.length()-1) == HYPHEN){
|
||||
|
||||
if(srcIsLDH==false){
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
|
||||
(failPos>0) ? (failPos-1) : failPos);
|
||||
}else if(processOut.charAt(0) == HYPHEN){
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),0);
|
||||
|
||||
}else{
|
||||
throw new ParseException("The input does not conform to the STD 3 ASCII rules",
|
||||
ParseException.STD3_ASCII_RULES_ERROR,
|
||||
processOut.toString(),
|
||||
processOut.length());
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
// just return the source
|
||||
return new StringBuffer(iter.getText());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
|
||||
throws ParseException, IOException{
|
||||
return convertIDNToUnicode(iter.getText(), options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
|
||||
throws ParseException, IOException{
|
||||
return convertIDNToUnicode(str.toString(), options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static StringBuffer convertIDNToUnicode(String src, int options)
|
||||
throws ParseException, IOException{
|
||||
|
||||
char[] srcArr = src.toCharArray();
|
||||
StringBuffer result = new StringBuffer();
|
||||
int sepIndex=0;
|
||||
int oldSepIndex=0;
|
||||
for(;;){
|
||||
sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(new String(srcArr,oldSepIndex,sepIndex-oldSepIndex));
|
||||
result.append(convertToUnicode(iter,options));
|
||||
if(sepIndex==srcArr.length){
|
||||
break;
|
||||
}
|
||||
// increment the sepIndex to skip past the separator
|
||||
sepIndex++;
|
||||
oldSepIndex =sepIndex;
|
||||
result.append((char)FULL_STOP);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using an case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
* @param s1 First IDN string as StringBuffer
|
||||
* @param s2 Second IDN string as StringBuffer
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
// TODO: optimize
|
||||
public static int compare(StringBuffer s1, StringBuffer s2, int options)
|
||||
throws ParseException, IOException{
|
||||
if(s1==null || s2 == null){
|
||||
throw new IllegalArgumentException("One of the source buffers is null");
|
||||
}
|
||||
StringBuffer s1Out = convertIDNToASCII(s1.toString(),options);
|
||||
StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
|
||||
return compareCaseInsensitiveASCII(s1Out,s2Out);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using an case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
* @param s1 First IDN string
|
||||
* @param s2 Second IDN string
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
// TODO: optimize
|
||||
public static int compare(String s1, String s2, int options)
|
||||
throws ParseException, IOException{
|
||||
if(s1==null || s2 == null){
|
||||
throw new IllegalArgumentException("One of the source buffers is null");
|
||||
}
|
||||
StringBuffer s1Out = convertIDNToASCII(s1, options);
|
||||
StringBuffer s2Out = convertIDNToASCII(s2, options);
|
||||
return compareCaseInsensitiveASCII(s1Out,s2Out);
|
||||
}
|
||||
/**
|
||||
* Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using an case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
* @param s1 First IDN string as UCharacterIterator
|
||||
* @param s2 Second IDN string as UCharacterIterator
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2
|
||||
* @throws ParseException
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
// TODO: optimize
|
||||
public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
|
||||
throws ParseException, IOException{
|
||||
if(i1==null || i2 == null){
|
||||
throw new IllegalArgumentException("One of the source buffers is null");
|
||||
}
|
||||
StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
|
||||
StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
|
||||
return compareCaseInsensitiveASCII(s1Out,s2Out);
|
||||
}
|
||||
}
|
143
icu4j/src/com/ibm/icu/stringprep/ParseException.java
Normal file
143
icu4j/src/com/ibm/icu/stringprep/ParseException.java
Normal file
@ -0,0 +1,143 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/ParseException.java,v $
|
||||
* $Date: 2003/08/21 23:40:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.stringprep;
|
||||
|
||||
/**
 * Exception carrying an integer error code and, optionally, the text surrounding
 * the position at which the error was detected (a "pre-context" ending at the
 * position and a "post-context" starting at it).
 *
 * Two ParseExceptions are equal when their error codes are equal; the message
 * and the context are not part of the comparison.
 *
 * @author ram
 */
public class ParseException extends Exception {

    public static final int INVALID_CHAR_FOUND      = 0;
    public static final int ILLEGAL_CHAR_FOUND      = 1;
    public static final int PROHIBITED_ERROR        = 2;
    public static final int UNASSIGNED_ERROR        = 3;
    public static final int CHECK_BIDI_ERROR        = 4;
    public static final int STD3_ASCII_RULES_ERROR  = 5;
    public static final int ACE_PREFIX_ERROR        = 6;
    public static final int VERIFICATION_ERROR      = 7;
    public static final int LABEL_TOO_LONG_ERROR    = 8;
    public static final int BUFFER_OVERFLOW_ERROR   = 9;

    /** Upper bound used when cutting the pre-context out of the input. */
    public static final int PARSE_CONTEXT_LEN = 16;

    /** One of the error code constants of this class. */
    private int error;

    /**
     * The line on which the error occurred. Stays 0 unless set through
     * setLineNumber.
     */
    private int line;

    /**
     * The character offset to the error. Stays 0 unless set through setOffset;
     * the constructors do not fill it in.
     */
    private int offset;

    /**
     * Textual context before the error.
     * Empty if no context was supplied.
     */
    private StringBuffer preContext = new StringBuffer();

    /**
     * Textual context after the error.
     * Empty if no context was supplied.
     */
    private StringBuffer postContext = new StringBuffer();

    /**
     * Creates an exception without context information.
     * @param message the detail message
     * @param error   the error code
     */
    public ParseException(String message,int error){
        super(message);
        this.error = error;
    }

    /**
     * Creates an exception and records the context around pos.
     * @param message the detail message
     * @param error   the error code
     * @param rules   the text in which the error was found
     * @param pos     the position of the error in rules
     */
    public ParseException(String message,int error, String rules, int pos){
        super(message);
        this.error = error;
        setContext(rules,pos);
    }

    /**
     * Returns the error code this exception was created with.
     * @return the error code
     */
    public int getError(){
        return error;
    }

    /**
     * Compares the error codes of two ParseExceptions.
     * @param other the object to compare with
     * @return true if other is a ParseException with the same error code
     */
    public boolean equals(Object other){
        if(!(other instanceof ParseException)){
            return false;
        }
        return ((ParseException)other).error == this.error;
    }

    /**
     * Hash code consistent with equals: derived from the error code only.
     * @return the hash code
     */
    public int hashCode(){
        return error;
    }

    /**
     * Returns the message followed by the pre- and post-context.
     * @return the string representation
     */
    public String toString(){
        StringBuffer buf = new StringBuffer();
        buf.append(super.getMessage());
        buf.append(". preContext:  ");
        buf.append(preContext);
        buf.append(". postContext: ");
        buf.append(postContext);
        buf.append("\n");
        return buf.toString();
    }

    public void setOffset(int offset){
        this.offset = offset;
    }
    public int getOffset(){
        return offset;
    }
    public int getLineNumber(){
        return line;
    }

    /**
     * Stores the line number.
     * @param lineNumber the line on which the error occurred
     * @return the line number now stored
     */
    public int setLineNumber(int lineNumber){
        // the argument used to be ignored, leaving the line number unsettable
        this.line = lineNumber;
        return line;
    }

    public String getPreContext(){
        return preContext.toString();
    }
    public String getPostContext(){
        return postContext.toString();
    }

    public void setPreContext(String str, int pos){
        setPreContext(str.toCharArray(),pos);
    }

    /**
     * Replaces the pre-context with the characters of str that end at pos:
     * everything before pos when pos is at most PARSE_CONTEXT_LEN, otherwise the
     * PARSE_CONTEXT_LEN-1 characters before pos. A pos outside of str is moved
     * to the nearest end of str.
     * @param str the text in which the error was found
     * @param pos the position of the error in str
     */
    public void setPreContext(char[] str, int pos){
        int stop  = clampPosition(pos, str.length);
        int start = (stop <= PARSE_CONTEXT_LEN)? 0 : (stop - (PARSE_CONTEXT_LEN-1));
        // the length is the distance from start to the error position; deriving it
        // from start alone produced an empty or misplaced context
        preContext.setLength(0);
        preContext.append(str,start,stop-start);
    }

    public void setPostContext(String str, int pos){
        setPostContext(str.toCharArray(),pos);
    }

    /**
     * Replaces the post-context with the characters of str from pos to the end.
     * A pos outside of str is moved to the nearest end of str.
     * @param str the text in which the error was found
     * @param pos the position of the error in str
     */
    public void setPostContext(char[] str, int pos){
        int start = clampPosition(pos, str.length);
        postContext.setLength(0);
        postContext.append(str,start,str.length - start);
    }

    public void setContext(char[]str,int pos){
        setPreContext(str,pos);
        setPostContext(str,pos);
    }
    public void setContext(String str,int pos){
        setPreContext(str,pos);
        setPostContext(str,pos);
    }

    /** Forces pos into the range [0, length] so that building the context cannot throw. */
    private static int clampPosition(int pos, int length){
        if(pos < 0){
            return 0;
        }
        return (pos > length)? length : pos;
    }
}
|
467
icu4j/src/com/ibm/icu/stringprep/Punycode.java
Normal file
467
icu4j/src/com/ibm/icu/stringprep/Punycode.java
Normal file
@ -0,0 +1,467 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/Punycode.java,v $
|
||||
* $Date: 2003/08/21 23:40:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.stringprep;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change this generated comment edit the template variable "typecomment":
|
||||
* Window>Preferences>Java>Templates.
|
||||
* To enable and disable the creation of type comments go to
|
||||
* Window>Preferences>Java>Code Generation.
|
||||
*/
|
||||
/* Package Private class */
|
||||
final class Punycode {
|
||||
|
||||
/* Punycode parameters for Bootstring */
|
||||
private static final int BASE = 36;
|
||||
private static final int TMIN = 1;
|
||||
private static final int TMAX = 26;
|
||||
private static final int SKEW = 38;
|
||||
private static final int DAMP = 700;
|
||||
private static final int INITIAL_BIAS = 72;
|
||||
private static final int INITIAL_N = 0x80;
|
||||
|
||||
/* "Basic" Unicode/ASCII code points */
|
||||
private static final int HYPHEN = 0x2d;
|
||||
private static final int DELIMITER = HYPHEN;
|
||||
|
||||
private static final int ZERO = 0x30;
|
||||
private static final int NINE = 0x39;
|
||||
|
||||
private static final int SMALL_A = 0x61;
|
||||
private static final int SMALL_Z = 0x7a;
|
||||
|
||||
private static final int CAPITAL_A = 0x41;
|
||||
private static final int CAPITAL_Z = 0x5a;
|
||||
private static final int MAX_CP_COUNT = 200;
|
||||
private static final int UINT_MAGIC = 0x80000000;
|
||||
private static final long ULONG_MAGIC = 0x8000000000000000L;
|
||||
|
||||
private static int adaptBias(int delta, int length, boolean firstTime){
|
||||
if(firstTime){
|
||||
delta /=DAMP;
|
||||
}else{
|
||||
delta /= 2;
|
||||
}
|
||||
delta += delta/length;
|
||||
|
||||
int count=0;
|
||||
for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) {
|
||||
delta/=(BASE-TMIN);
|
||||
}
|
||||
|
||||
return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
|
||||
}
|
||||
|
||||
/**
|
||||
* basicToDigit[] contains the numeric value of a basic code
|
||||
* point (for use in representing integers) in the range 0 to
|
||||
* BASE-1, or -1 if b is does not represent a value.
|
||||
*/
|
||||
static final int[] basicToDigit= new int[]{
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
|
||||
};
|
||||
|
||||
|
||||
private static char asciiCaseMap(char b, boolean uppercase) {
|
||||
if(uppercase) {
|
||||
if(SMALL_A<=b && b<=SMALL_Z) {
|
||||
b-=(SMALL_A-CAPITAL_A);
|
||||
}
|
||||
} else {
|
||||
if(CAPITAL_A<=b && b<=CAPITAL_Z) {
|
||||
b+=(SMALL_A-CAPITAL_A);
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* digitToBasic() returns the basic code point whose value
|
||||
* (when used for representing integers) is d, which must be in the
|
||||
* range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
|
||||
* nonzero, in which case the uppercase form is used.
|
||||
*/
|
||||
private static char digitToBasic(int digit, boolean uppercase) {
|
||||
/* 0..25 map to ASCII a..z or A..Z */
|
||||
/* 26..35 map to ASCII 0..9 */
|
||||
if(digit<26) {
|
||||
if(uppercase) {
|
||||
return (char)(CAPITAL_A+digit);
|
||||
} else {
|
||||
return (char)(SMALL_A+digit);
|
||||
}
|
||||
} else {
|
||||
return (char)((ZERO-26)+digit);
|
||||
}
|
||||
}
|
||||
|
||||
public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
|
||||
|
||||
int[] cpBuffer = new int[MAX_CP_COUNT];
|
||||
int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
|
||||
char c, c2;
|
||||
int srcLength = src.length();
|
||||
int destCapacity = MAX_CP_COUNT;
|
||||
char[] dest = new char[destCapacity];
|
||||
StringBuffer result = new StringBuffer();
|
||||
/*
|
||||
* Handle the basic code points and
|
||||
* convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
|
||||
*/
|
||||
srcCPCount=destLength=0;
|
||||
|
||||
for(j=0; j<srcLength; ++j) {
|
||||
if(srcCPCount==MAX_CP_COUNT) {
|
||||
/* too many input code points */
|
||||
throw new IndexOutOfBoundsException();
|
||||
}
|
||||
c=src.charAt(j);
|
||||
if(isBasic(c)) {
|
||||
if(destLength<destCapacity) {
|
||||
cpBuffer[srcCPCount++]=0;
|
||||
dest[destLength]=
|
||||
caseFlags!=null ?
|
||||
asciiCaseMap((char)c, caseFlags[j]) :
|
||||
(char)c;
|
||||
}
|
||||
++destLength;
|
||||
} else {
|
||||
n=((caseFlags!=null && caseFlags[j])? 1 : 0)<<31L;
|
||||
if(!UTF16.isSurrogate(c)) {
|
||||
n|=c;
|
||||
} else if(UTF16.isLeadSurrogate(c) && (j+1)<srcLength && UTF16.isTrailSurrogate(c2=src.charAt(j+1))) {
|
||||
++j;
|
||||
|
||||
n|=UCharacter.getCodePoint(c, c2);
|
||||
} else {
|
||||
/* error: unmatched surrogate */
|
||||
throw new ParseException("Illegal char found",ParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
cpBuffer[srcCPCount++]=n;
|
||||
}
|
||||
}
|
||||
|
||||
/* Finish the basic string - if it is not empty - with a delimiter. */
|
||||
basicLength=destLength;
|
||||
if(basicLength>0) {
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength]=DELIMITER;
|
||||
}
|
||||
++destLength;
|
||||
}
|
||||
|
||||
/*
|
||||
* handledCPCount is the number of code points that have been handled
|
||||
* basicLength is the number of basic code points
|
||||
* destLength is the number of chars that have been output
|
||||
*/
|
||||
|
||||
/* Initialize the state: */
|
||||
n=INITIAL_N;
|
||||
delta=0;
|
||||
bias=INITIAL_BIAS;
|
||||
|
||||
/* Main encoding loop: */
|
||||
for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
|
||||
/*
|
||||
* All non-basic code points < n have been handled already.
|
||||
* Find the next larger one:
|
||||
*/
|
||||
for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
|
||||
q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
|
||||
if(n<=q && q<m) {
|
||||
m=q;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Increase delta enough to advance the decoder's
|
||||
* <n,i> state to <m,0>, but guard against overflow:
|
||||
*/
|
||||
if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
|
||||
throw new RuntimeException("Internal program error");
|
||||
}
|
||||
delta+=(m-n)*(handledCPCount+1);
|
||||
n=m;
|
||||
|
||||
/* Encode a sequence of same code points n */
|
||||
for(j=0; j<srcCPCount; ++j) {
|
||||
q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
|
||||
if(q<n) {
|
||||
++delta;
|
||||
} else if(q==n) {
|
||||
/* Represent delta as a generalized variable-length integer: */
|
||||
for(q=delta, k=BASE; /* no condition */; k+=BASE) {
|
||||
|
||||
/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
|
||||
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(t>TMAX) {
|
||||
t=TMAX;
|
||||
}
|
||||
*/
|
||||
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(k>=(bias+TMAX)) {
|
||||
t=TMAX;
|
||||
}
|
||||
|
||||
if(q<t) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength++]=digitToBasic(t+(q-t)%(BASE-t), false);
|
||||
}
|
||||
q=(q-t)/(BASE-t);
|
||||
}
|
||||
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength++]=digitToBasic(q, (cpBuffer[j]<0));
|
||||
}
|
||||
bias=adaptBias(delta, handledCPCount+1,(handledCPCount==basicLength));
|
||||
delta=0;
|
||||
++handledCPCount;
|
||||
}
|
||||
}
|
||||
|
||||
++delta;
|
||||
++n;
|
||||
}
|
||||
|
||||
return result.append(dest, 0, destLength);
|
||||
}
|
||||
|
||||
private static boolean isBasic(int ch){
|
||||
return (ch < INITIAL_N);
|
||||
}
|
||||
|
||||
private static boolean isBasicUpperCase(int ch){
|
||||
return( CAPITAL_A<=ch && ch >= CAPITAL_Z);
|
||||
}
|
||||
private static boolean isSurrogate(int ch){
|
||||
return (((ch)&0xfffff800)==0xd800);
|
||||
}
|
||||
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
|
||||
throws ParseException{
|
||||
int srcLength = src.length();
|
||||
StringBuffer result = new StringBuffer();
|
||||
int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
|
||||
destCPCount, firstSupplementaryIndex, cpLength;
|
||||
char b;
|
||||
int destCapacity = MAX_CP_COUNT;
|
||||
char[] dest = new char[destCapacity];
|
||||
|
||||
/*
|
||||
* Handle the basic code points:
|
||||
* Let basicLength be the number of input code points
|
||||
* before the last delimiter, or 0 if there is none,
|
||||
* then copy the first basicLength code points to the output.
|
||||
*
|
||||
* The two following loops iterate backward.
|
||||
*/
|
||||
for(j=srcLength; j>0;) {
|
||||
if(src.charAt(--j)==DELIMITER) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
destLength=basicLength=destCPCount=j;
|
||||
|
||||
while(j>0) {
|
||||
b=src.charAt(--j);
|
||||
if(!isBasic(b)) {
|
||||
throw new ParseException("Illegal char found", ParseException.INVALID_CHAR_FOUND);
|
||||
}
|
||||
|
||||
if(j<destCapacity) {
|
||||
dest[j]= b;
|
||||
|
||||
if(caseFlags!=null) {
|
||||
caseFlags[j]=isBasicUpperCase(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize the state: */
|
||||
n=INITIAL_N;
|
||||
i=0;
|
||||
bias=INITIAL_BIAS;
|
||||
firstSupplementaryIndex=1000000000;
|
||||
|
||||
/*
|
||||
* Main decoding loop:
|
||||
* Start just after the last delimiter if any
|
||||
* basic code points were copied; start at the beginning otherwise.
|
||||
*/
|
||||
for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
|
||||
/*
|
||||
* in is the index of the next character to be consumed, and
|
||||
* destCPCount is the number of code points in the output array.
|
||||
*
|
||||
* Decode a generalized variable-length integer into delta,
|
||||
* which gets added to i. The overflow checking is easier
|
||||
* if we increase i as we go, then subtract off its starting
|
||||
* value at the end to obtain delta.
|
||||
*/
|
||||
for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
|
||||
if(in>=srcLength) {
|
||||
throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
|
||||
digit=basicToDigit[(byte)src.charAt(in++)];
|
||||
if(digit<0) {
|
||||
throw new ParseException("Invalid char found", ParseException.INVALID_CHAR_FOUND);
|
||||
}
|
||||
if(digit>(0x7fffffff-i)/w) {
|
||||
/* integer overflow */
|
||||
throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
|
||||
i+=digit*w;
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(k>=(bias+TMAX)) {
|
||||
t=TMAX;
|
||||
}
|
||||
if(digit<t) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(w>0x7fffffff/(BASE-t)) {
|
||||
/* integer overflow */
|
||||
throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
w*=BASE-t;
|
||||
}
|
||||
|
||||
/*
|
||||
* Modification from sample code:
|
||||
* Increments destCPCount here,
|
||||
* where needed instead of in for() loop tail.
|
||||
*/
|
||||
++destCPCount;
|
||||
bias=adaptBias(i-oldi, destCPCount, (oldi==0));
|
||||
|
||||
/*
|
||||
* i was supposed to wrap around from (incremented) destCPCount to 0,
|
||||
* incrementing n each time, so we'll fix that now:
|
||||
*/
|
||||
if(i/destCPCount>(0x7fffffff-n)) {
|
||||
/* integer overflow */
|
||||
throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
|
||||
n+=i/destCPCount;
|
||||
i%=destCPCount;
|
||||
/* not needed for Punycode: */
|
||||
/* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
|
||||
|
||||
if(n>0x10ffff || isSurrogate(n)) {
|
||||
/* Unicode code point overflow */
|
||||
throw new ParseException("Illegal char found", ParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
|
||||
/* Insert n at position i of the output: */
|
||||
cpLength=UTF16.getCharCount(n);
|
||||
if((destLength+cpLength)<destCapacity) {
|
||||
int codeUnitIndex;
|
||||
|
||||
/*
|
||||
* Handle indexes when supplementary code points are present.
|
||||
*
|
||||
* In almost all cases, there will be only BMP code points before i
|
||||
* and even in the entire string.
|
||||
* This is handled with the same efficiency as with UTF-32.
|
||||
*
|
||||
* Only the rare cases with supplementary code points are handled
|
||||
* more slowly - but not too bad since this is an insertion anyway.
|
||||
*/
|
||||
if(i<=firstSupplementaryIndex) {
|
||||
codeUnitIndex=i;
|
||||
if(cpLength>1) {
|
||||
firstSupplementaryIndex=codeUnitIndex;
|
||||
} else {
|
||||
++firstSupplementaryIndex;
|
||||
}
|
||||
} else {
|
||||
codeUnitIndex=firstSupplementaryIndex;
|
||||
codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex);
|
||||
}
|
||||
|
||||
/* use the UChar index codeUnitIndex instead of the code point index i */
|
||||
if(codeUnitIndex<destLength) {
|
||||
System.arraycopy(dest, codeUnitIndex,
|
||||
dest, codeUnitIndex+cpLength,
|
||||
(destLength-codeUnitIndex));
|
||||
if(caseFlags!=null) {
|
||||
System.arraycopy(caseFlags, codeUnitIndex,
|
||||
caseFlags, codeUnitIndex+cpLength,
|
||||
destLength-codeUnitIndex);
|
||||
}
|
||||
}
|
||||
if(cpLength==1) {
|
||||
/* BMP, insert one code unit */
|
||||
dest[codeUnitIndex]=(char)n;
|
||||
} else {
|
||||
/* supplementary character, insert two code units */
|
||||
dest[codeUnitIndex]=UTF16.getLeadSurrogate(n);
|
||||
dest[codeUnitIndex+1]=UTF16.getTrailSurrogate(n);
|
||||
}
|
||||
if(caseFlags!=null) {
|
||||
/* Case of last character determines uppercase flag: */
|
||||
caseFlags[codeUnitIndex]=isBasicUpperCase(src.charAt(in-1));
|
||||
if(cpLength==2) {
|
||||
caseFlags[codeUnitIndex+1]=false;
|
||||
}
|
||||
}
|
||||
}
|
||||
destLength+=cpLength;
|
||||
++i;
|
||||
}
|
||||
result.append(dest, 0, destLength);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
409
icu4j/src/com/ibm/icu/stringprep/StringPrep.java
Normal file
409
icu4j/src/com/ibm/icu/stringprep/StringPrep.java
Normal file
@ -0,0 +1,409 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/StringPrep.java,v $
|
||||
* $Date: 2003/08/21 23:40:41 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.stringprep;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import com.ibm.icu.impl.CharTrie;
|
||||
import com.ibm.icu.impl.StringPrepDataReader;
|
||||
import com.ibm.icu.impl.Trie;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UCharacterDirection;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class StringPrep {
|
||||
/**
|
||||
* Option to prohibit processing of unassigned code points in the input
|
||||
*
|
||||
* @see usprep_prepare
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int NONE = 0x0000;
|
||||
|
||||
/**
|
||||
* Option to allow processing of unassigned code points in the input
|
||||
*
|
||||
* @see usprep_prepare
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int ALLOW_UNASSIGNED = 0x0001;
|
||||
|
||||
private static final int UNASSIGNED = 0x0000;
|
||||
private static final int MAP = 0x0001;
|
||||
private static final int PROHIBITED = 0x0002;
|
||||
private static final int LABEL_SEPARATOR = 0x0003;
|
||||
private static final int DELETE = 0x0004;
|
||||
private static final int TYPE_LIMIT = 0x0005;
|
||||
|
||||
private static final int NORMALIZATION_ON = 0x0001;
|
||||
private static final int CHECK_BIDI_ON = 0x0002;
|
||||
|
||||
private static final int TYPE_THRESHOLD = 0xFFF0;
|
||||
private static final int MAX_INDEX_VALUE = 0x3FBF; /*16139*/
|
||||
private static final int MAX_INDEX_TOP_LENGTH = 0x0003;
|
||||
|
||||
/* indexes[] value names */
|
||||
private static final int INDEX_TRIE_SIZE = 0; /* number of bytes in normalization trie */
|
||||
private static final int INDEX_MAPPING_DATA_SIZE = 1; /* The array that contains the mapping */
|
||||
private static final int NORM_CORRECTNS_LAST_UNI_VERSION = 2; /* The index of Unicode version of last entry in NormalizationCorrections.txt */
|
||||
private static final int ONE_UCHAR_MAPPING_INDEX_START = 3; /* The starting index of 1 UChar mapping index in the mapping data array */
|
||||
private static final int TWO_UCHARS_MAPPING_INDEX_START = 4; /* The starting index of 2 UChars mapping index in the mapping data array */
|
||||
private static final int THREE_UCHARS_MAPPING_INDEX_START = 5;
|
||||
private static final int FOUR_UCHARS_MAPPING_INDEX_START = 6;
|
||||
private static final int OPTIONS = 7; /* Bit set of options to turn on in the profile */
|
||||
private static final int INDEX_TOP = 16; /* changing this requires a new formatVersion */
|
||||
|
||||
|
||||
/**
|
||||
* Default buffer size of datafile
|
||||
*/
|
||||
private static final int DATA_BUFFER_SIZE = 25000;
|
||||
|
||||
/* Wrappers for Trie implementations */
|
||||
private static final class StringPrepTrieImpl implements Trie.DataManipulate{
|
||||
static CharTrie sprepTrie = null;
|
||||
/**
|
||||
* Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
|
||||
* data the index array offset of the indexes for that lead surrogate.
|
||||
* @param property data value for a surrogate from the trie, including
|
||||
* the folding offset
|
||||
* @return data offset or 0 if there is no data for the lead surrogate
|
||||
*/
|
||||
public int getFoldingOffset(int value){
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
private static StringPrepTrieImpl sprepTrieImpl;
|
||||
private static int[] indexes;
|
||||
private static char[] mappingData;
|
||||
private static byte[] formatVersion;
|
||||
|
||||
private char getCodePointValue(int ch){
|
||||
return StringPrepTrieImpl.sprepTrie.getCodePointValue(ch);
|
||||
}
|
||||
|
||||
//protected
|
||||
private boolean doNFKC = false;
|
||||
private boolean checkBiDi = false;
|
||||
|
||||
private VersionInfo unicodeVersion;
|
||||
private VersionInfo normVersion;
|
||||
|
||||
|
||||
private static VersionInfo getVersionInfo(int comp){
|
||||
int micro = comp & 0xFF;
|
||||
int milli =(comp >> 8) & 0xFF;
|
||||
int minor =(comp >> 16) & 0xFF;
|
||||
int major =(comp >> 24) & 0xFF;
|
||||
return VersionInfo.getInstance(major,minor,milli,micro);
|
||||
}
|
||||
private static VersionInfo getVersionInfo(byte[] version){
|
||||
if(version.length != 4){
|
||||
return null;
|
||||
}
|
||||
return VersionInfo.getInstance((int)version[0],(int) version[1],(int) version[2],(int) version[3]);
|
||||
}
|
||||
|
||||
private StringPrep(InputStream inputStream) throws IOException{
|
||||
|
||||
BufferedInputStream b = new BufferedInputStream(inputStream,DATA_BUFFER_SIZE);
|
||||
|
||||
StringPrepDataReader reader = new StringPrepDataReader(b);
|
||||
|
||||
// read the indexes
|
||||
indexes = reader.readIndexes(INDEX_TOP);
|
||||
|
||||
byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
|
||||
|
||||
sprepTrieImpl = new StringPrepTrieImpl();
|
||||
//indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes
|
||||
mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2];
|
||||
// load the rest of the data data and initialize the data members
|
||||
reader.read(sprepBytes,mappingData);
|
||||
|
||||
StringPrepTrieImpl.sprepTrie = new CharTrie( new ByteArrayInputStream(sprepBytes),sprepTrieImpl );
|
||||
|
||||
// get the data format version
|
||||
formatVersion = reader.getDataFormatVersion();
|
||||
|
||||
// get the options
|
||||
doNFKC = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
|
||||
checkBiDi = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
|
||||
unicodeVersion = getVersionInfo(reader.getUnicodeVersion());
|
||||
normVersion = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
|
||||
if(normVersion.compareTo(UCharacter.getUnicodeVersion())>0){
|
||||
throw new IOException("Normalization Correction version not supported");
|
||||
}
|
||||
b.close();
|
||||
}
|
||||
/**
|
||||
* Returns the StringPrep instance created after reading the input stream.
|
||||
* The object does not hold a reference to the input steam, so the stream can be
|
||||
* closed after the method returns.
|
||||
*
|
||||
* @param inputStream The stream for reading the StringPrep profile binary
|
||||
* @return StringPrep object created from the input stream
|
||||
* @throws IOException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final StringPrep getInstance(InputStream inputStream)
|
||||
throws IOException{
|
||||
|
||||
StringPrep prep = null;
|
||||
// load the file and create the object
|
||||
prep = new StringPrep(inputStream);
|
||||
|
||||
return prep;
|
||||
}
|
||||
|
||||
private class Values{
|
||||
boolean isIndex;
|
||||
int value;
|
||||
int type;
|
||||
}
|
||||
|
||||
private static final void getValues(char trieWord,Values values){
|
||||
|
||||
if(trieWord == 0){
|
||||
/*
|
||||
* Initial value stored in the mapping table
|
||||
* just return USPREP_TYPE_LIMIT .. so that
|
||||
* the source codepoint is copied to the destination
|
||||
*/
|
||||
values.type = TYPE_LIMIT;
|
||||
}else if(trieWord >= TYPE_THRESHOLD){
|
||||
values.type = (trieWord - TYPE_THRESHOLD);
|
||||
}else{
|
||||
/* get the type */
|
||||
values.type = MAP;
|
||||
/* ascertain if the value is index or delta */
|
||||
if((trieWord & 0x02)>0){
|
||||
values.isIndex = true;
|
||||
values.value = trieWord >> 2; //mask off the lower 2 bits and shift
|
||||
|
||||
}else{
|
||||
values.isIndex = false;
|
||||
values.value = ((int)(trieWord<<16))>>16;
|
||||
values.value = (values.value >> 2);
|
||||
|
||||
}
|
||||
|
||||
if((trieWord>>2) == MAX_INDEX_VALUE){
|
||||
values.type = DELETE;
|
||||
values.isIndex = false;
|
||||
values.value = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private StringBuffer map( UCharacterIterator iter, int options)
|
||||
throws ParseException{
|
||||
|
||||
Values val = new Values();
|
||||
char result = 0;
|
||||
int ch = UCharacterIterator.DONE;
|
||||
StringBuffer dest = new StringBuffer();
|
||||
boolean allowUnassigned = (boolean) ((options & ALLOW_UNASSIGNED)>0);
|
||||
|
||||
while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
|
||||
|
||||
result = getCodePointValue(ch);
|
||||
getValues(result,val);
|
||||
|
||||
// check if the source codepoint is unassigned
|
||||
if(val.type == UNASSIGNED && allowUnassigned == false){
|
||||
throw new ParseException("An unassigned code point was found in the input",
|
||||
ParseException.UNASSIGNED_ERROR,
|
||||
iter.getText(),iter.getIndex());
|
||||
}else if((val.type == MAP)){
|
||||
int index, length;
|
||||
|
||||
if(val.isIndex){
|
||||
index = val.value;
|
||||
if(index >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
|
||||
index < indexes[TWO_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 1;
|
||||
}else if(index >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[THREE_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 2;
|
||||
}else if(index >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 3;
|
||||
}else{
|
||||
length = mappingData[index++];
|
||||
}
|
||||
/* copy mapping to destination */
|
||||
dest.append(mappingData,index,length);
|
||||
continue;
|
||||
|
||||
}else{
|
||||
ch -= val.value;
|
||||
}
|
||||
}else if(val.type == DELETE){
|
||||
// just consume the codepoint and contine
|
||||
continue;
|
||||
}
|
||||
//copy the source into destination
|
||||
UTF16.append(dest,ch);
|
||||
}
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
private StringBuffer normalize(StringBuffer src){
|
||||
return new StringBuffer(Normalizer.normalize(src.toString(),Normalizer.NFKC,Normalizer.UNICODE_3_2));
|
||||
}
|
||||
|
||||
protected boolean isLabelSeparator(int ch){
|
||||
int result = getCodePointValue(ch);
|
||||
if( (result & 0x07) == LABEL_SEPARATOR){
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
1) Map -- For each character in the input, check if it has a mapping
|
||||
and, if so, replace it with its mapping.
|
||||
|
||||
2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
normalization.
|
||||
|
||||
3) Prohibit -- Check for any characters that are not allowed in the
|
||||
output. If any are found, return an error.
|
||||
|
||||
4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
any are found, make sure that the whole string satisfies the
|
||||
requirements for bidirectional strings. If the string does not
|
||||
satisfy the requirements for bidirectional strings, return an
|
||||
error.
|
||||
[Unicode3.2] defines several bidirectional categories; each character
|
||||
has one bidirectional category assigned to it. For the purposes of
|
||||
the requirements below, an "RandALCat character" is a character that
|
||||
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
|
||||
is a character that has Unicode bidirectional category "L". Note
|
||||
|
||||
|
||||
that there are many characters which fall in neither of the above
|
||||
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
|
||||
this because they have bidirectional category "EN".
|
||||
|
||||
In any profile that specifies bidirectional character handling, all
|
||||
three of the following requirements MUST be met:
|
||||
|
||||
1) The characters in section 5.8 MUST be prohibited.
|
||||
|
||||
2) If a string contains any RandALCat character, the string MUST NOT
|
||||
contain any LCat character.
|
||||
|
||||
3) If a string contains any RandALCat character, a RandALCat
|
||||
character MUST be the first character of the string, and a
|
||||
RandALCat character MUST be the last character of the string.
|
||||
*/
|
||||
/**
|
||||
* Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
|
||||
* checks for prohited and BiDi characters in the order defined by RFC 3454
|
||||
* depending on the options specified in the profile.
|
||||
*
|
||||
* @param src A UCharacterIterator object containing the source string
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - StringPrep.NONE Prohibit processing of unassigned code points in the input
|
||||
*
|
||||
* - StringPrep.ALLOW_UNASSIGNED Treat the unassigned code points are in the input
|
||||
* as normal Unicode code points.
|
||||
*
|
||||
* @return StringBuffer A StringBuffer containing the output
|
||||
* @throws ParseException
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public StringBuffer prepare(UCharacterIterator src, int options)
|
||||
throws ParseException{
|
||||
|
||||
// map
|
||||
StringBuffer mapOut = map(src,options);
|
||||
StringBuffer normOut = mapOut;// initialize
|
||||
|
||||
if(doNFKC){
|
||||
// normalize
|
||||
normOut = normalize(mapOut);
|
||||
}
|
||||
|
||||
int ch;
|
||||
char result;
|
||||
UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
|
||||
Values val = new Values();
|
||||
int direction=UCharacterDirection.CHAR_DIRECTION_COUNT,
|
||||
firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT;
|
||||
int rtlPos=-1, ltrPos=-1;
|
||||
boolean rightToLeft=false, leftToRight=false;
|
||||
|
||||
while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
|
||||
result = getCodePointValue(ch);
|
||||
getValues(result,val);
|
||||
|
||||
if(val.type == PROHIBITED ){
|
||||
throw new ParseException("A prohibited code point was found in the input",
|
||||
ParseException.PROHIBITED_ERROR,iter.getText(),val.value);
|
||||
}
|
||||
|
||||
direction = UCharacter.getDirection(ch);
|
||||
if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
|
||||
firstCharDir = direction;
|
||||
}
|
||||
if(direction == UCharacterDirection.LEFT_TO_RIGHT){
|
||||
leftToRight = true;
|
||||
ltrPos = iter.getIndex()-1;
|
||||
}
|
||||
if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
|
||||
rightToLeft = true;
|
||||
rtlPos = iter.getIndex()-1;
|
||||
}
|
||||
}
|
||||
if(checkBiDi == true){
|
||||
// satisfy 2
|
||||
if( leftToRight == true && rightToLeft == true){
|
||||
throw new ParseException("The input does not conform to the rules for BiDi code points.",
|
||||
ParseException.CHECK_BIDI_ERROR,iter.getText(),
|
||||
(rtlPos>ltrPos) ? rtlPos : ltrPos);
|
||||
}
|
||||
|
||||
//satisfy 3
|
||||
if( rightToLeft == true &&
|
||||
!((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) &&
|
||||
(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))
|
||||
){
|
||||
throw new ParseException("The input does not conform to the rules for BiDi code points.",
|
||||
ParseException.CHECK_BIDI_ERROR,iter.getText(),
|
||||
(rtlPos>ltrPos) ? rtlPos : ltrPos);
|
||||
}
|
||||
}
|
||||
return normOut;
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user