ICU-3050 remove label separators from the data file

X-SVN-Rev: 13455
This commit is contained in:
Ram Viswanadha 2003-10-17 19:26:35 +00:00
parent 81b3a18536
commit 9bf949f010
8 changed files with 38 additions and 108 deletions

View File

@ -34,9 +34,8 @@ enum UStringPrepType{
USPREP_UNASSIGNED = 0x0000 ,
USPREP_MAP = 0x0001 ,
USPREP_PROHIBITED = 0x0002 ,
USPREP_LABEL_SEPARATOR = 0x0003 ,
USPREP_DELETE = 0x0004 ,
USPREP_TYPE_LIMIT = 0x0005
USPREP_DELETE = 0x0003 ,
USPREP_TYPE_LIMIT = 0x0004
};
typedef enum UStringPrepType UStringPrepType;
@ -46,7 +45,6 @@ static const char* usprepTypeNames[] ={
"UNASSIGNED" ,
"MAP" ,
"PROHIBITED" ,
"LABEL_SEPARATOR" ,
"DELETE",
"TYPE_LIMIT"
};
@ -151,13 +149,6 @@ usprep_normalize( UStringPrepProfile* prep,
UChar* dest, int32_t destCapacity,
UErrorCode* status );
U_CFUNC UBool
usprep_isLabelSeparator(UStringPrepProfile* profile,
UChar32 ch, UErrorCode* status);
/**
* Swap StringPrep .spp profile data. See udataswp.h.
* @internal

View File

@ -120,21 +120,25 @@ compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
return lengthResult;
}
static inline UBool
isLDHChar(UChar32 ch){
// high runner case
if(ch>0x007A){
return FALSE;
}
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
if( (ch==0x002D) ||
(0x0030 <= ch && ch <= 0x0039) ||
(0x0041 <= ch && ch <= 0x005A) ||
(0x0061 <= ch && ch <= 0x007A)
){
/**
* Determine whether the given character is a label separator as
* defined by the IDNA RFC (U+002E, U+3002, U+FF0E or U+FF61).
*
* @param ch The character to test
* @return TRUE if the character is a label separator, FALSE otherwise
* @draft ICU 2.8
*/
static inline UBool isLabelSeparator(UChar ch){
switch(ch){
case 0x002e:
case 0x3002:
case 0xFF0E:
case 0xFF61:
return TRUE;
}
default:
return FALSE;
}
}
// returns the length of the label excluding the separator
@ -153,7 +157,7 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
*done = TRUE;
return i;
}
if(usprep_isLabelSeparator(nameprep, src[i], status)){
if(isLabelSeparator(src[i])){
*limit = src + (i+1); // go past the delimiter
return i;
@ -162,7 +166,7 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
}else{
int32_t i;
for(i=0;i<srcLength;i++){
if(usprep_isLabelSeparator(nameprep, src[i], status)){
if(isLabelSeparator(src[i])){
*limit = src + (i+1); // go past the delimiter
return i;
}
@ -175,6 +179,21 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
return i;
}
}
/**
 * Checks whether a character is an ASCII letter, digit or hyphen (LDH).
 *
 * @param ch The character to test
 * @return TRUE for U+002D, U+0030..U+0039, U+0041..U+005A and
 *         U+0061..U+007A; FALSE for everything else
 */
static inline UBool isLDHChar(UChar ch){
    // hyphen-minus
    if(ch == 0x002D){
        return TRUE;
    }
    // digits 0-9
    if(ch >= 0x0030 && ch <= 0x0039){
        return TRUE;
    }
    // upper case A-Z
    if(ch >= 0x0041 && ch <= 0x005A){
        return TRUE;
    }
    // lower case a-z
    if(ch >= 0x0061 && ch <= 0x007A){
        return TRUE;
    }
    // everything else, including all values above 0x007A
    return FALSE;
}
static int32_t
_internal_toASCII(const UChar* src, int32_t srcLength,

View File

@ -786,35 +786,6 @@ CLEANUP:
}
/**
 * Reports whether the data of a StringPrep profile classifies a code point
 * as a label separator.
 *
 * @param profile The profile whose trie is consulted; must not be NULL
 * @param ch      The code point to look up
 * @param status  In/out error code. No lookup is done when it is NULL or
 *                already holds a failure. Set to U_ILLEGAL_ARGUMENT_ERROR
 *                when profile is NULL.
 * @return TRUE if the type decoded from the trie value is
 *         USPREP_LABEL_SEPARATOR; FALSE otherwise, including all error cases
 */
U_CFUNC UBool
usprep_isLabelSeparator(UStringPrepProfile* profile,
                        UChar32 ch, UErrorCode* status){
    // bail out when there is no usable status or a failure is already pending
    if(status==NULL || U_FAILURE(*status)){
        return FALSE;
    }

    // a profile is required for the lookup
    if(profile==NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    uint16_t result;
    int16_t mappedValue;   // passed to getValues(); not used afterwards
    UBool valueIsIndex;    // passed to getValues(); not used afterwards

    // fetch the 16-bit trie entry for ch and decode its type
    UTRIE_GET16(&profile->sprepTrie,ch, result);
    UStringPrepType entryType = getValues(result,mappedValue,valueIsIndex);

    return (entryType == USPREP_LABEL_SEPARATOR) ? TRUE : FALSE;
}
/* data swapping ------------------------------------------------------------ */
U_CAPI int32_t U_EXPORT2

View File

@ -1942,11 +1942,3 @@ E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED
# Total code points 82
# code points for LDH chars
002E; ; LABEL_SEPARATOR
3002; ; LABEL_SEPARATOR
FF0E; ; LABEL_SEPARATOR
FF61; ; LABEL_SEPARATOR
# Total code points 4

View File

@ -77,13 +77,6 @@ strprepProfileLineFn(void *context,
/* compare the mapping */
compareMapping(data, code,mapping, length,USPREP_MAP);
}else if(strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
/* compare the range */
compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
}else{
*pErrorCode = U_INVALID_FORMAT_ERROR;
}

View File

@ -180,12 +180,6 @@ strprepProfileLineFn(void *context,
/* store the mapping */
compareMapping(code,mapping, length,USPREP_MAP);
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
/* store the range */
compareFlagsForRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
}else{
*pErrorCode = U_INVALID_FORMAT_ERROR;
}
@ -298,9 +292,6 @@ testAllCodepoints(TestIDNA& test){
if(type == USPREP_MAP){
mapped++;
}
if(type == USPREP_LABEL_SEPARATOR){
mappedWithNorm++;
}
}else{
noValueInTrie++;
if(result > 0){

View File

@ -121,9 +121,6 @@ sub main(){
createProhibitedTable($inFH,$outfile,$line);
}
}
if( defined $writeLDHChars){
createLDHCharTable($inFH, $outfile);
}
if( defined $writeISCSIChars){
create_iSCSIExtraProhibitedTable($inFH, $outfile);
}
@ -238,21 +235,7 @@ sub createProhibitedTable{
readPrint($inFH,$outFH,$comment, "C");
close($outFH);
}
#-----------------------------------------------------------------------
# Appends the four LABEL_SEPARATOR entries, preceded by a comment line and
# followed by a total count line, to the file named by $outfile.
# Arguments: ($inFH, $outfile, $line); only $outfile is used here.
sub createLDHCharTable{
    ($inFH,$outfile,$line) = @_;
    $comment ="# code points for LDH chars \n";
    $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    # entries are written in this order, one line each
    my @separatorLines = (
        "002E; ; LABEL_SEPARATOR\n",
        "3002; ; LABEL_SEPARATOR\n",
        "FF0E; ; LABEL_SEPARATOR\n",
        "FF61; ; LABEL_SEPARATOR\n",
    );

    print $outFH $comment;
    foreach my $entry (@separatorLines){
        print $outFH $entry;
    }
    print $outFH "\n# Total code points 4\n";
    close($outFH);
}
#-----------------------------------------------------------------------
sub create_iSCSIExtraProhibitedTable{
($inFH,$outfile,$line) = @_;
@ -295,12 +278,11 @@ Options:
--C7 Generate data for table C.7
--C8 Generate data for table C.8
--C9 Generate data for table C.9
--ldh-chars Generate data for LDH chars used in IDNA
--iscsi Generate data for extra prohibited iSCSI chars
Note, --B2 and --B3 are mutually exclusive.
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C21 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --ldh-chars
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9
filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

View File

@ -381,15 +381,6 @@ strprepProfileLineFn(void *context,
/* store the mapping */
storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
if(U_FAILURE(*pErrorCode)){
fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
return;
}
/* store the range */
storeRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR, pErrorCode);
}else{
*pErrorCode = U_INVALID_FORMAT_ERROR;
}