ICU-3050 remove label separators from the data file
X-SVN-Rev: 13455
This commit is contained in:
parent
81b3a18536
commit
9bf949f010
@ -34,9 +34,8 @@ enum UStringPrepType{
|
||||
USPREP_UNASSIGNED = 0x0000 ,
|
||||
USPREP_MAP = 0x0001 ,
|
||||
USPREP_PROHIBITED = 0x0002 ,
|
||||
USPREP_LABEL_SEPARATOR = 0x0003 ,
|
||||
USPREP_DELETE = 0x0004 ,
|
||||
USPREP_TYPE_LIMIT = 0x0005
|
||||
USPREP_DELETE = 0x0003 ,
|
||||
USPREP_TYPE_LIMIT = 0x0004
|
||||
};
|
||||
|
||||
typedef enum UStringPrepType UStringPrepType;
|
||||
@ -46,7 +45,6 @@ static const char* usprepTypeNames[] ={
|
||||
"UNASSIGNED" ,
|
||||
"MAP" ,
|
||||
"PROHIBITED" ,
|
||||
"LABEL_SEPARATOR" ,
|
||||
"DELETE",
|
||||
"TYPE_LIMIT"
|
||||
};
|
||||
@ -151,13 +149,6 @@ usprep_normalize( UStringPrepProfile* prep,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode* status );
|
||||
|
||||
|
||||
U_CFUNC UBool
|
||||
usprep_isLabelSeparator(UStringPrepProfile* profile,
|
||||
UChar32 ch, UErrorCode* status);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Swap StringPrep .spp profile data. See udataswp.h.
|
||||
* @internal
|
||||
|
@ -120,21 +120,25 @@ compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
|
||||
return lengthResult;
|
||||
}
|
||||
|
||||
static inline UBool
|
||||
isLDHChar(UChar32 ch){
|
||||
// high runner case
|
||||
if(ch>0x007A){
|
||||
return FALSE;
|
||||
|
||||
/**
|
||||
* Ascertain if the given code point is a label separator as
|
||||
* defined by the IDNA RFC (RFC 3490, section 3.1)
|
||||
*
|
||||
* @param ch The code point to be ascertained
|
||||
* @return true if the char is a label separator
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
static inline UBool isLabelSeparator(UChar ch){
|
||||
switch(ch){
|
||||
case 0x002e:
|
||||
case 0x3002:
|
||||
case 0xFF0E:
|
||||
case 0xFF61:
|
||||
return TRUE;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
|
||||
if( (ch==0x002D) ||
|
||||
(0x0030 <= ch && ch <= 0x0039) ||
|
||||
(0x0041 <= ch && ch <= 0x005A) ||
|
||||
(0x0061 <= ch && ch <= 0x007A)
|
||||
){
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
// returns the length of the label excluding the separator
|
||||
@ -153,7 +157,7 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
|
||||
*done = TRUE;
|
||||
return i;
|
||||
}
|
||||
if(usprep_isLabelSeparator(nameprep, src[i], status)){
|
||||
if(isLabelSeparator(src[i])){
|
||||
*limit = src + (i+1); // go past the delimiter
|
||||
return i;
|
||||
|
||||
@ -162,7 +166,7 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
|
||||
}else{
|
||||
int32_t i;
|
||||
for(i=0;i<srcLength;i++){
|
||||
if(usprep_isLabelSeparator(nameprep, src[i], status)){
|
||||
if(isLabelSeparator(src[i])){
|
||||
*limit = src + (i+1); // go past the delimiter
|
||||
return i;
|
||||
}
|
||||
@ -175,6 +179,21 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
|
||||
return i;
|
||||
}
|
||||
}
|
||||
static inline UBool isLDHChar(UChar ch){
|
||||
// high runner case
|
||||
if(ch>0x007A){
|
||||
return FALSE;
|
||||
}
|
||||
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
|
||||
if( (ch==0x002D) ||
|
||||
(0x0030 <= ch && ch <= 0x0039) ||
|
||||
(0x0041 <= ch && ch <= 0x005A) ||
|
||||
(0x0061 <= ch && ch <= 0x007A)
|
||||
){
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
_internal_toASCII(const UChar* src, int32_t srcLength,
|
||||
|
@ -786,35 +786,6 @@ CLEANUP:
|
||||
}
|
||||
|
||||
|
||||
/**
 * Report whether the profile's trie classifies a code point as
 * USPREP_LABEL_SEPARATOR.
 *
 * @param profile The StringPrep profile whose sprepTrie is consulted;
 *                must not be NULL
 * @param ch      The code point to look up
 * @param status  In/out error code. If it is NULL or already holds a
 *                failure, the function returns FALSE without doing
 *                anything. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                profile is NULL.
 * @return TRUE if the decoded type is USPREP_LABEL_SEPARATOR,
 *         FALSE otherwise (including every error case)
 */
U_CFUNC UBool
usprep_isLabelSeparator(UStringPrepProfile* profile,
                        UChar32 ch, UErrorCode* status){
    // bail out on a missing or already-failed status
    if(status==NULL || U_FAILURE(*status)){
        return FALSE;
    }
    // the lookup below needs a profile
    if(profile==NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    // fetch the 16-bit trie word stored for this code point
    uint16_t trieWord;
    UTRIE_GET16(&profile->sprepTrie,ch, trieWord);

    // Only the returned type matters here; mappedValue and isIndexFlag are
    // presumably filled in by getValues (TODO confirm) and are not read.
    int16_t mappedValue;
    UBool isIndexFlag;
    UStringPrepType kind = getValues(trieWord,mappedValue,isIndexFlag);

    return (kind == USPREP_LABEL_SEPARATOR) ? TRUE : FALSE;
}
|
||||
|
||||
/* data swapping ------------------------------------------------------------ */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
|
@ -1942,11 +1942,3 @@ E0001; ; PROHIBITED
|
||||
E0020..E007F; ; PROHIBITED
|
||||
|
||||
# Total code points 82
|
||||
|
||||
# code points for label separators
|
||||
002E; ; LABEL_SEPARATOR
|
||||
3002; ; LABEL_SEPARATOR
|
||||
FF0E; ; LABEL_SEPARATOR
|
||||
FF61; ; LABEL_SEPARATOR
|
||||
|
||||
# Total code points 4
|
||||
|
@ -77,13 +77,6 @@ strprepProfileLineFn(void *context,
|
||||
|
||||
/* compare the mapping */
|
||||
compareMapping(data, code,mapping, length,USPREP_MAP);
|
||||
|
||||
}else if(strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* compare the range */
|
||||
compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
@ -180,12 +180,6 @@ strprepProfileLineFn(void *context,
|
||||
/* store the mapping */
|
||||
compareMapping(code,mapping, length,USPREP_MAP);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* store the range */
|
||||
compareFlagsForRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
@ -298,9 +292,6 @@ testAllCodepoints(TestIDNA& test){
|
||||
if(type == USPREP_MAP){
|
||||
mapped++;
|
||||
}
|
||||
if(type == USPREP_LABEL_SEPARATOR){
|
||||
mappedWithNorm++;
|
||||
}
|
||||
}else{
|
||||
noValueInTrie++;
|
||||
if(result > 0){
|
||||
|
@ -121,9 +121,6 @@ sub main(){
|
||||
createProhibitedTable($inFH,$outfile,$line);
|
||||
}
|
||||
}
|
||||
if( defined $writeLDHChars){
|
||||
createLDHCharTable($inFH, $outfile);
|
||||
}
|
||||
if( defined $writeISCSIChars){
|
||||
create_iSCSIExtraProhibitedTable($inFH, $outfile);
|
||||
}
|
||||
@ -238,21 +235,7 @@ sub createProhibitedTable{
|
||||
readPrint($inFH,$outFH,$comment, "C");
|
||||
close($outFH);
|
||||
}
|
||||
#-----------------------------------------------------------------------
|
||||
# Append the four LABEL_SEPARATOR entries (002E, 3002, FF0E, FF61), a leading
# comment line and a trailing total line to $outfile.
#
# Arguments: ($inFH, $outfile, $line). They are assigned to variables that are
# not declared with "my" inside this sub, exactly as before. $inFH and $line
# are not otherwise used in this sub.
#
# Dies if $outfile cannot be opened for appending.
#
# NOTE(review): the emitted header says "LDH chars" although every entry is
# typed LABEL_SEPARATOR; the text is kept verbatim so the output is unchanged.
sub createLDHCharTable{
    ($inFH,$outfile,$line) = @_;
    $comment ="# code points for LDH chars \n";

    $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    # Everything written to the file, in output order.
    my @records = (
        $comment,
        "002E; ; LABEL_SEPARATOR\n",
        "3002; ; LABEL_SEPARATOR\n",
        "FF0E; ; LABEL_SEPARATOR\n",
        "FF61; ; LABEL_SEPARATOR\n",
        "\n# Total code points 4\n",
    );
    foreach my $record (@records){
        print $outFH $record;
    }
    close($outFH);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub create_iSCSIExtraProhibitedTable{
|
||||
($inFH,$outfile,$line) = @_;
|
||||
@ -295,12 +278,11 @@ Options:
|
||||
--C7 Generate data for table C.7
|
||||
--C8 Generate data for table C.8
|
||||
--C9 Generate data for table C.9
|
||||
--ldh-chars Generate data for LDH chars used in IDNA
|
||||
--iscsi Generate data for extra prohibited iSCSI chars
|
||||
|
||||
Note, --B2 and --B3 are mutually exclusive.
|
||||
|
||||
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C21 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --ldh-chars
|
||||
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9
|
||||
|
||||
filterRFC3454.pl filters the RFC file and creates String prep table files.
|
||||
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
|
||||
|
@ -381,15 +381,6 @@ strprepProfileLineFn(void *context,
|
||||
/* store the mapping */
|
||||
storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
|
||||
return;
|
||||
}
|
||||
/* store the range */
|
||||
storeRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR, pErrorCode);
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user