ICU-3050 remove label separators from the data file
X-SVN-Rev: 13455
This commit is contained in:
parent
81b3a18536
commit
9bf949f010
@ -34,9 +34,8 @@ enum UStringPrepType{
|
|||||||
USPREP_UNASSIGNED = 0x0000 ,
|
USPREP_UNASSIGNED = 0x0000 ,
|
||||||
USPREP_MAP = 0x0001 ,
|
USPREP_MAP = 0x0001 ,
|
||||||
USPREP_PROHIBITED = 0x0002 ,
|
USPREP_PROHIBITED = 0x0002 ,
|
||||||
USPREP_LABEL_SEPARATOR = 0x0003 ,
|
USPREP_DELETE = 0x0003 ,
|
||||||
USPREP_DELETE = 0x0004 ,
|
USPREP_TYPE_LIMIT = 0x0004
|
||||||
USPREP_TYPE_LIMIT = 0x0005
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef enum UStringPrepType UStringPrepType;
|
typedef enum UStringPrepType UStringPrepType;
|
||||||
@ -46,7 +45,6 @@ static const char* usprepTypeNames[] ={
|
|||||||
"UNASSIGNED" ,
|
"UNASSIGNED" ,
|
||||||
"MAP" ,
|
"MAP" ,
|
||||||
"PROHIBITED" ,
|
"PROHIBITED" ,
|
||||||
"LABEL_SEPARATOR" ,
|
|
||||||
"DELETE",
|
"DELETE",
|
||||||
"TYPE_LIMIT"
|
"TYPE_LIMIT"
|
||||||
};
|
};
|
||||||
@ -151,13 +149,6 @@ usprep_normalize( UStringPrepProfile* prep,
|
|||||||
UChar* dest, int32_t destCapacity,
|
UChar* dest, int32_t destCapacity,
|
||||||
UErrorCode* status );
|
UErrorCode* status );
|
||||||
|
|
||||||
|
|
||||||
U_CFUNC UBool
|
|
||||||
usprep_isLabelSeparator(UStringPrepProfile* profile,
|
|
||||||
UChar32 ch, UErrorCode* status);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Swap StringPrep .spp profile data. See udataswp.h.
|
* Swap StringPrep .spp profile data. See udataswp.h.
|
||||||
* @internal
|
* @internal
|
||||||
|
@ -120,21 +120,25 @@ compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
|
|||||||
return lengthResult;
|
return lengthResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline UBool
|
|
||||||
isLDHChar(UChar32 ch){
|
/**
|
||||||
// high runner case
|
* Ascertain if the given code point is a label separator as
|
||||||
if(ch>0x007A){
|
* defined by the IDNA RFC
|
||||||
return FALSE;
|
*
|
||||||
|
* @param ch The code point to be ascertained
|
||||||
|
* @return true if the char is a label separator
|
||||||
|
* @draft ICU 2.8
|
||||||
|
*/
|
||||||
|
static inline UBool isLabelSeparator(UChar ch){
|
||||||
|
switch(ch){
|
||||||
|
case 0x002e:
|
||||||
|
case 0x3002:
|
||||||
|
case 0xFF0E:
|
||||||
|
case 0xFF61:
|
||||||
|
return TRUE;
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
}
|
}
|
||||||
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
|
|
||||||
if( (ch==0x002D) ||
|
|
||||||
(0x0030 <= ch && ch <= 0x0039) ||
|
|
||||||
(0x0041 <= ch && ch <= 0x005A) ||
|
|
||||||
(0x0061 <= ch && ch <= 0x007A)
|
|
||||||
){
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
return FALSE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the length of the label excluding the separator
|
// returns the length of the label excluding the separator
|
||||||
@ -153,7 +157,7 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
|
|||||||
*done = TRUE;
|
*done = TRUE;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
if(usprep_isLabelSeparator(nameprep, src[i], status)){
|
if(isLabelSeparator(src[i])){
|
||||||
*limit = src + (i+1); // go past the delimiter
|
*limit = src + (i+1); // go past the delimiter
|
||||||
return i;
|
return i;
|
||||||
|
|
||||||
@ -162,7 +166,7 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
|
|||||||
}else{
|
}else{
|
||||||
int32_t i;
|
int32_t i;
|
||||||
for(i=0;i<srcLength;i++){
|
for(i=0;i<srcLength;i++){
|
||||||
if(usprep_isLabelSeparator(nameprep, src[i], status)){
|
if(isLabelSeparator(src[i])){
|
||||||
*limit = src + (i+1); // go past the delimiter
|
*limit = src + (i+1); // go past the delimiter
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
@ -175,6 +179,21 @@ getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
static inline UBool isLDHChar(UChar ch){
|
||||||
|
// high runner case
|
||||||
|
if(ch>0x007A){
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
|
||||||
|
if( (ch==0x002D) ||
|
||||||
|
(0x0030 <= ch && ch <= 0x0039) ||
|
||||||
|
(0x0041 <= ch && ch <= 0x005A) ||
|
||||||
|
(0x0061 <= ch && ch <= 0x007A)
|
||||||
|
){
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
static int32_t
|
static int32_t
|
||||||
_internal_toASCII(const UChar* src, int32_t srcLength,
|
_internal_toASCII(const UChar* src, int32_t srcLength,
|
||||||
|
@ -786,35 +786,6 @@ CLEANUP:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
U_CFUNC UBool
|
|
||||||
usprep_isLabelSeparator(UStringPrepProfile* profile,
|
|
||||||
UChar32 ch, UErrorCode* status){
|
|
||||||
// check error status
|
|
||||||
if(status==NULL || U_FAILURE(*status)){
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
//check the arguments
|
|
||||||
if(profile==NULL){
|
|
||||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint16_t result;
|
|
||||||
UStringPrepType type;
|
|
||||||
int16_t value;
|
|
||||||
UBool isIndex;
|
|
||||||
|
|
||||||
UTRIE_GET16(&profile->sprepTrie,ch, result);
|
|
||||||
|
|
||||||
type = getValues(result,value,isIndex);
|
|
||||||
|
|
||||||
if( type == USPREP_LABEL_SEPARATOR){
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* data swapping ------------------------------------------------------------ */
|
/* data swapping ------------------------------------------------------------ */
|
||||||
|
|
||||||
U_CAPI int32_t U_EXPORT2
|
U_CAPI int32_t U_EXPORT2
|
||||||
|
@ -1942,11 +1942,3 @@ E0001; ; PROHIBITED
|
|||||||
E0020..E007F; ; PROHIBITED
|
E0020..E007F; ; PROHIBITED
|
||||||
|
|
||||||
# Total code points 82
|
# Total code points 82
|
||||||
|
|
||||||
# code points for LDH chars
|
|
||||||
002E; ; LABEL_SEPARATOR
|
|
||||||
3002; ; LABEL_SEPARATOR
|
|
||||||
FF0E; ; LABEL_SEPARATOR
|
|
||||||
FF61; ; LABEL_SEPARATOR
|
|
||||||
|
|
||||||
# Total code points 4
|
|
||||||
|
@ -77,13 +77,6 @@ strprepProfileLineFn(void *context,
|
|||||||
|
|
||||||
/* compare the mapping */
|
/* compare the mapping */
|
||||||
compareMapping(data, code,mapping, length,USPREP_MAP);
|
compareMapping(data, code,mapping, length,USPREP_MAP);
|
||||||
|
|
||||||
}else if(strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
|
||||||
|
|
||||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
|
||||||
|
|
||||||
/* compare the range */
|
|
||||||
compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
|
|
||||||
}else{
|
}else{
|
||||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -180,12 +180,6 @@ strprepProfileLineFn(void *context,
|
|||||||
/* store the mapping */
|
/* store the mapping */
|
||||||
compareMapping(code,mapping, length,USPREP_MAP);
|
compareMapping(code,mapping, length,USPREP_MAP);
|
||||||
|
|
||||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
|
||||||
|
|
||||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
|
||||||
|
|
||||||
/* store the range */
|
|
||||||
compareFlagsForRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
|
|
||||||
}else{
|
}else{
|
||||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||||
}
|
}
|
||||||
@ -298,9 +292,6 @@ testAllCodepoints(TestIDNA& test){
|
|||||||
if(type == USPREP_MAP){
|
if(type == USPREP_MAP){
|
||||||
mapped++;
|
mapped++;
|
||||||
}
|
}
|
||||||
if(type == USPREP_LABEL_SEPARATOR){
|
|
||||||
mappedWithNorm++;
|
|
||||||
}
|
|
||||||
}else{
|
}else{
|
||||||
noValueInTrie++;
|
noValueInTrie++;
|
||||||
if(result > 0){
|
if(result > 0){
|
||||||
|
@ -121,9 +121,6 @@ sub main(){
|
|||||||
createProhibitedTable($inFH,$outfile,$line);
|
createProhibitedTable($inFH,$outfile,$line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( defined $writeLDHChars){
|
|
||||||
createLDHCharTable($inFH, $outfile);
|
|
||||||
}
|
|
||||||
if( defined $writeISCSIChars){
|
if( defined $writeISCSIChars){
|
||||||
create_iSCSIExtraProhibitedTable($inFH, $outfile);
|
create_iSCSIExtraProhibitedTable($inFH, $outfile);
|
||||||
}
|
}
|
||||||
@ -238,21 +235,7 @@ sub createProhibitedTable{
|
|||||||
readPrint($inFH,$outFH,$comment, "C");
|
readPrint($inFH,$outFH,$comment, "C");
|
||||||
close($outFH);
|
close($outFH);
|
||||||
}
|
}
|
||||||
#-----------------------------------------------------------------------
|
|
||||||
sub createLDHCharTable{
|
|
||||||
($inFH,$outfile,$line) = @_;
|
|
||||||
$comment ="# code points for LDH chars \n";
|
|
||||||
|
|
||||||
$outFH = IO::File->new($outfile, "a")
|
|
||||||
or die "could not open the file $outfile for writing: $! \n";
|
|
||||||
print $outFH $comment;
|
|
||||||
print $outFH "002E; ; LABEL_SEPARATOR\n";
|
|
||||||
print $outFH "3002; ; LABEL_SEPARATOR\n";
|
|
||||||
print $outFH "FF0E; ; LABEL_SEPARATOR\n";
|
|
||||||
print $outFH "FF61; ; LABEL_SEPARATOR\n";
|
|
||||||
print $outFH "\n# Total code points 4\n";
|
|
||||||
close($outFH);
|
|
||||||
}
|
|
||||||
#-----------------------------------------------------------------------
|
#-----------------------------------------------------------------------
|
||||||
sub create_iSCSIExtraProhibitedTable{
|
sub create_iSCSIExtraProhibitedTable{
|
||||||
($inFH,$outfile,$line) = @_;
|
($inFH,$outfile,$line) = @_;
|
||||||
@ -295,12 +278,11 @@ Options:
|
|||||||
--C7 Generate data for table C.7
|
--C7 Generate data for table C.7
|
||||||
--C8 Generate data for table C.8
|
--C8 Generate data for table C.8
|
||||||
--C9 Generate data for table C.9
|
--C9 Generate data for table C.9
|
||||||
--ldh-chars Generate data for LDH chars used in IDNA
|
|
||||||
--iscsi Generate data for extra prohibited iSCSI chars
|
--iscsi Generate data for extra prohibited iSCSI chars
|
||||||
|
|
||||||
Note, --B2 and --B3 are mutually exclusive.
|
Note, --B2 and --B3 are mutually exclusive.
|
||||||
|
|
||||||
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C21 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --ldh-chars
|
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9
|
||||||
|
|
||||||
filterRFC3454.pl filters the RFC file and creates String prep table files.
|
filterRFC3454.pl filters the RFC file and creates String prep table files.
|
||||||
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
|
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
|
||||||
|
@ -381,15 +381,6 @@ strprepProfileLineFn(void *context,
|
|||||||
/* store the mapping */
|
/* store the mapping */
|
||||||
storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);
|
storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);
|
||||||
|
|
||||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
|
||||||
|
|
||||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode)){
|
|
||||||
fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* store the range */
|
|
||||||
storeRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR, pErrorCode);
|
|
||||||
}else{
|
}else{
|
||||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user