ICU-2427 more code points with default bidi classes - parse DerivedBidiClass.txt for them
X-SVN-Rev: 11448
This commit is contained in:
parent
cf5d222f79
commit
99fbf11da7
@ -682,7 +682,7 @@ U_CAPI UCharDirection U_EXPORT2
|
||||
u_charDirection(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UCharDirection)((props>>UPROPS_BIDI_SHIFT)&0x1f);
|
||||
return (UCharDirection)GET_BIDI_CLASS(props);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -64,6 +64,7 @@ enum {
|
||||
|
||||
/*
 * Accessors for the bit fields packed into a 32-bit properties word.
 * Every macro parenthesizes its argument so that expression arguments
 * (e.g. a|b) are extracted correctly.
 */

/* non-zero if the exception bit is set in the properties word */
#define PROPS_VALUE_IS_EXCEPTION(props) ((props)&UPROPS_EXCEPTION_BIT)

/* general category: the lowest 5 bits */
#define GET_CATEGORY(props) ((props)&0x1f)

/* bidi class: 5-bit field starting at UPROPS_BIDI_SHIFT */
#define GET_BIDI_CLASS(props) (((props)>>UPROPS_BIDI_SHIFT)&0x1f)

/* numeric type: 3-bit field starting at UPROPS_NUMERIC_TYPE_SHIFT */
#define GET_NUMERIC_TYPE(props) (((props)>>UPROPS_NUMERIC_TYPE_SHIFT)&7)

/* value field (all bits from UPROPS_VALUE_SHIFT upward), taken as unsigned */
#define GET_UNSIGNED_VALUE(props) ((props)>>UPROPS_VALUE_SHIFT)

/* same field, but the word is cast to int32_t before shifting */
#define GET_SIGNED_VALUE(props) ((int32_t)(props)>>UPROPS_VALUE_SHIFT)
|
||||
|
1241
icu4c/source/data/unidata/DerivedBidiClass.txt
Normal file
1241
icu4c/source/data/unidata/DerivedBidiClass.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -925,11 +925,14 @@ enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t
|
||||
{ 0x0590, U_LEFT_TO_RIGHT },
|
||||
{ 0x0600, U_RIGHT_TO_LEFT },
|
||||
{ 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x0900, U_RIGHT_TO_LEFT },
|
||||
{ 0xFB1D, U_LEFT_TO_RIGHT },
|
||||
{ 0xFB50, U_RIGHT_TO_LEFT },
|
||||
{ 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xFE70, U_LEFT_TO_RIGHT },
|
||||
{ 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x10800, U_LEFT_TO_RIGHT },
|
||||
{ 0x11000, U_RIGHT_TO_LEFT },
|
||||
{ 0x110000, U_LEFT_TO_RIGHT }
|
||||
};
|
||||
|
||||
@ -982,6 +985,8 @@ enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t
|
||||
* Verify default Bidi classes.
|
||||
* See table 3-7 "Bidirectional Character Types" in UAX #9.
|
||||
* http://www.unicode.org/reports/tr9/
|
||||
*
|
||||
* See also DerivedBidiClass.txt for Cn code points!
|
||||
*/
|
||||
if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
|
||||
/* enumerate the intersections of defaultBidi ranges with [start..limit[ */
|
||||
@ -2215,12 +2220,26 @@ TestAdditionalProperties() {
|
||||
/* enum/integer type properties */
|
||||
|
||||
/* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
|
||||
#if 0
|
||||
/* ### TODO test default Bidi classes for unassigned code points */
|
||||
{ 0x, UCHAR_BIDI_CLASS, },
|
||||
{ 0x, UCHAR_BIDI_CLASS, },
|
||||
{ 0x, UCHAR_BIDI_CLASS, },
|
||||
#endif
|
||||
/* test default Bidi classes for unassigned code points */
|
||||
{ 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x05a2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x07f2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
{ 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
|
||||
|
||||
{ 0x0606, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
|
||||
|
||||
{ 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
|
||||
{ 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
|
||||
|
@ -102,6 +102,11 @@ numericLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void U_CALLCONV
|
||||
bidiClassLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* parse files with single enumerated properties ---------------------------- */
|
||||
|
||||
struct SingleEnum {
|
||||
@ -396,6 +401,9 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
||||
/* add Han numeric types & values */
|
||||
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 3, numericLineFn, pErrorCode);
|
||||
|
||||
/* set proper bidi class for unassigned code points (Cn) */
|
||||
parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, pErrorCode);
|
||||
|
||||
parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);
|
||||
|
||||
/*
|
||||
@ -599,8 +607,19 @@ numericLineFn(void *context,
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(GET_UNSIGNED_VALUE(oldProps32)!=0) {
|
||||
/* the code below is not prepared to maintain values and exceptions */
|
||||
/*
|
||||
* Do not set a numeric value for code points that have other
|
||||
* values or exceptions because the code below is not prepared
|
||||
* to maintain such values and exceptions.
|
||||
*
|
||||
* Check store.c (e.g., file format description and makeProps())
|
||||
* for details of what code points get their value field interpreted.
|
||||
* For example, case mappings for Ll/Lt/Lu and mirror mappings for mirrored characters.
|
||||
*
|
||||
* For simplicity, and because we only expect to set numeric values for Han characters,
|
||||
* for now we only allow to set these values for Lo characters.
|
||||
*/
|
||||
if(GET_UNSIGNED_VALUE(oldProps32)!=0 || PROPS_VALUE_IS_EXCEPTION(oldProps32) || GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
|
||||
fprintf(stderr, "genprops error: new numeric value for a character with some other value in DerivedNumericValues.txt at %s\n", fields[0][0]);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
@ -618,7 +637,7 @@ numericLineFn(void *context,
|
||||
uprv_memset(&newProps, 0, sizeof(newProps));
|
||||
newProps.code=start;
|
||||
newProps.generalCategory=(uint8_t)GET_CATEGORY(oldProps32);
|
||||
newProps.bidi=(uint8_t)((oldProps32>>UPROPS_BIDI_SHIFT)&0x1f);
|
||||
newProps.bidi=(uint8_t)GET_BIDI_CLASS(oldProps32);
|
||||
newProps.isMirrored=(uint8_t)(oldProps32&(1UL<<UPROPS_MIRROR_SHIFT) ? TRUE : FALSE);
|
||||
newProps.numericType=(uint8_t)type; /* newly parsed numeric type */
|
||||
newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
|
||||
@ -626,6 +645,62 @@ numericLineFn(void *context,
|
||||
}
|
||||
}
|
||||
|
||||
/* DerivedBidiClass.txt ----------------------------------------------------- */
|
||||
|
||||
static void U_CALLCONV
|
||||
bidiClassLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *s;
|
||||
uint32_t oldStart, start, limit, value, props32;
|
||||
UBool didSet;
|
||||
|
||||
/* get the code point range */
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse bidi class */
|
||||
s=trimTerminateField(fields[1][0], fields[1][1]);
|
||||
value=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, s);
|
||||
if(value<0) {
|
||||
fprintf(stderr, "genprops error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
didSet=FALSE;
|
||||
oldStart=start;
|
||||
for(; start<limit; ++start) {
|
||||
props32=getProps(start);
|
||||
|
||||
/* ignore old bidi class, set only for unassigned code points (Cn) */
|
||||
if(GET_CATEGORY(props32)!=0) {
|
||||
if(value!=GET_BIDI_CLASS(props32)) {
|
||||
fprintf(stderr, "genprops error: different bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* remove whatever bidi class was set before */
|
||||
props32&=~(0x1f<<UPROPS_BIDI_SHIFT);
|
||||
|
||||
/* set bidi class for Cn according to DerivedBidiClass.txt */
|
||||
props32|=value<<UPROPS_BIDI_SHIFT;
|
||||
|
||||
/* set the modified properties */
|
||||
addProps(start, props32);
|
||||
didSet=TRUE;
|
||||
}
|
||||
|
||||
if(didSet && beVerbose) {
|
||||
printf("setting U+%04x..U+%04x bidi class %d\n", oldStart, limit-1, value);
|
||||
}
|
||||
}
|
||||
|
||||
/* data serialization ------------------------------------------------------- */
|
||||
|
||||
U_CFUNC int32_t
|
||||
|
Loading…
Reference in New Issue
Block a user