ICU-2427 more code points with default bidi classes - parse DerivedBidiClass.txt for them

X-SVN-Rev: 11448
This commit is contained in:
Markus Scherer 2003-04-03 19:41:00 +00:00
parent cf5d222f79
commit 99fbf11da7
5 changed files with 1346 additions and 10 deletions

View File

@ -682,7 +682,7 @@ U_CAPI UCharDirection U_EXPORT2
u_charDirection(UChar32 c) {
/* Return the bidi class field of the properties word that GET_PROPS() stores into props for c. */
uint32_t props;
GET_PROPS(c, props);
/*
 * NOTE(review): two return statements follow; this looks like the before/after
 * pair of a one-line change shown together. As written, the first one executes
 * and the second one is never reached.
 */
/* open-coded form: shift the bidi field down and keep its 5 bits */
return (UCharDirection)((props>>UPROPS_BIDI_SHIFT)&0x1f);
/* same extraction, expressed through the GET_BIDI_CLASS() accessor macro */
return (UCharDirection)GET_BIDI_CLASS(props);
}
U_CAPI UBool U_EXPORT2

View File

@ -64,6 +64,7 @@ enum {
/*
 * Accessors for the bit fields of a properties word.
 * Every macro parenthesizes its argument so that a compound expression
 * (for example a|b) is treated as one operand.
 */
/* non-zero if the exception bit is set */
#define PROPS_VALUE_IS_EXCEPTION(props) ((props)&UPROPS_EXCEPTION_BIT)
/* general category: the low 5 bits */
#define GET_CATEGORY(props) ((props)&0x1f)
/* bidi class: 5 bits starting at UPROPS_BIDI_SHIFT */
#define GET_BIDI_CLASS(props) (((props)>>UPROPS_BIDI_SHIFT)&0x1f)
/* numeric type: 3 bits starting at UPROPS_NUMERIC_TYPE_SHIFT */
#define GET_NUMERIC_TYPE(props) (((props)>>UPROPS_NUMERIC_TYPE_SHIFT)&7)
/* value field (everything from UPROPS_VALUE_SHIFT upwards), shifted as-is */
#define GET_UNSIGNED_VALUE(props) ((props)>>UPROPS_VALUE_SHIFT)
/* value field, with the word converted to int32_t before shifting */
#define GET_SIGNED_VALUE(props) ((int32_t)(props)>>UPROPS_VALUE_SHIFT)

File diff suppressed because it is too large Load Diff

View File

@ -925,11 +925,14 @@ enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t
{ 0x0590, U_LEFT_TO_RIGHT },
{ 0x0600, U_RIGHT_TO_LEFT },
{ 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
{ 0x0900, U_RIGHT_TO_LEFT },
{ 0xFB1D, U_LEFT_TO_RIGHT },
{ 0xFB50, U_RIGHT_TO_LEFT },
{ 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
{ 0xFE70, U_LEFT_TO_RIGHT },
{ 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
{ 0x10800, U_LEFT_TO_RIGHT },
{ 0x11000, U_RIGHT_TO_LEFT },
{ 0x110000, U_LEFT_TO_RIGHT }
};
@ -982,6 +985,8 @@ enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t
* Verify default Bidi classes.
* See table 3-7 "Bidirectional Character Types" in UAX #9.
* http://www.unicode.org/reports/tr9/
*
* See also DerivedBidiClass.txt for Cn code points!
*/
if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
/* enumerate the intersections of defaultBidi ranges with [start..limit[ */
@ -2215,12 +2220,26 @@ TestAdditionalProperties() {
/* enum/integer type properties */
/* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
#if 0
/* ### TODO test default Bidi classes for unassigned code points */
{ 0x, UCHAR_BIDI_CLASS, },
{ 0x, UCHAR_BIDI_CLASS, },
{ 0x, UCHAR_BIDI_CLASS, },
#endif
/* test default Bidi classes for unassigned code points */
{ 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x05a2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x07f2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x0606, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
{ 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
{ 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },

View File

@ -102,6 +102,11 @@ numericLineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode);
/*
 * Line callback for DerivedBidiClass.txt; it is handed to parseTwoFieldFile()
 * in generateAdditionalProperties().
 */
static void U_CALLCONV
bidiClassLineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode);
/* parse files with single enumerated properties ---------------------------- */
struct SingleEnum {
@ -396,6 +401,9 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
/* add Han numeric types & values */
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 3, numericLineFn, pErrorCode);
/* set proper bidi class for unassigned code points (Cn) */
parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, pErrorCode);
parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);
/*
@ -599,8 +607,19 @@ numericLineFn(void *context,
exit(U_PARSE_ERROR);
}
if(GET_UNSIGNED_VALUE(oldProps32)!=0) {
/* the code below is not prepared to maintain values and exceptions */
/*
* Do not set a numeric value for code points that have other
* values or exceptions because the code below is not prepared
* to maintain such values and exceptions.
*
* Check store.c (e.g., file format description and makeProps())
* for details of what code points get their value field interpreted.
* For example, case mappings for Ll/Lt/Lu and mirror mappings for mirrored characters.
*
* For simplicity, and because we only expect to set numeric values for Han characters,
* for now we only allow to set these values for Lo characters.
*/
if(GET_UNSIGNED_VALUE(oldProps32)!=0 || PROPS_VALUE_IS_EXCEPTION(oldProps32) || GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
fprintf(stderr, "genprops error: new numeric value for a character with some other value in DerivedNumericValues.txt at %s\n", fields[0][0]);
exit(U_PARSE_ERROR);
}
@ -618,7 +637,7 @@ numericLineFn(void *context,
uprv_memset(&newProps, 0, sizeof(newProps));
newProps.code=start;
newProps.generalCategory=(uint8_t)GET_CATEGORY(oldProps32);
newProps.bidi=(uint8_t)((oldProps32>>UPROPS_BIDI_SHIFT)&0x1f);
newProps.bidi=(uint8_t)GET_BIDI_CLASS(oldProps32);
newProps.isMirrored=(uint8_t)(oldProps32&(1UL<<UPROPS_MIRROR_SHIFT) ? TRUE : FALSE);
newProps.numericType=(uint8_t)type; /* newly parsed numeric type */
newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
@ -626,6 +645,62 @@ numericLineFn(void *context,
}
}
/* DerivedBidiClass.txt ----------------------------------------------------- */
/*
 * Line callback for DerivedBidiClass.txt.
 *
 * Field 0 is parsed as a code point range and field 1 as a bidi class name.
 * For every code point in the range whose general category is 0 (unassigned, Cn),
 * the bidi class bits of its properties word are replaced with the parsed class
 * and the word is stored again with addProps().
 * Code points with any other category are left untouched, but their existing
 * bidi class must equal the parsed one; otherwise the tool exits with U_PARSE_ERROR.
 *
 * @param context    not used by this callback
 * @param fields     fields[i][0] and fields[i][1] delimit field i of the current line
 * @param fieldCount not used by this callback
 * @param pErrorCode receives the status of the range parsing; the tool exits on failure
 */
static void U_CALLCONV
bidiClassLineFn(void *context,
                char *fields[][2], int32_t fieldCount,
                UErrorCode *pErrorCode) {
    char *s;
    uint32_t oldStart, start, limit, value, props32;
    int32_t bidiClass;
    UBool didSet;

    /* get the code point range */
    u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    /* turn the inclusive end of the range into an exclusive limit */
    ++limit;

    /* parse bidi class */
    s=trimTerminateField(fields[1][0], fields[1][1]);
    /*
     * Keep the result signed for the error check: an unknown name is reported
     * as a negative value, which an unsigned variable could never detect.
     */
    bidiClass=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, s);
    if(bidiClass<0) {
        fprintf(stderr, "genprops error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
        exit(U_PARSE_ERROR);
    }
    value=(uint32_t)bidiClass;

    didSet=FALSE;
    oldStart=start;
    for(; start<limit; ++start) {
        props32=getProps(start);

        /* ignore old bidi class, set only for unassigned code points (Cn) */
        if(GET_CATEGORY(props32)!=0) {
            /* assigned code point: only verify that the file agrees with the stored class */
            if(value!=GET_BIDI_CLASS(props32)) {
                fprintf(stderr, "genprops error: different bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
                exit(U_PARSE_ERROR);
            }
            continue;
        }

        /* remove whatever bidi class was set before */
        props32&=~((uint32_t)0x1f<<UPROPS_BIDI_SHIFT);

        /* set bidi class for Cn according to DerivedBidiClass.txt */
        props32|=value<<UPROPS_BIDI_SHIFT;

        /* set the modified properties */
        addProps(start, props32);
        didSet=TRUE;
    }

    if(didSet && beVerbose) {
        /* explicit casts keep the conversions in step with the argument types */
        printf("setting U+%04lx..U+%04lx bidi class %ld\n",
               (unsigned long)oldStart, (unsigned long)(limit-1), (long)value);
    }
}
/* data serialization ------------------------------------------------------- */
U_CFUNC int32_t