diff --git a/icu4c/source/common/uchar.c b/icu4c/source/common/uchar.c index 96418eb45b..e5eb3c4b5e 100644 --- a/icu4c/source/common/uchar.c +++ b/icu4c/source/common/uchar.c @@ -30,17 +30,11 @@ #include "utrie.h" #include "ustr_imp.h" -/* - * Since genprops overrides the general category for some control codes, - * we need to hardcode ISO 8 controls for u_iscntrl(), u_isprint(), etc. - */ -#define IS_ISO_8_CONTROL(c) ((uint32_t)(c)<0x20 || (uint32_t)((c)-0x7f)<=0x20) - /* dynamically loaded Unicode character properties -------------------------- */ /* fallback properties for the ASCII range if the data cannot be loaded */ /* these are printed by genprops in verbose mode */ -static const uint32_t staticProps32Table[]={ +static uint32_t staticProps32Table[0xa0]={ /* 0x00 */ 0x48f, /* 0x01 */ 0x48f, /* 0x02 */ 0x48f, @@ -50,11 +44,11 @@ static const uint32_t staticProps32Table[]={ /* 0x06 */ 0x48f, /* 0x07 */ 0x48f, /* 0x08 */ 0x48f, - /* 0x09 */ 0x20c, - /* 0x0a */ 0x1ce, - /* 0x0b */ 0x20c, - /* 0x0c */ 0x24d, - /* 0x0d */ 0x1ce, + /* 0x09 */ 0x20f, + /* 0x0a */ 0x1cf, + /* 0x0b */ 0x20f, + /* 0x0c */ 0x24f, + /* 0x0d */ 0x1cf, /* 0x0e */ 0x48f, /* 0x0f */ 0x48f, /* 0x10 */ 0x48f, @@ -69,10 +63,10 @@ static const uint32_t staticProps32Table[]={ /* 0x19 */ 0x48f, /* 0x1a */ 0x48f, /* 0x1b */ 0x48f, - /* 0x1c */ 0x1ce, - /* 0x1d */ 0x1ce, - /* 0x1e */ 0x1ce, - /* 0x1f */ 0x20c, + /* 0x1c */ 0x1cf, + /* 0x1d */ 0x1cf, + /* 0x1e */ 0x1cf, + /* 0x1f */ 0x20f, /* 0x20 */ 0x24c, /* 0x21 */ 0x297, /* 0x22 */ 0x297, @@ -114,8 +108,8 @@ static const uint32_t staticProps32Table[]={ /* 0x46 */ 0x2000001, /* 0x47 */ 0x2000001, /* 0x48 */ 0x2000001, - /* 0x49 */ 0x2000001, - /* 0x4a */ 0x2000001, + /* 0x49 */ 0x1, /* has exception */ + /* 0x4a */ 0x300001, /* has exception */ /* 0x4b */ 0x2000001, /* 0x4c */ 0x2000001, /* 0x4d */ 0x2000001, @@ -146,7 +140,7 @@ static const uint32_t staticProps32Table[]={ /* 0x66 */ 0x2000002, /* 0x67 */ 0x2000002, /* 0x68 */ 0x2000002, - /* 0x69 */ 0x2000002, + /* 0x69 */ 0x600002, /* has exception */ /* 0x6a */ 0x2000002, /* 0x6b */ 0x2000002, /* 0x6c */ 0x2000002, @@ -174,7 +168,7 @@ static const uint32_t staticProps32Table[]={ /* 0x82 */ 0x48f, /* 0x83 */ 0x48f, /* 0x84 */ 0x48f, - /* 0x85 */ 0x1ce, + /* 0x85 */ 0x1cf, /* 0x86 */ 0x48f, /* 0x87 */ 0x48f, /* 0x88 */ 0x48f, @@ -200,7 +194,7 @@ static const uint32_t staticProps32Table[]={ /* 0x9c */ 0x48f, /* 0x9d */ 0x48f, /* 0x9e */ 0x48f, - /* 0x9f */ 0x48f + /* 0x9f */ 0x48f, }; /* @@ -424,18 +418,6 @@ u_charType(UChar32 c) { return (int8_t)GET_CATEGORY(props); } -/* Gets the Unicode character's general category, as per the UCD.*/ -U_CAPI int8_t U_EXPORT2 -u_charUCDType(UChar32 c) { - if (IS_ISO_8_CONTROL(c)) { - return U_CONTROL_CHAR; - } else { - uint32_t props; - GET_PROPS(c, props); - return (int8_t)GET_CATEGORY(props); - } -} - /* Enumerate all code points with their general categories. */ struct _EnumTypeCallback { UCharEnumTypeRange *enumRange; @@ -547,26 +529,26 @@ u_isbase(UChar32 c) { /* Checks if the Unicode character is a control character.*/ U_CAPI UBool U_EXPORT2 u_iscntrl(UChar32 c) { - if(IS_ISO_8_CONTROL(c)) { - return TRUE; - } else { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - ((1UL<=0x09 && c <= 0x0d) || (c>=0x1c && c <=0x1f) || c==0x85) + /* Checks if the Unicode character is a space character.*/ U_CAPI UBool U_EXPORT2 u_isspace(UChar32 c) { uint32_t props; GET_PROPS(c, props); - return (UBool)(((1UL< bad rules" + + ".createFromRules() => bad rules" + /*", parse error " + parseError.code +*/ ", line " + parseError.line + ", offset " + parseError.offset + @@ -2967,7 +2967,7 @@ void TransliteratorTest::TestAnchorMasking(){ Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status); if(U_FAILURE(status)){ errln(UnicodeString("FAIL: ") + "ID" + - ".toRules() => bad rules" + + ".createFromRules() => bad rules" + /*", parse error " + parseError.code +*/ ", line " + parseError.line + ", offset " + parseError.offset + diff --git a/icu4c/source/test/intltest/ucdtest.cpp b/icu4c/source/test/intltest/ucdtest.cpp index 7320c9c6da..042c2bae6b 100644 --- a/icu4c/source/test/intltest/ucdtest.cpp +++ b/icu4c/source/test/intltest/ucdtest.cpp @@ -342,29 +342,8 @@ UnicodeTest::unicodeDataLineFn(void *context, } /* get general category, field 2 */ - /* we override the general category of some control characters */ - switch(c) { - case 9: - case 0xb: - case 0x1f: - type = U_SPACE_SEPARATOR; - break; - case 0xc: - type = U_LINE_SEPARATOR; - break; - case 0xa: - case 0xd: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x85: - type = U_PARAGRAPH_SEPARATOR; - break; - default: - *fields[2][1]=0; - type = (int8_t)tagValues[me->MakeProp(fields[2][0])]; - break; - } + *fields[2][1]=0; + type = (int8_t)tagValues[me->MakeProp(fields[2][0])]; if(Unicode::getType(c)!=type) { me->errln("error: Unicode::getType(U+%04lx)==%u instead of %u\n", c, Unicode::getType(c), type); *pErrorCode = U_PARSE_ERROR; diff --git a/icu4c/source/tools/genprops/genprops.c b/icu4c/source/tools/genprops/genprops.c index 3641c0ec38..7b30d308d5 100644 --- a/icu4c/source/tools/genprops/genprops.c +++ b/icu4c/source/tools/genprops/genprops.c @@ -525,23 +525,6 @@ bidiNames[U_CHAR_DIRECTION_COUNT]={ "WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN" }; -/* control code properties */ -static const struct { - uint32_t code; - uint8_t generalCategory; -} controlProps[]={ - /* TAB */ {0x9, U_SPACE_SEPARATOR}, - /* VT */ {0xb, U_SPACE_SEPARATOR}, - /* LF */ {0xa, U_PARAGRAPH_SEPARATOR}, - /* FF */ {0xc, U_LINE_SEPARATOR}, - /* CR */ {0xd, U_PARAGRAPH_SEPARATOR}, - /* FS */ {0x1c, U_PARAGRAPH_SEPARATOR}, - /* GS */ {0x1d, U_PARAGRAPH_SEPARATOR}, - /* RS */ {0x1e, U_PARAGRAPH_SEPARATOR}, - /* US */ {0x1f, U_SPACE_SEPARATOR}, - /* NL */ {0x85, U_PARAGRAPH_SEPARATOR} -}; - static struct { uint32_t first, last, props; char name[80]; @@ -714,15 +697,6 @@ unicodeDataLineFn(void *context, } p.titleCase=value; - /* override properties for some common control characters */ - if(p.generalCategory==U_CONTROL_CHAR) { - for(i=0; i