ICU-1970 add enumerated/integer/Numeric_Value properties, deprecate cellWidth

X-SVN-Rev: 9034
This commit is contained in:
Markus Scherer 2002-07-04 00:17:12 +00:00
parent 12f90fe0b3
commit 40e2f9754a
3 changed files with 198 additions and 44 deletions

View File

@ -183,17 +183,6 @@ loadPropsData(void) {
}
/* constants and macros for access to the data */
/*
 * Indexes of the optional values that an exceptions entry can carry.
 * Each index is also a bit number: HAVE_EXCEPTION_VALUE(flags, index)
 * tests bit `index` of the entry's first word to see whether that value is present.
 */
enum {
EXC_UPPERCASE,
EXC_LOWERCASE,
EXC_TITLECASE,
EXC_UNUSED,
EXC_NUMERIC_VALUE, /* read by u_getNumericValue() as the numerator */
EXC_DENOMINATOR_VALUE, /* read by u_getNumericValue() as the denominator */
EXC_MIRROR_MAPPING,
EXC_SPECIAL_CASING,
EXC_CASE_FOLDING
};
/* getting a uint32_t properties word from the data */
#define HAVE_DATA (havePropsData>0 || (havePropsData==0 && loadPropsData()>0))
@ -207,13 +196,6 @@ enum {
} else { \
(result)=0; \
}
#define PROPS_VALUE_IS_EXCEPTION(props) ((props)&UPROPS_EXCEPTION_BIT)
#define GET_CATEGORY(props) ((props)&0x1f)
#define GET_NUMERIC_TYPE(props) (((props)>>UPROPS_NUMERIC_TYPE_SHIFT)&7)
/* ### TODO: 2 or 3 bits for numericType?! */
#define GET_UNSIGNED_VALUE(props) ((props)>>UPROPS_VALUE_SHIFT)
#define GET_SIGNED_VALUE(props) ((int32_t)(props)>>UPROPS_VALUE_SHIFT)
#define GET_EXCEPTIONS(props) (exceptionsTable+GET_UNSIGNED_VALUE(props))
/* finding an exception value */
#define HAVE_EXCEPTION_VALUE(flags, index) ((flags)&(1UL<<(index)))
@ -606,6 +588,61 @@ u_charDigitValue(UChar32 c) {
}
}
/**
 * Get the numeric value of a code point as a double.
 *
 * Returns U_NO_NUMERIC_VALUE when the numeric type field is 0 or not below
 * U_NT_COUNT, or when an exceptions entry stores neither a numerator nor a
 * denominator. A non-exception props word yields its signed value field.
 * Otherwise the result is numerator, 1/denominator, or numerator/denominator,
 * depending on which of the two values the exceptions entry stores.
 */
U_CAPI double U_EXPORT2
u_getNumericValue(UChar32 c) {
    uint32_t props, type;
    const uint32_t *excValues;
    uint32_t excFlags;
    int32_t numerator=0;
    uint32_t denominator=0;
    int hasNumerator, hasDenominator;

    GET_PROPS(c, props);
    type=GET_NUMERIC_TYPE(props);

    if(type==0 || type>=(int32_t)U_NT_COUNT) {
        return U_NO_NUMERIC_VALUE;
    }
    if(!PROPS_VALUE_IS_EXCEPTION(props)) {
        /* the value is stored directly in the props word */
        return GET_SIGNED_VALUE(props);
    }

    /* the first word of the exceptions entry holds the presence flags */
    excValues=GET_EXCEPTIONS(props);
    excFlags=*excValues++;

    hasNumerator=HAVE_EXCEPTION_VALUE(excFlags, EXC_NUMERIC_VALUE)!=0;
    hasDenominator=HAVE_EXCEPTION_VALUE(excFlags, EXC_DENOMINATOR_VALUE)!=0;

    if(hasNumerator) {
        /* scratch copies: ADD_EXCEPTION_OFFSET is handed lvalues it may modify */
        uint32_t scratchFlags=excFlags;
        int slot=EXC_NUMERIC_VALUE;
        const uint32_t *cursor=excValues;

        ADD_EXCEPTION_OFFSET(scratchFlags, slot, cursor);
        numerator=(int32_t)*cursor;
    }

    if(hasDenominator) {
        uint32_t scratchFlags=excFlags;
        int slot=EXC_DENOMINATOR_VALUE;
        const uint32_t *cursor=excValues;

        ADD_EXCEPTION_OFFSET(scratchFlags, slot, cursor);
        denominator=*cursor;
    }

    if(hasNumerator && hasDenominator) {
        return (double)numerator/(double)denominator;
    }
    if(hasNumerator) {
        return numerator;
    }
    if(hasDenominator) {
        return (double)1./(double)denominator;
    }

    /* neither value stored (should not occur with numericType>0) */
    return U_NO_NUMERIC_VALUE;
}
/* Gets the character's linguistic directionality.*/
U_CAPI UCharDirection U_EXPORT2
u_charDirection(UChar32 c) {
@ -775,15 +812,19 @@ U_CFUNC uint32_t
u_getUnicodeProperties(UChar32 c, int32_t column) {
uint16_t vecIndex;
if( !HAVE_DATA || countPropsVectors==0 ||
(uint32_t)c>0x10ffff ||
column<0 || column>=propsVectorsColumns
/* column==-1 is a special request: return the 32-bit main properties word fetched with GET_PROPS */
if(column==-1) {
uint32_t props;
GET_PROPS(c, props);
return props;
/* otherwise return 0 when there is no data, c is above 0x10ffff, or column is outside 0..propsVectorsColumns-1 */
} else if( !HAVE_DATA || countPropsVectors==0 ||
(uint32_t)c>0x10ffff ||
column<0 || column>=propsVectorsColumns
) {
return 0;
} else {
/* look up the start of c's properties vector in the trie, then pick the requested word */
UTRIE_GET16(&propsVectorsTrie, c, vecIndex);
return propsVectors[vecIndex+column];
}
UTRIE_GET16(&propsVectorsTrie, c, vecIndex);
return propsVectors[vecIndex+column];
}
/* string casing ------------------------------------------------------------ */

View File

@ -236,18 +236,127 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
};
}
/* Convenience wrapper: returns u_hasBinaryProperty(c, UCHAR_ALPHABETIC). */
UBool u_isUAlphabetic(UChar32 c) {
U_CAPI UBool U_EXPORT2
u_isUAlphabetic(UChar32 c) {
return u_hasBinaryProperty(c, UCHAR_ALPHABETIC);
}
/* Convenience wrapper: returns u_hasBinaryProperty(c, UCHAR_LOWERCASE). */
UBool u_isULowercase(UChar32 c) {
U_CAPI UBool U_EXPORT2
u_isULowercase(UChar32 c) {
return u_hasBinaryProperty(c, UCHAR_LOWERCASE);
}
/* Convenience wrapper: returns u_hasBinaryProperty(c, UCHAR_UPPERCASE). */
UBool u_isUUppercase(UChar32 c) {
U_CAPI UBool U_EXPORT2
u_isUUppercase(UChar32 c) {
return u_hasBinaryProperty(c, UCHAR_UPPERCASE);
}
/* Convenience wrapper: returns u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). */
UBool u_isUWhiteSpace(UChar32 c) {
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace(UChar32 c) {
return u_hasBinaryProperty(c, UCHAR_WHITE_SPACE);
}
/**
 * Get the value of a binary or enumerated/integer property for a code point.
 *
 * @param c     code point to test
 * @param which property selector
 * @return the u_hasBinaryProperty() result widened to int32_t for selectors in
 *         UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1; the property value for the
 *         handled selectors in UCHAR_INT_START..UCHAR_INT_LIMIT-1;
 *         0 for every other (undefined or not yet implemented) selector
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
    if(which<UCHAR_BINARY_START) {
        return 0; /* undefined */
    }
    if(which<UCHAR_BINARY_LIMIT) {
        return (int32_t)u_hasBinaryProperty(c, which);
    }
    if(which<UCHAR_INT_START || which>=UCHAR_INT_LIMIT) {
        return 0; /* undefined: between the two ranges, or past the integer range */
    }

    /* enumerated/integer properties */
    switch(which) {
    case UCHAR_BIDI_CLASS:
        return (int32_t)u_charDirection(c);
    case UCHAR_BLOCK:
        return (int32_t)ublock_getCode(c);
    case UCHAR_CANONICAL_COMBINING_CLASS:
        return u_getCombiningClass(c);
#if 0 /* ### */
    case UCHAR_DECOMPOSITION_TYPE:
        return ;
#endif
    case UCHAR_EAST_ASIAN_WIDTH:
        /* the width is a bit field in properties vector word 0 */
        return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_WIDTH_MASK)>>UPROPS_EA_WIDTH_SHIFT;
    case UCHAR_GENERAL_CATEGORY:
        return (int32_t)u_charType(c);
#if 0 /* ### */
    case UCHAR_JOINING_GROUP:
        return ;
    case UCHAR_JOINING_TYPE:
        return ;
    case UCHAR_LINE_BREAK:
        return ;
#endif
    case UCHAR_NUMERIC_TYPE:
        /* column -1 asks for the main properties word, which holds the numeric type field */
        return (int32_t)GET_NUMERIC_TYPE(u_getUnicodeProperties(c, -1));
    case UCHAR_SCRIPT: {
        /* the error code is required by the API but not inspected here */
        UErrorCode status=U_ZERO_ERROR;
        return (int32_t)uscript_getScript(c, &status);
    }
    default:
        return 0; /* undefined */
    }
}
/**
 * Get the minimum value that u_getIntPropertyValue() can return for a property.
 *
 * @param which property selector
 * @return UBLOCK_INVALID_CODE for UCHAR_BLOCK, USCRIPT_INVALID_CODE for
 *         UCHAR_SCRIPT, and 0 for everything else (undefined selectors
 *         included, since all other properties have a minimum value of 0)
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMinValue(UProperty which) {
    if(which==UCHAR_BLOCK) {
        return UBLOCK_INVALID_CODE;
    }
    if(which==UCHAR_SCRIPT) {
        return USCRIPT_INVALID_CODE;
    }
    return 0;
}
/**
 * Get the maximum value that u_getIntPropertyValue() can return for a property.
 *
 * @param which property selector
 * @return 1 for selectors in UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1;
 *         the largest enum constant (or 0xff for the combining class) for the
 *         handled selectors in UCHAR_INT_START..UCHAR_INT_LIMIT-1;
 *         0 for every other (undefined or not yet implemented) selector
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which) {
    if(which<UCHAR_BINARY_START) {
        return 0; /* undefined */
    }
    if(which<UCHAR_BINARY_LIMIT) {
        return 1; /* maximum TRUE for all binary properties */
    }
    if(which<UCHAR_INT_START || which>=UCHAR_INT_LIMIT) {
        return 0; /* undefined: between the two ranges, or past the integer range */
    }

    /* enumerated/integer properties: one less than each enum's count/limit constant */
    switch(which) {
    case UCHAR_BIDI_CLASS:
        return (int32_t)U_CHAR_DIRECTION_COUNT-1;
    case UCHAR_BLOCK:
        /* ### TODO This should be data-driven from uprops.dat */
        return (int32_t)UBLOCK_COUNT-1;
    case UCHAR_CANONICAL_COMBINING_CLASS:
        return 0xff; /* TODO do we need to be more precise, getting the actual maximum? */
#if 0 /* ### */
    case UCHAR_DECOMPOSITION_TYPE:
        return ;
#endif
    case UCHAR_EAST_ASIAN_WIDTH:
        return (int32_t)U_EA_COUNT-1;
    case UCHAR_GENERAL_CATEGORY:
        return (int32_t)U_CHAR_CATEGORY_COUNT-1;
#if 0 /* ### */
    case UCHAR_JOINING_GROUP:
        return ;
    case UCHAR_JOINING_TYPE:
        return ;
    case UCHAR_LINE_BREAK:
        return ;
#endif
    case UCHAR_NUMERIC_TYPE:
        return (int32_t)U_NT_COUNT-1;
    case UCHAR_SCRIPT:
        /* ### TODO This should be data-driven from uprops.dat */
        return (int32_t)USCRIPT_CODE_LIMIT-1;
    default:
        return 0; /* undefined */
    }
}

View File

@ -53,6 +53,25 @@ enum {
UPROPS_MAX_EXCEPTIONS_COUNT=1L<<UPROPS_VALUE_BITS
};
#define PROPS_VALUE_IS_EXCEPTION(props) ((props)&UPROPS_EXCEPTION_BIT)
#define GET_CATEGORY(props) ((props)&0x1f)
#define GET_NUMERIC_TYPE(props) (((props)>>UPROPS_NUMERIC_TYPE_SHIFT)&7)
#define GET_UNSIGNED_VALUE(props) ((props)>>UPROPS_VALUE_SHIFT)
#define GET_SIGNED_VALUE(props) ((int32_t)(props)>>UPROPS_VALUE_SHIFT)
#define GET_EXCEPTIONS(props) (exceptionsTable+GET_UNSIGNED_VALUE(props))
/*
 * Indexes of the optional values that an exceptions entry can carry.
 * Each index is also a bit number: uchar.c's HAVE_EXCEPTION_VALUE(flags, index)
 * tests bit `index` of the entry's first word to see whether that value is present.
 */
enum {
EXC_UPPERCASE,
EXC_LOWERCASE,
EXC_TITLECASE,
EXC_UNUSED,
EXC_NUMERIC_VALUE, /* read by u_getNumericValue() as the numerator */
EXC_DENOMINATOR_VALUE, /* read by u_getNumericValue() as the denominator */
EXC_MIRROR_MAPPING,
EXC_SPECIAL_CASING,
EXC_CASE_FOLDING
};
/* number of properties vector words */
#define UPROPS_VECTOR_WORDS 2
@ -126,27 +145,12 @@ enum {
/**
 * Get a properties vector word for a code point.
 * Implemented in uchar.c for uprops.c.
 * @param c code point to look up
 * @param column index of the properties vector word to return;
 *               column==-1 gets the 32-bit main properties word instead
 * @return the requested word, or 0 if no data or illegal argument
 */
U_CFUNC uint32_t
u_getUnicodeProperties(UChar32 c, int32_t column);
/* ### TODO check with PropertyValueAliases.txt and move to uchar.h, @draft ICU 2.x */
/**
 * East Asian Widths constants.
 * Keep in sync with names list in genprops/props2.c.
 */
enum UEAWidthCode {
U_EA_NEUTRAL,
U_EA_AMBIGUOUS,
U_EA_HALF_WIDTH,
U_EA_FULL_WIDTH,
U_EA_NARROW,
U_EA_WIDE,
/* NOTE(review): last enumerator; presumably a count/limit marker rather than a real width value - confirm */
U_EA_TOP
};
typedef enum UEAWidthCode UEAWidthCode;
/**
* Unicode property names and property value names are compared
* "loosely". Property[Value]Aliases.txt say: