ICU-1970 add enumerated/integer/Numeric_Value properties, deprecate cellWidth
X-SVN-Rev: 9034
This commit is contained in:
parent
12f90fe0b3
commit
40e2f9754a
@ -183,17 +183,6 @@ loadPropsData(void) {
|
||||
}
|
||||
|
||||
/* constants and macros for access to the data */
|
||||
/*
 * Indexes of the optional values in an exceptions record.
 * Each constant is used as a bit number in the first exceptions word
 * (see HAVE_EXCEPTION_VALUE), so the numeric values are spelled out.
 */
enum {
    EXC_UPPERCASE         = 0,
    EXC_LOWERCASE         = 1,
    EXC_TITLECASE         = 2,
    EXC_UNUSED            = 3,
    EXC_NUMERIC_VALUE     = 4,
    EXC_DENOMINATOR_VALUE = 5,
    EXC_MIRROR_MAPPING    = 6,
    EXC_SPECIAL_CASING    = 7,
    EXC_CASE_FOLDING      = 8
};
|
||||
|
||||
/* getting a uint32_t properties word from the data */
|
||||
/*
 * Nonzero when the properties data can be used: either havePropsData is
 * already positive, or it is exactly 0 and a call to loadPropsData() returns
 * a positive value. A negative havePropsData short-circuits to 0 without
 * calling loadPropsData().
 */
#define HAVE_DATA (havePropsData>0 || (havePropsData==0 && loadPropsData()>0))
|
||||
@ -207,13 +196,6 @@ enum {
|
||||
} else { \
|
||||
(result)=0; \
|
||||
}
|
||||
/* Nonzero if the exception flag is set in the properties word. */
#define PROPS_VALUE_IS_EXCEPTION(word) ((word) & UPROPS_EXCEPTION_BIT)

/* Category field: the low 5 bits of the properties word. */
#define GET_CATEGORY(word) ((word) & 0x1f)

/* Numeric type field: 3 bits starting at UPROPS_NUMERIC_TYPE_SHIFT. */
#define GET_NUMERIC_TYPE(word) (((word) >> UPROPS_NUMERIC_TYPE_SHIFT) & 7)
/* ### TODO: 2 or 3 bits for numericType?! */

/* Value field (everything from UPROPS_VALUE_SHIFT upwards), zero-extended. */
#define GET_UNSIGNED_VALUE(word) ((word) >> UPROPS_VALUE_SHIFT)

/*
 * Value field, sign-extended. The cast happens before the shift, so this
 * relies on >> of a negative int32_t being an arithmetic shift.
 */
#define GET_SIGNED_VALUE(word) ((int32_t)(word) >> UPROPS_VALUE_SHIFT)

/* Pointer into exceptionsTable at the index stored in the value field. */
#define GET_EXCEPTIONS(word) (exceptionsTable + GET_UNSIGNED_VALUE(word))
|
||||
|
||||
/* finding an exception value */
|
||||
/*
 * Nonzero if bit number `index` is set in `flags`.
 * The result is the masked bit itself, not normalized to 0/1.
 */
#define HAVE_EXCEPTION_VALUE(flags, index) ((flags)&(1UL<<(index)))
|
||||
@ -606,6 +588,61 @@ u_charDigitValue(UChar32 c) {
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI double U_EXPORT2
|
||||
u_getNumericValue(UChar32 c) {
|
||||
uint32_t props, numericType;
|
||||
GET_PROPS(c, props);
|
||||
numericType=GET_NUMERIC_TYPE(props);
|
||||
|
||||
if(numericType==0 || numericType>=(int32_t)U_NT_COUNT) {
|
||||
return U_NO_NUMERIC_VALUE;
|
||||
} else {
|
||||
if(!PROPS_VALUE_IS_EXCEPTION(props)) {
|
||||
return GET_SIGNED_VALUE(props);
|
||||
} else {
|
||||
const uint32_t *pe;
|
||||
uint32_t firstExceptionValue;
|
||||
|
||||
int32_t numerator;
|
||||
uint32_t denominator;
|
||||
|
||||
pe=GET_EXCEPTIONS(props);
|
||||
firstExceptionValue=*pe++;
|
||||
|
||||
if(HAVE_EXCEPTION_VALUE(firstExceptionValue, EXC_NUMERIC_VALUE)) {
|
||||
uint32_t flags=firstExceptionValue;
|
||||
int i=EXC_NUMERIC_VALUE;
|
||||
const uint32_t *p=pe;
|
||||
ADD_EXCEPTION_OFFSET(flags, i, p);
|
||||
numerator=(int32_t)*p;
|
||||
} else {
|
||||
numerator=0;
|
||||
}
|
||||
if(HAVE_EXCEPTION_VALUE(firstExceptionValue, EXC_DENOMINATOR_VALUE)) {
|
||||
uint32_t flags=firstExceptionValue;
|
||||
int i=EXC_DENOMINATOR_VALUE;
|
||||
const uint32_t *p=pe;
|
||||
ADD_EXCEPTION_OFFSET(flags, i, p);
|
||||
denominator=*p;
|
||||
} else {
|
||||
denominator=0;
|
||||
}
|
||||
|
||||
switch(firstExceptionValue&((1UL<<EXC_NUMERIC_VALUE)|(1UL<<EXC_DENOMINATOR_VALUE))) {
|
||||
case 1UL<<EXC_NUMERIC_VALUE:
|
||||
return numerator;
|
||||
case 1UL<<EXC_DENOMINATOR_VALUE:
|
||||
return (double)1./(double)denominator;
|
||||
case (1UL<<EXC_NUMERIC_VALUE)|(1UL<<EXC_DENOMINATOR_VALUE):
|
||||
return (double)numerator/(double)denominator;
|
||||
case 0: /* none (should not occur with numericType>0) */
|
||||
default:
|
||||
return U_NO_NUMERIC_VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Gets the character's linguistic directionality.*/
|
||||
U_CAPI UCharDirection U_EXPORT2
|
||||
u_charDirection(UChar32 c) {
|
||||
@ -775,15 +812,19 @@ U_CFUNC uint32_t
|
||||
u_getUnicodeProperties(UChar32 c, int32_t column) {
|
||||
uint16_t vecIndex;
|
||||
|
||||
if( !HAVE_DATA || countPropsVectors==0 ||
|
||||
(uint32_t)c>0x10ffff ||
|
||||
column<0 || column>=propsVectorsColumns
|
||||
if(column==-1) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return props;
|
||||
} else if( !HAVE_DATA || countPropsVectors==0 ||
|
||||
(uint32_t)c>0x10ffff ||
|
||||
column<0 || column>=propsVectorsColumns
|
||||
) {
|
||||
return 0;
|
||||
} else {
|
||||
UTRIE_GET16(&propsVectorsTrie, c, vecIndex);
|
||||
return propsVectors[vecIndex+column];
|
||||
}
|
||||
|
||||
UTRIE_GET16(&propsVectorsTrie, c, vecIndex);
|
||||
return propsVectors[vecIndex+column];
|
||||
}
|
||||
|
||||
/* string casing ------------------------------------------------------------ */
|
||||
|
@ -236,18 +236,127 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
||||
};
|
||||
}
|
||||
|
||||
UBool u_isUAlphabetic(UChar32 c) {
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isUAlphabetic(UChar32 c) {
|
||||
return u_hasBinaryProperty(c, UCHAR_ALPHABETIC);
|
||||
}
|
||||
|
||||
UBool u_isULowercase(UChar32 c) {
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isULowercase(UChar32 c) {
|
||||
return u_hasBinaryProperty(c, UCHAR_LOWERCASE);
|
||||
}
|
||||
|
||||
UBool u_isUUppercase(UChar32 c) {
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isUUppercase(UChar32 c) {
|
||||
return u_hasBinaryProperty(c, UCHAR_UPPERCASE);
|
||||
}
|
||||
|
||||
UBool u_isUWhiteSpace(UChar32 c) {
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isUWhiteSpace(UChar32 c) {
|
||||
return u_hasBinaryProperty(c, UCHAR_WHITE_SPACE);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_getIntPropertyValue(UChar32 c, UProperty which) {
|
||||
UErrorCode errorCode;
|
||||
|
||||
if(which<UCHAR_BINARY_START) {
|
||||
return 0; /* undefined */
|
||||
} else if(which<UCHAR_BINARY_LIMIT) {
|
||||
return (int32_t)u_hasBinaryProperty(c, which);
|
||||
} else if(which<UCHAR_INT_START) {
|
||||
return 0; /* undefined */
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_CLASS:
|
||||
return (int32_t)u_charDirection(c);
|
||||
case UCHAR_BLOCK:
|
||||
return (int32_t)ublock_getCode(c);
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
return u_getCombiningClass(c);
|
||||
#if 0 /* ### */
|
||||
case UCHAR_DECOMPOSITION_TYPE:
|
||||
return ;
|
||||
#endif
|
||||
case UCHAR_EAST_ASIAN_WIDTH:
|
||||
return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_WIDTH_MASK)>>UPROPS_EA_WIDTH_SHIFT;
|
||||
case UCHAR_GENERAL_CATEGORY:
|
||||
return (int32_t)u_charType(c);
|
||||
#if 0 /* ### */
|
||||
case UCHAR_JOINING_GROUP:
|
||||
return ;
|
||||
case UCHAR_JOINING_TYPE:
|
||||
return ;
|
||||
case UCHAR_LINE_BREAK:
|
||||
return ;
|
||||
#endif
|
||||
case UCHAR_NUMERIC_TYPE:
|
||||
return (int32_t)GET_NUMERIC_TYPE(u_getUnicodeProperties(c, -1));
|
||||
case UCHAR_SCRIPT:
|
||||
errorCode=U_ZERO_ERROR;
|
||||
return (int32_t)uscript_getScript(c, &errorCode);
|
||||
default:
|
||||
return 0; /* undefined */
|
||||
}
|
||||
} else {
|
||||
return 0; /* undefined */
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_getIntPropertyMinValue(UProperty which) {
|
||||
switch(which) {
|
||||
case UCHAR_BLOCK:
|
||||
return UBLOCK_INVALID_CODE;
|
||||
case UCHAR_SCRIPT:
|
||||
return USCRIPT_INVALID_CODE;
|
||||
default:
|
||||
return 0; /* undefined; and: all other properties have a minimum value of 0 */
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_getIntPropertyMaxValue(UProperty which) {
|
||||
if(which<UCHAR_BINARY_START) {
|
||||
return 0; /* undefined */
|
||||
} else if(which<UCHAR_BINARY_LIMIT) {
|
||||
return 1; /* maximum TRUE for all binary properties */
|
||||
} else if(which<UCHAR_INT_START) {
|
||||
return 0; /* undefined */
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_CLASS:
|
||||
return (int32_t)U_CHAR_DIRECTION_COUNT-1;
|
||||
case UCHAR_BLOCK:
|
||||
/* ### TODO This should be data-driven from uprops.dat */
|
||||
return (int32_t)UBLOCK_COUNT-1;
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
return 0xff; /* TODO do we need to be more precise, getting the actual maximum? */
|
||||
#if 0 /* ### */
|
||||
case UCHAR_DECOMPOSITION_TYPE:
|
||||
return ;
|
||||
#endif
|
||||
case UCHAR_EAST_ASIAN_WIDTH:
|
||||
return (int32_t)U_EA_COUNT-1;
|
||||
case UCHAR_GENERAL_CATEGORY:
|
||||
return (int32_t)U_CHAR_CATEGORY_COUNT-1;
|
||||
#if 0 /* ### */
|
||||
case UCHAR_JOINING_GROUP:
|
||||
return ;
|
||||
case UCHAR_JOINING_TYPE:
|
||||
return ;
|
||||
case UCHAR_LINE_BREAK:
|
||||
return ;
|
||||
#endif
|
||||
case UCHAR_NUMERIC_TYPE:
|
||||
return (int32_t)U_NT_COUNT-1;
|
||||
case UCHAR_SCRIPT:
|
||||
/* ### TODO This should be data-driven from uprops.dat */
|
||||
return (int32_t)USCRIPT_CODE_LIMIT-1;
|
||||
default:
|
||||
return 0; /* undefined */
|
||||
}
|
||||
} else {
|
||||
return 0; /* undefined */
|
||||
}
|
||||
}
|
||||
|
@ -53,6 +53,25 @@ enum {
|
||||
UPROPS_MAX_EXCEPTIONS_COUNT=1L<<UPROPS_VALUE_BITS
|
||||
};
|
||||
|
||||
/* Nonzero if the exception flag is set in the properties word. */
#define PROPS_VALUE_IS_EXCEPTION(word) ((word) & UPROPS_EXCEPTION_BIT)

/* Category field: the low 5 bits of the properties word. */
#define GET_CATEGORY(word) ((word) & 0x1f)

/* Numeric type field: 3 bits starting at UPROPS_NUMERIC_TYPE_SHIFT. */
#define GET_NUMERIC_TYPE(word) (((word) >> UPROPS_NUMERIC_TYPE_SHIFT) & 7)

/* Value field (everything from UPROPS_VALUE_SHIFT upwards), zero-extended. */
#define GET_UNSIGNED_VALUE(word) ((word) >> UPROPS_VALUE_SHIFT)

/*
 * Value field, sign-extended. The cast happens before the shift, so this
 * relies on >> of a negative int32_t being an arithmetic shift.
 */
#define GET_SIGNED_VALUE(word) ((int32_t)(word) >> UPROPS_VALUE_SHIFT)

/* Pointer into exceptionsTable at the index stored in the value field. */
#define GET_EXCEPTIONS(word) (exceptionsTable + GET_UNSIGNED_VALUE(word))
|
||||
|
||||
/*
 * Indexes of the optional values in an exceptions record.
 * The constants are used as bit numbers (1UL<<EXC_...) when testing the
 * first exceptions word, so the numeric values are spelled out.
 */
enum {
    EXC_UPPERCASE         = 0,
    EXC_LOWERCASE         = 1,
    EXC_TITLECASE         = 2,
    EXC_UNUSED            = 3,
    EXC_NUMERIC_VALUE     = 4,
    EXC_DENOMINATOR_VALUE = 5,
    EXC_MIRROR_MAPPING    = 6,
    EXC_SPECIAL_CASING    = 7,
    EXC_CASE_FOLDING      = 8
};
|
||||
|
||||
/* number of properties vector words */
|
||||
#define UPROPS_VECTOR_WORDS 2
|
||||
|
||||
@ -126,27 +145,12 @@ enum {
|
||||
/**
|
||||
* Get a properties vector word for a code point.
|
||||
* Implemented in uchar.c for uprops.c.
|
||||
* column==-1 gets the 32-bit main properties word instead.
|
||||
* @return the requested properties word, or 0 if no data or illegal argument
|
||||
*/
|
||||
U_CFUNC uint32_t
|
||||
u_getUnicodeProperties(UChar32 c, int32_t column);
|
||||
|
||||
/* ### TODO check with PropertyValueAliases.txt and move to uchar.h, @draft ICU 2.x */
|
||||
/**
|
||||
* East Asian Widths constants.
|
||||
* Keep in sync with names list in genprops/props2.c.
|
||||
*/
|
||||
/*
 * Values are written out explicitly because they must stay aligned with an
 * external names list.
 * NOTE(review): U_EA_TOP looks like an end marker rather than a real width
 * value - confirm before relying on it.
 */
typedef enum UEAWidthCode {
    U_EA_NEUTRAL    = 0,
    U_EA_AMBIGUOUS  = 1,
    U_EA_HALF_WIDTH = 2,
    U_EA_FULL_WIDTH = 3,
    U_EA_NARROW     = 4,
    U_EA_WIDE       = 5,
    U_EA_TOP        = 6
} UEAWidthCode;
|
||||
|
||||
/**
|
||||
* Unicode property names and property value names are compared
|
||||
* "loosely". Property[Value]Aliases.txt say:
|
||||
|
Loading…
Reference in New Issue
Block a user