ICU-298 jitterbug 130: add support for u_isMirrored() and u_charMirror()
X-SVN-Rev: 1240
This commit is contained in:
parent
b091b5e0f2
commit
117c9b824b
@ -23,9 +23,362 @@
|
||||
#include "ucmp8.h"
|
||||
#include "umutex.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
/* dynamically loaded Unicode character properties -------------------------- */
|
||||
|
||||
/*
 * Fallback properties for the code points 0..0x9f, used by GET_PROPS() when
 * the data file cannot be loaded.
 * One 32-bit properties word per code point, indexed by the code point.
 * These values are printed by genprops in verbose mode.
 * The table is only ever read, so it is const.
 */
static const uint32_t staticProps32Table[0xa0]={
    /* 0x00 */ 0x48f,
    /* 0x01 */ 0x48f,
    /* 0x02 */ 0x48f,
    /* 0x03 */ 0x48f,
    /* 0x04 */ 0x48f,
    /* 0x05 */ 0x48f,
    /* 0x06 */ 0x48f,
    /* 0x07 */ 0x48f,
    /* 0x08 */ 0x48f,
    /* 0x09 */ 0x20c,
    /* 0x0a */ 0x1ce,
    /* 0x0b */ 0x20c,
    /* 0x0c */ 0x24d,
    /* 0x0d */ 0x1ce,
    /* 0x0e */ 0x48f,
    /* 0x0f */ 0x48f,
    /* 0x10 */ 0x48f,
    /* 0x11 */ 0x48f,
    /* 0x12 */ 0x48f,
    /* 0x13 */ 0x48f,
    /* 0x14 */ 0x48f,
    /* 0x15 */ 0x48f,
    /* 0x16 */ 0x48f,
    /* 0x17 */ 0x48f,
    /* 0x18 */ 0x48f,
    /* 0x19 */ 0x48f,
    /* 0x1a */ 0x48f,
    /* 0x1b */ 0x48f,
    /* 0x1c */ 0x1ce,
    /* 0x1d */ 0x1ce,
    /* 0x1e */ 0x1ce,
    /* 0x1f */ 0x20c,
    /* 0x20 */ 0x24c,
    /* 0x21 */ 0x297,
    /* 0x22 */ 0x297,
    /* 0x23 */ 0x117,
    /* 0x24 */ 0x119,
    /* 0x25 */ 0x117,
    /* 0x26 */ 0x297,
    /* 0x27 */ 0x297,
    /* 0x28 */ 0x100a94,
    /* 0x29 */ 0xfff00a95,
    /* 0x2a */ 0x297,
    /* 0x2b */ 0x118,
    /* 0x2c */ 0x197,
    /* 0x2d */ 0x113,
    /* 0x2e */ 0x197,
    /* 0x2f */ 0xd7,
    /* 0x30 */ 0x89,
    /* 0x31 */ 0x100089,
    /* 0x32 */ 0x200089,
    /* 0x33 */ 0x300089,
    /* 0x34 */ 0x400089,
    /* 0x35 */ 0x500089,
    /* 0x36 */ 0x600089,
    /* 0x37 */ 0x700089,
    /* 0x38 */ 0x800089,
    /* 0x39 */ 0x900089,
    /* 0x3a */ 0x197,
    /* 0x3b */ 0x297,
    /* 0x3c */ 0x200a98,
    /* 0x3d */ 0x298,
    /* 0x3e */ 0xffe00a98,
    /* 0x3f */ 0x297,
    /* 0x40 */ 0x297,
    /* 0x41 */ 0x2000001,
    /* 0x42 */ 0x2000001,
    /* 0x43 */ 0x2000001,
    /* 0x44 */ 0x2000001,
    /* 0x45 */ 0x2000001,
    /* 0x46 */ 0x2000001,
    /* 0x47 */ 0x2000001,
    /* 0x48 */ 0x2000001,
    /* 0x49 */ 0x2000001,
    /* 0x4a */ 0x2000001,
    /* 0x4b */ 0x2000001,
    /* 0x4c */ 0x2000001,
    /* 0x4d */ 0x2000001,
    /* 0x4e */ 0x2000001,
    /* 0x4f */ 0x2000001,
    /* 0x50 */ 0x2000001,
    /* 0x51 */ 0x2000001,
    /* 0x52 */ 0x2000001,
    /* 0x53 */ 0x2000001,
    /* 0x54 */ 0x2000001,
    /* 0x55 */ 0x2000001,
    /* 0x56 */ 0x2000001,
    /* 0x57 */ 0x2000001,
    /* 0x58 */ 0x2000001,
    /* 0x59 */ 0x2000001,
    /* 0x5a */ 0x2000001,
    /* 0x5b */ 0x200a94,
    /* 0x5c */ 0x297,
    /* 0x5d */ 0xffe00a95,
    /* 0x5e */ 0x29a,
    /* 0x5f */ 0x296,
    /* 0x60 */ 0x29a,
    /* 0x61 */ 0x2000002,
    /* 0x62 */ 0x2000002,
    /* 0x63 */ 0x2000002,
    /* 0x64 */ 0x2000002,
    /* 0x65 */ 0x2000002,
    /* 0x66 */ 0x2000002,
    /* 0x67 */ 0x2000002,
    /* 0x68 */ 0x2000002,
    /* 0x69 */ 0x2000002,
    /* 0x6a */ 0x2000002,
    /* 0x6b */ 0x2000002,
    /* 0x6c */ 0x2000002,
    /* 0x6d */ 0x2000002,
    /* 0x6e */ 0x2000002,
    /* 0x6f */ 0x2000002,
    /* 0x70 */ 0x2000002,
    /* 0x71 */ 0x2000002,
    /* 0x72 */ 0x2000002,
    /* 0x73 */ 0x2000002,
    /* 0x74 */ 0x2000002,
    /* 0x75 */ 0x2000002,
    /* 0x76 */ 0x2000002,
    /* 0x77 */ 0x2000002,
    /* 0x78 */ 0x2000002,
    /* 0x79 */ 0x2000002,
    /* 0x7a */ 0x2000002,
    /* 0x7b */ 0x200a94,
    /* 0x7c */ 0x298,
    /* 0x7d */ 0xffe00a95,
    /* 0x7e */ 0x298,
    /* 0x7f */ 0x48f,
    /* 0x80 */ 0x48f,
    /* 0x81 */ 0x48f,
    /* 0x82 */ 0x48f,
    /* 0x83 */ 0x48f,
    /* 0x84 */ 0x48f,
    /* 0x85 */ 0x1ce,
    /* 0x86 */ 0x48f,
    /* 0x87 */ 0x48f,
    /* 0x88 */ 0x48f,
    /* 0x89 */ 0x48f,
    /* 0x8a */ 0x48f,
    /* 0x8b */ 0x48f,
    /* 0x8c */ 0x48f,
    /* 0x8d */ 0x48f,
    /* 0x8e */ 0x48f,
    /* 0x8f */ 0x48f,
    /* 0x90 */ 0x48f,
    /* 0x91 */ 0x48f,
    /* 0x92 */ 0x48f,
    /* 0x93 */ 0x48f,
    /* 0x94 */ 0x48f,
    /* 0x95 */ 0x48f,
    /* 0x96 */ 0x48f,
    /* 0x97 */ 0x48f,
    /* 0x98 */ 0x48f,
    /* 0x99 */ 0x48f,
    /* 0x9a */ 0x48f,
    /* 0x9b */ 0x48f,
    /* 0x9c */ 0x48f,
    /* 0x9d */ 0x48f,
    /* 0x9e */ 0x48f,
    /* 0x9f */ 0x48f
};
|
||||
|
||||
/*
|
||||
* loaded uprops.dat -
|
||||
* for a description of the file format, see icu/source/tools/genprops/store.c
|
||||
*/
|
||||
#define DATA_NAME "uprops"
|
||||
#define DATA_TYPE "dat"
|
||||
|
||||
static UDataMemory *propsData=NULL;
|
||||
|
||||
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
|
||||
static UVersionInfo dataVersion={ 3, 0, 0, 0 };
|
||||
|
||||
static const uint16_t *propsTable=NULL;
|
||||
#define props32Table ((uint32_t *)propsTable)
|
||||
|
||||
static int8_t havePropsData=0;
|
||||
|
||||
/* index values loaded from uprops.dat */
|
||||
static uint16_t indexes[8];
|
||||
|
||||
enum {
|
||||
INDEX_STAGE_2_BITS,
|
||||
INDEX_STAGE_3_BITS,
|
||||
INDEX_EXCEPTIONS
|
||||
};
|
||||
|
||||
/* access values calculated from indexes */
|
||||
static uint16_t stage23Bits, stage2Mask, stage3Mask;
|
||||
|
||||
static bool_t
|
||||
isAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
UDataInfo *pInfo) {
|
||||
if(
|
||||
pInfo->size>=20 &&
|
||||
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||||
pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */
|
||||
pInfo->dataFormat[1]==0x50 &&
|
||||
pInfo->dataFormat[2]==0x72 &&
|
||||
pInfo->dataFormat[3]==0x6f &&
|
||||
pInfo->formatVersion[0]==1
|
||||
) {
|
||||
uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
|
||||
uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static int8_t
|
||||
loadPropsData() {
|
||||
/* load Unicode character properties data from file if necessary */
|
||||
if(havePropsData==0) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UDataMemory *data;
|
||||
const uint16_t *p=NULL;
|
||||
|
||||
/* open the data outside the mutex block */
|
||||
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return havePropsData=-1;
|
||||
}
|
||||
|
||||
p=(const uint16_t *)udata_getMemory(data);
|
||||
|
||||
/* in the mutex block, set the data for this process */
|
||||
umtx_lock(NULL);
|
||||
if(propsData==NULL) {
|
||||
propsData=data;
|
||||
data=NULL;
|
||||
propsTable=p;
|
||||
p=NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
/* initialize some variables */
|
||||
uprv_memcpy(indexes, propsTable, 16);
|
||||
stage23Bits=indexes[INDEX_STAGE_2_BITS]+indexes[INDEX_STAGE_3_BITS];
|
||||
stage2Mask=(1<<indexes[INDEX_STAGE_2_BITS])-1;
|
||||
stage3Mask=(1<<indexes[INDEX_STAGE_3_BITS])-1;
|
||||
havePropsData=1;
|
||||
|
||||
/* if a different thread set it first, then close the extra data */
|
||||
if(data!=NULL) {
|
||||
udata_close(data); /* NULL if it was set correctly */
|
||||
}
|
||||
}
|
||||
|
||||
return havePropsData;
|
||||
}
|
||||
|
||||
/* constants and macros for access to the data */

/* bit numbers of the value flags in the first word of an exceptions entry */
enum {
    EXC_UPPERCASE,
    EXC_LOWERCASE,
    EXC_TITLECASE,
    EXC_NUMERIC_VALUE,
    EXC_DENOMINATOR_VALUE,

    EXC_MIRROR_MAPPING
};

/* bit positions of the fields in a 32-bit properties word */
enum {
    EXCEPTION_SHIFT=5,
    BIDI_SHIFT,
    MIRROR_SHIFT=BIDI_SHIFT+5,
    VALUE_SHIFT=20,

    VALUE_BITS=32-VALUE_SHIFT
};

/* getting a uint32_t properties word from the data */

/* true if the data is loaded; tries to load it if that was not attempted yet */
#define HAVE_DATA (havePropsData>0 || (havePropsData==0 && loadPropsData()>0))

/* true if c is a code point 0..0x10ffff and the data is available */
#define VALIDATE(c) (((uint32_t)(c))<=0x10ffff && HAVE_DATA)

/*
 * Get the properties word for code point c.
 * With loaded data, this is a three-stage lookup through propsTable that ends
 * in props32Table. Without loaded data, the static fallback table is used for
 * c<=0x9f. In all other cases the result is 0.
 * The argument c is evaluated several times and must not have side effects.
 */
#define GET_PROPS(c) \
    (((uint32_t)(c))<=0x10ffff ? \
        HAVE_DATA ? \
            props32Table[ \
                propsTable[ \
                    propsTable[ \
                        propsTable[8+((c)>>stage23Bits)]+ \
                        (((c)>>indexes[INDEX_STAGE_3_BITS])&stage2Mask)]+ \
                    ((c)&stage3Mask)] \
            ] \
        : (c)<=0x9f ? \
            staticProps32Table[(c)] \
        : 0 \
    : 0)

/* non-zero if the value field of props is an offset into the exceptions data */
#define PROPS_VALUE_IS_EXCEPTION(props) ((props)&(1UL<<EXCEPTION_SHIFT))

/* the low 5 bits of a properties word */
#define GET_CATEGORY(props) ((props)&0x1f)

/* the value field (bits VALUE_SHIFT and up) as an unsigned or a signed number */
#define GET_UNSIGNED_VALUE(props) ((props)>>VALUE_SHIFT)
#define GET_SIGNED_VALUE(props) ((int32_t)(props)>>VALUE_SHIFT)

/* pointer to the first 32-bit word of the exceptions entry for props */
#define GET_EXCEPTIONS(props) (props32Table+indexes[INDEX_EXCEPTIONS]+GET_UNSIGNED_VALUE(props))
|
||||
|
||||
/* finding an exception value */

/* non-zero if bit number index is set in flags */
#define HAVE_EXCEPTION_VALUE(flags, index) ((flags)&(1UL<<(index)))

/* number of set bits in each integer value 0..31 */
static const uint8_t flagsOffset[32]={
    0, 1, 1, 2, 1, 2, 2, 3,
    1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5
};

/*
 * Add to offset the number of flags that are set below bit number index.
 * flags, index and offset must be modifiable lvalues: for index>=5 the macro
 * shifts flags right by 5 and subtracts 5 from index.
 * The do/while(0) wrapper makes the macro behave as one statement, so that it
 * can also be used as the body of an if/else.
 */
#define ADD_EXCEPTION_OFFSET(flags, index, offset) do { \
    if((index)>=5) { \
        (offset)+=flagsOffset[(flags)&0x1f]; \
        (flags)>>=5; \
        (index)-=5; \
    } \
    (offset)+=flagsOffset[(flags)&((1<<(index))-1)]; \
} while(0)
|
||||
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isMirrored(UChar32 c) {
|
||||
return GET_PROPS(c)&(1UL<<MIRROR_SHIFT) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_charMirror(UChar32 c) {
|
||||
uint32_t props=GET_PROPS(c);
|
||||
if((props&(1UL<<MIRROR_SHIFT))==0) {
|
||||
/* not mirrored - the value is not a mirror offset */
|
||||
return c;
|
||||
} else if(!PROPS_VALUE_IS_EXCEPTION(props)) {
|
||||
return c+GET_SIGNED_VALUE(props);
|
||||
} else {
|
||||
uint32_t *pe=GET_EXCEPTIONS(props);
|
||||
uint32_t firstExceptionValue=*pe;
|
||||
if(HAVE_EXCEPTION_VALUE(firstExceptionValue, EXC_MIRROR_MAPPING)) {
|
||||
int i=EXC_MIRROR_MAPPING;
|
||||
++pe;
|
||||
ADD_EXCEPTION_OFFSET(firstExceptionValue, i, pe);
|
||||
return (UChar32)*pe;
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* static data tables ------------------------------------------------------- */
|
||||
|
||||
struct UCharDigitPair{
|
||||
uint16_t fUnicode;
|
||||
int8_t fValue;
|
||||
@ -5371,10 +5724,12 @@ createDirTables()
|
||||
#endif
|
||||
}
|
||||
|
||||
/* this function will become public */
|
||||
/* ### this function will become public */
|
||||
U_CFUNC void
|
||||
u_versionFromString(UVersionInfo versionArray, const char *versionString);
|
||||
|
||||
/*
 * Copy the version of the character properties data into versionArray.
 *
 * The source is the file-level dataVersion, which starts out as 3.0.0.0 and
 * is replaced with the version from the data file header when isAcceptable()
 * accepts a data file.
 * Does nothing if versionArray is NULL.
 *
 * The former u_versionFromString() call is gone: its result was always
 * overwritten by the copy below, and it ran before the NULL check.
 */
void u_getUnicodeVersion(UVersionInfo versionArray) {
    if(versionArray!=NULL) {
        uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
    }
}
|
||||
|
@ -30,6 +30,9 @@
|
||||
static void
|
||||
TestCharNames();
|
||||
|
||||
static void
|
||||
TestMirroring();
|
||||
|
||||
/* test data ---------------------------------------------------------------- */
|
||||
/* smaller of a and b; each argument may be evaluated twice, so avoid side effects */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
@ -104,6 +107,7 @@ void addUnicodeTest(TestNode** root)
|
||||
addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
|
||||
addTest(root, &TestStringFunctions, "tsutil/cucdtst/TestStringFunctions");
|
||||
addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
|
||||
addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
|
||||
}
|
||||
|
||||
/*==================================================== */
|
||||
@ -645,3 +649,24 @@ TestCharNames() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* test u_isMirrored() and u_charMirror() ----------------------------------- */
|
||||
|
||||
/*
 * Spot checks for u_isMirrored() and u_charMirror().
 * Each function is checked with four code points that are expected to be
 * mirrored and four that are not. One error is logged per function if any of
 * its checks fails.
 */
static void
TestMirroring() {
    log_verbose("Testing u_isMirrored()\n");
    if( /* expected to be mirrored */
        !u_isMirrored(0x28) || !u_isMirrored(0xbb) || !u_isMirrored(0x2045) || !u_isMirrored(0x232a) ||
        /* expected not to be mirrored */
        u_isMirrored(0x27) || u_isMirrored(0x61) || u_isMirrored(0x284) || u_isMirrored(0x3400)
    ) {
        log_err("u_isMirrored() does not work correctly\n");
    }

    log_verbose("Testing u_charMirror()\n");
    if( /* expected to map to a different code point */
        u_charMirror(0x3c)!=0x3e || u_charMirror(0x5d)!=0x5b || u_charMirror(0x208d)!=0x208e || u_charMirror(0x3017)!=0x3016 ||
        /* expected to map to themselves */
        u_charMirror(0x2e)!=0x2e || u_charMirror(0x6f3)!=0x6f3 || u_charMirror(0x301c)!=0x301c || u_charMirror(0xa4ab)!=0xa4ab
    ) {
        log_err("u_charMirror() does not work correctly\n");
    }
}
|
||||
|
@ -356,6 +356,22 @@ void UnicodeTest::TestUnicodeData()
|
||||
if(name!=UNICODE_STRING("LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 49)) {
|
||||
errln("Unicode character name lookup failed\n");
|
||||
}
|
||||
|
||||
// test Unicode::isMirrored() and charMirror()
|
||||
// see also cintltst/cucdtest.c
|
||||
if(!(Unicode::isMirrored(0x28) && Unicode::isMirrored(0xbb) && Unicode::isMirrored(0x2045) && Unicode::isMirrored(0x232a) &&
|
||||
!Unicode::isMirrored(0x27) && !Unicode::isMirrored(0x61) && !Unicode::isMirrored(0x284) && !Unicode::isMirrored(0x3400)
|
||||
)
|
||||
) {
|
||||
errln("Unicode::isMirrored() does not work correctly\n");
|
||||
}
|
||||
|
||||
if(!(Unicode::charMirror(0x3c)==0x3e && Unicode::charMirror(0x5d)==0x5b && Unicode::charMirror(0x208d)==0x208e && Unicode::charMirror(0x3017)==0x3016 &&
|
||||
Unicode::charMirror(0x2e)==0x2e && Unicode::charMirror(0x6f3)==0x6f3 && Unicode::charMirror(0x301c)==0x301c && Unicode::charMirror(0xa4ab)==0xa4ab
|
||||
)
|
||||
) {
|
||||
errln("Unicode::charMirror() does not work correctly\n");
|
||||
}
|
||||
}
|
||||
|
||||
int32_t UnicodeTest::MakeProp(char* str)
|
||||
|
Loading…
Reference in New Issue
Block a user