From 4d9f1eaa3cf4c7f2d17a35db5415186845b4f74b Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 23 Feb 2005 00:54:19 +0000 Subject: [PATCH] ICU-4143 avoid mutexes in u_tolower() and similar; require u_init() for thread safety of case/bidi properties (as in 3.0) and use dummy objects when data is not available X-SVN-Rev: 17243 --- icu4c/source/common/ubidi_props.c | 57 +++++++++- icu4c/source/common/ubidi_props.h | 8 ++ icu4c/source/common/ucase.c | 123 ++++++++++---------- icu4c/source/common/ucase.h | 8 ++ icu4c/source/common/ucln_cmn.h | 3 +- icu4c/source/common/uinit.c | 6 +- icu4c/source/common/uprops.c | 157 +++++++++++++++++++++----- icu4c/source/common/utrie.c | 125 +++++++++++++++++++- icu4c/source/common/utrie.h | 46 +++++++- icu4c/source/test/cintltst/cucdtst.c | 14 +++ icu4c/source/test/cintltst/trietest.c | 72 +++++++++++- 11 files changed, 514 insertions(+), 105 deletions(-) diff --git a/icu4c/source/common/ubidi_props.c b/icu4c/source/common/ubidi_props.c index 393557c742..7a7b6fb47d 100644 --- a/icu4c/source/common/ubidi_props.c +++ b/icu4c/source/common/ubidi_props.c @@ -187,7 +187,7 @@ ubidi_closeProps(UBiDiProps *bdp) { /* UBiDiProps singleton ----------------------------------------------------- */ -static UBiDiProps *gBdp=NULL; +static UBiDiProps *gBdp=NULL, *gBdpDummy=NULL; static UErrorCode gErrorCode=U_ZERO_ERROR; static int8_t gHaveData=0; @@ -197,6 +197,8 @@ ubidi_cleanup(void) { gBdp=NULL; gErrorCode=U_ZERO_ERROR; gHaveData=0; + uprv_free(gBdpDummy); + gBdpDummy=NULL; return TRUE; } @@ -241,6 +243,59 @@ ubidi_getSingleton(UErrorCode *pErrorCode) { } } +U_CAPI UBiDiProps * U_EXPORT2 +ubidi_getDummy(UErrorCode *pErrorCode) { + UBiDiProps *bdp; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + + UMTX_CHECK(NULL, gBdpDummy, bdp); + + if(bdp!=NULL) { + /* the dummy object was already created */ + return bdp; + } else /* bdp==NULL */ { + /* create the dummy object */ + UBiDiProps *bdp; + int32_t *indexes; + + bdp=(UBiDiProps 
*)uprv_malloc(sizeof(UBiDiProps)+UBIDI_IX_TOP*4+UTRIE_DUMMY_SIZE); + if(bdp==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(bdp, 0, sizeof(UBiDiProps)+UBIDI_IX_TOP*4); + + bdp->indexes=indexes=(int32_t *)(bdp+1); + indexes[UBIDI_IX_INDEX_TOP]=UBIDI_IX_TOP; + + indexes[UBIDI_IX_TRIE_SIZE]= + utrie_unserializeDummy(&bdp->trie, indexes+UBIDI_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + uprv_free(bdp); + return NULL; + } + + bdp->formatVersion[0]=1; + bdp->formatVersion[2]=UTRIE_SHIFT; + bdp->formatVersion[3]=UTRIE_INDEX_SHIFT; + + /* set the static variables */ + umtx_lock(NULL); + if(gBdpDummy==NULL) { + gBdpDummy=bdp; + bdp=NULL; + ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup); + } + umtx_unlock(NULL); + + uprv_free(bdp); + return gBdpDummy; + } +} + /* Unicode bidi/shaping data swapping --------------------------------------- */ U_CAPI int32_t U_EXPORT2 diff --git a/icu4c/source/common/ubidi_props.h b/icu4c/source/common/ubidi_props.h index 410aa93c8d..d6862df2a0 100644 --- a/icu4c/source/common/ubidi_props.h +++ b/icu4c/source/common/ubidi_props.h @@ -44,6 +44,14 @@ ubidi_closeProps(UBiDiProps *bdp); U_CAPI UBiDiProps * U_EXPORT2 ubidi_getSingleton(UErrorCode *pErrorCode); +/** + * Get a singleton dummy object, one that works with no real data. + * This can be used when the real data is not available. + * Using the dummy can reduce checks for available data after an initial failure. 
+ */ +U_CAPI UBiDiProps * U_EXPORT2 +ubidi_getDummy(UErrorCode *pErrorCode); + U_CAPI int32_t U_EXPORT2 ubidi_swap(const UDataSwapper *ds, diff --git a/icu4c/source/common/ucase.c b/icu4c/source/common/ucase.c index 2b9d2e9acd..55dde07a27 100644 --- a/icu4c/source/common/ucase.c +++ b/icu4c/source/common/ucase.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2004, International Business Machines +* Copyright (C) 2004-2005, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -192,7 +192,7 @@ ucase_close(UCaseProps *csp) { /* UCaseProps singleton ----------------------------------------------------- */ -static UCaseProps *gCsp=NULL; +static UCaseProps *gCsp=NULL, *gCspDummy=NULL; static UErrorCode gErrorCode=U_ZERO_ERROR; static int8_t gHaveData=0; @@ -201,6 +201,8 @@ static UBool U_CALLCONV ucase_cleanup(void) { gCsp=NULL; gErrorCode=U_ZERO_ERROR; gHaveData=0; + uprv_free(gCspDummy); + gCspDummy=NULL; return TRUE; } @@ -245,6 +247,59 @@ ucase_getSingleton(UErrorCode *pErrorCode) { } } +U_CAPI UCaseProps * U_EXPORT2 +ucase_getDummy(UErrorCode *pErrorCode) { + UCaseProps *csp; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + + UMTX_CHECK(NULL, gCspDummy, csp); + + if(csp!=NULL) { + /* the dummy object was already created */ + return csp; + } else /* csp==NULL */ { + /* create the dummy object */ + UCaseProps *csp; + int32_t *indexes; + + csp=(UCaseProps *)uprv_malloc(sizeof(UCaseProps)+UCASE_IX_TOP*4+UTRIE_DUMMY_SIZE); + if(csp==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(csp, 0, sizeof(UCaseProps)+UCASE_IX_TOP*4); + + csp->indexes=indexes=(int32_t *)(csp+1); + indexes[UCASE_IX_INDEX_TOP]=UCASE_IX_TOP; + + indexes[UCASE_IX_TRIE_SIZE]= + utrie_unserializeDummy(&csp->trie, indexes+UCASE_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode); + 
if(U_FAILURE(*pErrorCode)) { + uprv_free(csp); + return NULL; + } + + csp->formatVersion[0]=1; + csp->formatVersion[2]=UTRIE_SHIFT; + csp->formatVersion[3]=UTRIE_INDEX_SHIFT; + + /* set the static variables */ + umtx_lock(NULL); + if(gCspDummy==NULL) { + gCspDummy=csp; + csp=NULL; + ucln_common_registerCleanup(UCLN_COMMON_UCASE, ucase_cleanup); + } + umtx_unlock(NULL); + + uprv_free(csp); + return gCspDummy; + } +} + /* Unicode case mapping data swapping --------------------------------------- */ U_CAPI int32_t U_EXPORT2 @@ -742,70 +797,6 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { return (UBool)((props&UCASE_SENSITIVE)!=0); } -/* public API (see uchar.h) ------------------------------------------------- */ - -U_CAPI UBool U_EXPORT2 -u_isULowercase(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - return (UBool)(csp!=NULL && UCASE_LOWER==ucase_getType(csp, c)); -} - -U_CAPI UBool U_EXPORT2 -u_isUUppercase(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - return (UBool)(csp!=NULL && UCASE_UPPER==ucase_getType(csp, c)); -} - -/* Transforms the Unicode character to its lower case equivalent.*/ -U_CAPI UChar32 U_EXPORT2 -u_tolower(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - if(csp!=NULL) { - return ucase_tolower(csp, c); - } else { - return c; - } -} - -/* Transforms the Unicode character to its upper case equivalent.*/ -U_CAPI UChar32 U_EXPORT2 -u_toupper(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - if(csp!=NULL) { - return ucase_toupper(csp, c); - } else { - return c; - } -} - -/* Transforms the Unicode character to its title case equivalent.*/ -U_CAPI UChar32 U_EXPORT2 -u_totitle(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - if(csp!=NULL) { - return ucase_totitle(csp, c); - } 
else { - return c; - } -} - -/* return the simple case folding mapping for c */ -U_CAPI UChar32 U_EXPORT2 -u_foldCase(UChar32 c, uint32_t options) { - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - if(csp!=NULL) { - return ucase_fold(csp, c, options); - } else { - return c; - } -} - /* string casing ------------------------------------------------------------ */ /* diff --git a/icu4c/source/common/ucase.h b/icu4c/source/common/ucase.h index 15a84000d8..318def8246 100644 --- a/icu4c/source/common/ucase.h +++ b/icu4c/source/common/ucase.h @@ -44,6 +44,14 @@ ucase_close(UCaseProps *csp); U_CAPI UCaseProps * U_EXPORT2 ucase_getSingleton(UErrorCode *pErrorCode); +/** + * Get a singleton dummy object, one that works with no real data. + * This can be used when the real data is not available. + * Using the dummy can reduce checks for available data after an initial failure. + */ +U_CAPI UCaseProps * U_EXPORT2 +ucase_getDummy(UErrorCode *pErrorCode); + U_CAPI int32_t U_EXPORT2 ucase_swap(const UDataSwapper *ds, diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h index 7c5b38c1ac..167f1f3c67 100644 --- a/icu4c/source/common/ucln_cmn.h +++ b/icu4c/source/common/ucln_cmn.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * * -* Copyright (C) 2001-2004, International Business Machines * +* Copyright (C) 2001-2005, International Business Machines * * Corporation and others. All Rights Reserved. 
* * * ****************************************************************************** @@ -43,6 +43,7 @@ typedef enum ECleanupCommonType { UCLN_COMMON_USET, UCLN_COMMON_UNAMES, UCLN_COMMON_PNAME, + UCLN_COMMON_UPROPS, UCLN_COMMON_UBIDI, UCLN_COMMON_UCASE, UCLN_COMMON_UCHAR, diff --git a/icu4c/source/common/uinit.c b/icu4c/source/common/uinit.c index 41e13849ea..e9f0d4eee4 100644 --- a/icu4c/source/common/uinit.c +++ b/icu4c/source/common/uinit.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * * -* Copyright (C) 2001-2004, International Business Machines * +* Copyright (C) 2001-2005, International Business Machines * * Corporation and others. All Rights Reserved. * * * ****************************************************************************** @@ -102,6 +102,10 @@ u_init(UErrorCode *status) { /* Char Properties */ uprv_loadPropsData(status); + /* load the case and bidi properties but don't fail if they are not available */ + u_isULowercase(0x61); + u_getIntPropertyValue(0x200D, UCHAR_JOINING_TYPE); /* ZERO WIDTH JOINER: Join_Causing */ + #if !UCONFIG_NO_NORMALIZATION /* Normalization */ unorm_haveData(status); diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.c index 006c4be9ac..96a61330ce 100644 --- a/icu4c/source/common/uprops.c +++ b/icu4c/source/common/uprops.c @@ -25,13 +25,136 @@ #include "unicode/uchar.h" #include "unicode/uscript.h" #include "cstring.h" +#include "ucln_cmn.h" +#include "umutex.h" #include "unormimp.h" #include "ubidi_props.h" #include "uprops.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) -/* API functions ------------------------------------------------------------ */ +/* cleanup ------------------------------------------------------------------ */ + +static UCaseProps *gCsp=NULL; +static UBiDiProps *gBdp=NULL; + +static UBool U_CALLCONV uprops_cleanup(void) { + gCsp=NULL; + gBdp=NULL; + return TRUE; +} + +/* case mapping properties API 
---------------------------------------------- */ + +/* get the UCaseProps singleton, or else its dummy, once and for all */ +static UCaseProps * +getCaseProps() { + /* + * This lazy initialization with double-checked locking (without mutex protection for + * the initial check) is transiently unsafe under certain circumstances. + * Check the readme and use u_init() if necessary. + */ + + /* the initial check is performed by the GET_CASE_PROPS() macro */ + UCaseProps *csp; + UErrorCode errorCode=U_ZERO_ERROR; + + csp=ucase_getSingleton(&errorCode); + if(U_FAILURE(errorCode)) { + errorCode=U_ZERO_ERROR; + csp=ucase_getDummy(&errorCode); + if(U_FAILURE(errorCode)) { + return NULL; + } + } + + umtx_lock(NULL); + if(gCsp==NULL) { + gCsp=csp; + csp=NULL; + ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup); + } + umtx_unlock(NULL); + + return gCsp; +} + +#define GET_CASE_PROPS() (gCsp!=NULL ? gCsp : getCaseProps()) + +/* public API (see uchar.h) */ + +U_CAPI UBool U_EXPORT2 +u_isULowercase(UChar32 c) { + return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c)); +} + +U_CAPI UBool U_EXPORT2 +u_isUUppercase(UChar32 c) { + return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c)); +} + +/* Transforms the Unicode character to its lower case equivalent.*/ +U_CAPI UChar32 U_EXPORT2 +u_tolower(UChar32 c) { + return ucase_tolower(GET_CASE_PROPS(), c); +} + +/* Transforms the Unicode character to its upper case equivalent.*/ +U_CAPI UChar32 U_EXPORT2 +u_toupper(UChar32 c) { + return ucase_toupper(GET_CASE_PROPS(), c); +} + +/* Transforms the Unicode character to its title case equivalent.*/ +U_CAPI UChar32 U_EXPORT2 +u_totitle(UChar32 c) { + return ucase_totitle(GET_CASE_PROPS(), c); +} + +/* return the simple case folding mapping for c */ +U_CAPI UChar32 U_EXPORT2 +u_foldCase(UChar32 c, uint32_t options) { + return ucase_fold(GET_CASE_PROPS(), c, options); +} + +/* bidi/shaping properties API ---------------------------------------------- */ + +/* get 
the UBiDiProps singleton, or else its dummy, once and for all */ +static UBiDiProps * +getBiDiProps() { + /* + * This lazy initialization with double-checked locking (without mutex protection for + * the initial check) is transiently unsafe under certain circumstances. + * Check the readme and use u_init() if necessary. + */ + + /* the initial check is performed by the GET_BIDI_PROPS() macro */ + UBiDiProps *bdp; + UErrorCode errorCode=U_ZERO_ERROR; + + bdp=ubidi_getSingleton(&errorCode); + if(U_FAILURE(errorCode)) { + errorCode=U_ZERO_ERROR; + bdp=ubidi_getDummy(&errorCode); + if(U_FAILURE(errorCode)) { + return NULL; + } + } + + umtx_lock(NULL); + if(gBdp==NULL) { + gBdp=bdp; + bdp=NULL; + ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup); + } + umtx_unlock(NULL); + + return gBdp; +} + +#define GET_BIDI_PROPS() (gBdp!=NULL ? gBdp : getBiDiProps()) + +/* general properties API functions ----------------------------------------- */ static const struct { int32_t column; @@ -105,9 +228,8 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { } else { if(column==UPROPS_SRC_CASE) { /* case mapping properties */ - UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - if(U_FAILURE(errorCode)) { + UCaseProps *csp=GET_CASE_PROPS(); + if(csp==NULL) { return FALSE; } switch(which) { @@ -141,9 +263,8 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { #endif } else if(column==UPROPS_SRC_BIDI) { /* bidi/shaping properties */ - UErrorCode errorCode=U_ZERO_ERROR; - UBiDiProps *bdp=ubidi_getSingleton(&errorCode); - if(U_FAILURE(errorCode)) { + UBiDiProps *bdp=GET_BIDI_PROPS(); + if(bdp==NULL) { return FALSE; } switch(which) { @@ -164,7 +285,6 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(UChar32 c, UProperty which) { - UBiDiProps *bdp; UErrorCode errorCode; int32_t type; @@ -193,21 +313,9 @@ u_getIntPropertyValue(UChar32 c, UProperty which) { case UCHAR_GENERAL_CATEGORY: return 
(int32_t)u_charType(c); case UCHAR_JOINING_GROUP: - errorCode=U_ZERO_ERROR; - bdp=ubidi_getSingleton(&errorCode); - if(bdp!=NULL) { - return ubidi_getJoiningGroup(bdp, c); - } else { - return 0; - } + return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); case UCHAR_JOINING_TYPE: - errorCode=U_ZERO_ERROR; - bdp=ubidi_getSingleton(&errorCode); - if(bdp!=NULL) { - return ubidi_getJoiningType(bdp, c); - } else { - return 0; - } + return ubidi_getJoiningType(GET_BIDI_PROPS(), c); case UCHAR_LINE_BREAK: return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT; case UCHAR_NUMERIC_TYPE: @@ -256,8 +364,6 @@ u_getIntPropertyMinValue(UProperty which) { U_CAPI int32_t U_EXPORT2 u_getIntPropertyMaxValue(UProperty which) { - UErrorCode errorCode; - if(which>UPROPS_BLOCK_SHIFT; case UCHAR_CANONICAL_COMBINING_CLASS: diff --git a/icu4c/source/common/utrie.c b/icu4c/source/common/utrie.c index dab992f017..49db19002c 100644 --- a/icu4c/source/common/utrie.c +++ b/icu4c/source/common/utrie.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2001-2004, International Business Machines +* Copyright (C) 2001-2005, International Business Machines * Corporation and others. All Rights Reserved. 
* ****************************************************************************** @@ -889,8 +889,8 @@ defaultGetFoldingOffset(uint32_t data) { U_CAPI int32_t U_EXPORT2 utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) { - UTrieHeader *header; - uint16_t *p16; + const UTrieHeader *header; + const uint16_t *p16; uint32_t options; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { @@ -904,7 +904,7 @@ utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pEr } /* check the signature */ - header=(UTrieHeader *)data; + header=(const UTrieHeader *)data; if(header->signature!=0x54726965) { *pErrorCode=U_INVALID_FORMAT_ERROR; return -1; @@ -931,7 +931,7 @@ utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pEr *pErrorCode=U_INVALID_FORMAT_ERROR; return -1; } - p16=(uint16_t *)(header+1); + p16=(const uint16_t *)(header+1); trie->index=p16; p16+=trie->indexLength; length-=2*trie->indexLength; @@ -962,6 +962,121 @@ utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pEr return length; } +U_CAPI int32_t U_EXPORT2 +utrie_unserializeDummy(UTrie *trie, + void *data, int32_t length, + uint32_t initialValue, uint32_t leadUnitValue, + UBool make16BitTrie, + UErrorCode *pErrorCode) { + uint16_t *p16; + int32_t actualLength, latin1Length, i, limit; + uint16_t block; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return -1; + } + + /* calculate the actual size of the dummy trie data */ + + /* max(Latin-1, block 0) */ + latin1Length= UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH; + + trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT; + trie->dataLength=latin1Length; + if(leadUnitValue!=initialValue) { + trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH; + } + + actualLength=trie->indexLength*2; + if(make16BitTrie) { + actualLength+=trie->dataLength*2; + } else { + actualLength+=trie->dataLength*4; + } + + /* enough space for the dummy trie? 
*/ + if(lengthisLatin1Linear=TRUE; + trie->initialValue=initialValue; + + /* fill the index and data arrays */ + p16=(uint16_t *)data; + trie->index=p16; + + if(make16BitTrie) { + /* indexes to block 0 */ + block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT); + limit=trie->indexLength; + for(i=0; i>UTRIE_INDEX_SHIFT); + i=0xd800>>UTRIE_SHIFT; + limit=0xdc00>>UTRIE_SHIFT; + for(; idata32=NULL; + + /* Latin-1 data */ + p16+=trie->indexLength; + for(i=0; iindexLength*2); + + if(leadUnitValue!=initialValue) { + /* indexes for lead surrogate code units to the block after Latin-1 */ + block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); + i=0xd800>>UTRIE_SHIFT; + limit=0xdc00>>UTRIE_SHIFT; + for(; idata32=p32=(uint32_t *)(p16+trie->indexLength); + + /* Latin-1 data */ + for(i=0; igetFoldingOffset=defaultGetFoldingOffset; + + return actualLength; +} + /* swapping ----------------------------------------------------------------- */ U_CAPI int32_t U_EXPORT2 diff --git a/icu4c/source/common/utrie.h b/icu4c/source/common/utrie.h index 0492f0f0bf..187bbd44ef 100644 --- a/icu4c/source/common/utrie.h +++ b/icu4c/source/common/utrie.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2001-2004, International Business Machines +* Copyright (C) 2001-2005, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -110,6 +110,23 @@ enum { */ #define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400) +/** + * Number of bytes for a dummy trie. + * A dummy trie is an empty runtime trie, used when a real data trie cannot + * be loaded. + * The number of bytes works for Latin-1-linear tries with 32-bit data + * (worst case). 
+ * + * Calculation: + * BMP index + 1 index block for lead surrogate code points + + * Latin-1-linear array + 1 data block for lead surrogate code points + * + * Latin-1: if(UTRIE_SHIFT<=8) { 256 } else { included in first data block } + * + * @see utrie_unserializeDummy + */ +#define UTRIE_DUMMY_SIZE ((UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT)*2+(UTRIE_SHIFT<=8?256:UTRIE_DATA_BLOCK_LENGTH)*4+UTRIE_DATA_BLOCK_LENGTH*4) + /** * Runtime UTrie callback function. * Extract from a lead surrogate's data the @@ -483,6 +500,33 @@ utrie_enum(const UTrie *trie, U_CAPI int32_t U_EXPORT2 utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode); +/** + * "Unserialize" a dummy trie. + * A dummy trie is an empty runtime trie, used when a real data trie cannot + * be loaded. + * + * The input memory is filled so that the trie always returns the initialValue, + * or the leadUnitValue for lead surrogate code points. + * The Latin-1 part is always set up to be linear. 
+ * + * @param trie a pointer to the runtime trie structure + * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data + * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE) + * @param initialValue the initial value that is set for all code points + * @param leadUnitValue the value for lead surrogate code _units_ that do not + * have associated supplementary data + * @param pErrorCode an in/out ICU UErrorCode + * + * @see UTRIE_DUMMY_SIZE + * @see utrie_open + */ +U_CAPI int32_t U_EXPORT2 +utrie_unserializeDummy(UTrie *trie, + void *data, int32_t length, + uint32_t initialValue, uint32_t leadUnitValue, + UBool make16BitTrie, + UErrorCode *pErrorCode); + /* Building a trie ----------------------------------------------------------*/ /** diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c index 604d21b11d..9324eb6f06 100644 --- a/icu4c/source/test/cintltst/cucdtst.c +++ b/icu4c/source/test/cintltst/cucdtst.c @@ -2913,6 +2913,13 @@ static void TestUCase() { ucase_close(csp); udata_close(pData); + + /* coverage for ucase_getDummy() */ + errorCode=U_ZERO_ERROR; + csp=ucase_getDummy(&errorCode); + if(ucase_tolower(csp, 0x41)!=0x41) { + log_err("ucase_tolower(dummy, A)!=A\n"); + } } /* API coverage for ubidi_props.c */ @@ -2944,4 +2951,11 @@ static void TestUBiDiProps() { ubidi_closeProps(bdp); udata_close(pData); + + /* coverage for ubidi_getDummy() */ + errorCode=U_ZERO_ERROR; + bdp=ubidi_getDummy(&errorCode); + if(ubidi_getClass(bdp, 0x20)!=0) { + log_err("ubidi_getClass(dummy, space)!=0\n"); + } } diff --git a/icu4c/source/test/cintltst/trietest.c b/icu4c/source/test/cintltst/trietest.c index 61c9e62b5c..56dea8a39b 100644 --- a/icu4c/source/test/cintltst/trietest.c +++ b/icu4c/source/test/cintltst/trietest.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2001-2003, International Business 
Machines +* Copyright (C) 2001-2005, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -781,13 +781,77 @@ TrieTest(void) { checkRanges3, ARRAY_LENGTH(checkRanges3)); } -#if 1 -void -addTrieTest(TestNode** root); +/* test utrie_unserializeDummy() -------------------------------------------- */ +static int32_t U_CALLCONV +dummyGetFoldingOffset(uint32_t data) { + return -1; /* never get non-initialValue data for supplementary code points */ +} + +static void +dummyTest(UBool make16BitTrie) { + static int32_t mem[UTRIE_DUMMY_SIZE/4]; + + UTrie trie; + UErrorCode errorCode; + UChar32 c; + + uint32_t value, initialValue, leadUnitValue; + + if(make16BitTrie) { + initialValue=0x313; + leadUnitValue=0xaffe; + } else { + initialValue=0x01234567; + leadUnitValue=0x89abcdef; + } + + errorCode=U_ZERO_ERROR; + utrie_unserializeDummy(&trie, mem, sizeof(mem), initialValue, leadUnitValue, make16BitTrie, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("utrie_unserializeDummy(make16BitTrie=%d) failed - %s\n", make16BitTrie, u_errorName(errorCode)); + return; + } + trie.getFoldingOffset=dummyGetFoldingOffset; + + /* test that all code points have initialValue */ + for(c=0; c<=0x10ffff; ++c) { + if(make16BitTrie) { + UTRIE_GET16(&trie, c, value); + } else { + UTRIE_GET32(&trie, c, value); + } + if(value!=initialValue) { + log_err("UTRIE_GET%s(dummy, U+%04lx)=0x%lx instead of 0x%lx\n", + make16BitTrie ? "16" : "32", (long)c, (long)value, (long)initialValue); + } + } + + /* test that the lead surrogate code units have leadUnitValue */ + for(c=0xd800; c<=0xdbff; ++c) { + if(make16BitTrie) { + value=UTRIE_GET16_FROM_LEAD(&trie, c); + } else { + value=UTRIE_GET32_FROM_LEAD(&trie, c); + } + if(value!=leadUnitValue) { + log_err("UTRIE_GET%s_FROM_LEAD(dummy, U+%04lx)=0x%lx instead of 0x%lx\n", + make16BitTrie ? 
"16" : "32", (long)c, (long)value, (long)leadUnitValue); + } + } +} + +static void +DummyTrieTest(void) { + dummyTest(TRUE); + dummyTest(FALSE); +} + +#if 1 void addTrieTest(TestNode** root) { addTest(root, &TrieTest, "tsutil/trietest/TrieTest"); + addTest(root, &DummyTrieTest, "tsutil/trietest/DummyTrieTest"); } #else /* standalone utrie development */