diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index 98387308b6..7cd1f42807 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -4494,4 +4494,123 @@ cleanup: return result; } +/* data swapping ------------------------------------------------------------ */ + +U_CAPI int32_t U_EXPORT2 +unorm_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + const int32_t *inIndexes; + int32_t indexes[32]; + + int32_t i, offset, count, size; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ + pInfo->dataFormat[1]==0x6f && + pInfo->dataFormat[2]==0x72 && + pInfo->dataFormat[3]==0x6d && + pInfo->formatVersion[0]==2 + )) { + udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + + inIndexes=(const int32_t *)inBytes; + + if(length>=0) { + length-=headerSize; + if(length<32*4) { + udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ + for(i=0; i<32; ++i) { + indexes[i]=udata_readInt32(ds, inIndexes[i]); + } + + /* calculate the total length of the data */ + size= + 32*4+ /* size of indexes[] */ + indexes[_NORM_INDEX_TRIE_SIZE]+ + indexes[_NORM_INDEX_UCHAR_COUNT]*2+ + indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ + indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ + indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ + indexes[_NORM_INDEX_CANON_SET_COUNT]*2; + + if(length>=0) { + if(lengthswapArray32(ds, inBytes, count, outBytes, pErrorCode); + offset+=count; + + /* swap the main UTrie */ + count=indexes[_NORM_INDEX_TRIE_SIZE]; + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + offset+=count; + + /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ + count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + offset+=count; + + /* swap the FCD UTrie */ + count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + offset+=count; + + /* swap the aux UTrie */ + count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; + ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + offset+=count; + + /* swap the uint16_t combiningTable[] */ + count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + offset+=count; + } + + return headerSize+size; +} + #endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h index 284bca59e1..e3fe602c24 100644 --- a/icu4c/source/common/unormimp.h +++ b/icu4c/source/common/unormimp.h @@ -372,6 +372,15 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode); U_CAPI void U_EXPORT2 unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode); +/** + * Swap unorm.icu. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +unorm_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + /** * Description of the format of unorm.dat version 2.2. * @@ -391,7 +400,7 @@ unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode); * unorm.dat customarily begins with a UDataInfo structure, see udata.h and .c. * After that there are the following structures: * - * uint16_t indexes[_NORM_INDEX_TOP]; -- _NORM_INDEX_TOP=32, see enum in this file + * int32_t indexes[_NORM_INDEX_TOP]; -- _NORM_INDEX_TOP=32, see enum in this file * * UTrie normTrie; -- size in bytes=indexes[_NORM_INDEX_TRIE_SIZE] * diff --git a/icu4c/source/tools/icuswap/icuswap.cpp b/icu4c/source/tools/icuswap/icuswap.cpp index 59f8de759b..9ac20760ab 100644 --- a/icu4c/source/tools/icuswap/icuswap.cpp +++ b/icu4c/source/tools/icuswap/icuswap.cpp @@ -41,6 +41,7 @@ #include "uprops.h" #include "ucol_swp.h" #include "ucnv_bld.h" +#include "unormimp.h" /* swapping implementations in i18n */ @@ -285,6 +286,9 @@ static const struct { { { 0x43, 0x6d, 0x6e, 0x44 }, udata_swapPackage }, /* dataFormat="CmnD" */ /* insert data formats here, descending by expected frequency of occurrence */ { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ +#if !UCONFIG_NO_NORMALIZATION + { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ +#endif #if !UCONFIG_NO_COLLATION { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */