/*
**********************************************************************
* Copyright (c) 2002-2003, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
**********************************************************************
*/
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "uarrsort.h"

U_NAMESPACE_BEGIN

//----------------------------------------------------------------------
// PropertyAliases implementation

/**
 * Select one name out of a name group.
 *
 * @param offset  offset of the group, an array of Offset entries
 * @param choice  zero-based index of the wanted entry in that array
 * @return the string resolved by getPointerNull() from the absolute
 *         value of the selected entry; NULL if offset is 0, if choice
 *         is negative, or if a negative entry is met before the
 *         selected index is reached
 */
const char*
PropertyAliases::chooseNameInGroup(Offset offset,
                                   UPropertyNameChoice choice) const {
    int32_t remaining = choice;
    if (offset == 0 || remaining < 0) {
        return NULL;
    }

    const Offset* entry = (const Offset*) getPointer(offset);
    for (; remaining > 0; --remaining) {
        // A negative entry may only be the selected one; stepping
        // over it means the requested index does not exist.
        if (*entry < 0) {
            return NULL;
        }
        ++entry;
    }

    // The sign of the selected entry is dropped before it is resolved.
    Offset nameOffset = *entry;
    if (nameOffset < 0) {
        nameOffset = -nameOffset;
    }
    return (const char*) getPointerNull(nameOffset);
}

/**
 * Find the ValueMap of a property through the
 * NonContiguousEnumToOffset table located at enumToValue_offset.
 *
 * @return the ValueMap, or NULL if the table yields offset 0 for prop
 */
const ValueMap*
PropertyAliases::getValueMap(EnumValue prop) const {
    NonContiguousEnumToOffset* propToMap =
        (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
    Offset mapOffset = propToMap->getOffset(prop);
    if (mapOffset == 0) {
        return NULL;
    }
    return (const ValueMap*) getPointerNull(mapOffset);
}

/**
 * Return the name of a property: the property is mapped to its name
 * group via the table at enumToName_offset, then chooseNameInGroup()
 * picks the entry requested by choice.
 */
inline const char*
PropertyAliases::getPropertyName(EnumValue prop,
                                 UPropertyNameChoice choice) const {
    NonContiguousEnumToOffset* propToGroup =
        (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
    Offset group = propToGroup->getOffset(prop);
    return chooseNameInGroup(group, choice);
}

/**
 * Return the property enum for an alias, as reported by the
 * NameToEnum table located at nameToEnum_offset.
 */
inline EnumValue
PropertyAliases::getPropertyEnum(const char* alias) const {
    return ((NameToEnum*) getPointer(nameToEnum_offset))->
        getEnum(alias, *this);
}

/**
 * Return the name of a property value.
 *
 * @return NULL if prop has no ValueMap; otherwise the result of
 *         chooseNameInGroup() on the name group found for value
 */
inline const char*
PropertyAliases::getPropertyValueName(EnumValue prop,
                                      EnumValue value,
                                      UPropertyNameChoice choice) const {
    const ValueMap* map = getValueMap(prop);
    if (map == NULL) {
        return NULL;
    }

    // A ValueMap with a nonzero enumToName_offset uses an EnumToOffset
    // table; otherwise the NonContiguousEnumToOffset table at
    // ncEnumToName_offset is consulted.
    if (map->enumToName_offset) {
        EnumToOffset* table =
            (EnumToOffset*) getPointer(map->enumToName_offset);
        return chooseNameInGroup(table->getOffset(value), choice);
    }
    NonContiguousEnumToOffset* ncTable =
        (NonContiguousEnumToOffset*) getPointer(map->ncEnumToName_offset);
    return chooseNameInGroup(ncTable->getOffset(value), choice);
}

/**
 * Return the value enum for an alias of a value of property prop.
 *
 * @return UCHAR_INVALID_CODE if prop has no ValueMap; otherwise the
 *         result of the ValueMap's NameToEnum lookup
 */
inline EnumValue
PropertyAliases::getPropertyValueEnum(EnumValue prop,
                                      const char* alias) const {
    const ValueMap* map = getValueMap(prop);
    if (map == NULL) {
        return UCHAR_INVALID_CODE;
    }
    return ((NameToEnum*) getPointer(map->nameToEnum_offset))->
        getEnum(alias, *this);
}

U_NAMESPACE_END

//----------------------------------------------------------------------
// UDataMemory structures

static const PropertyAliases* PNAME = NULL;
static UDataMemory* UDATA = NULL;

//----------------------------------------------------------------------
// UDataMemory loading/unloading

/**
 * udata callback that checks whether a data item is acceptable as
 * property names data.
*/ U_CDECL_BEGIN static UBool U_CALLCONV isAcceptable(void* /*context*/, const char* /*type*/, const char* /*name*/, const UDataInfo* info) { return info->size >= sizeof(UDataInfo) && info->isBigEndian == U_IS_BIG_ENDIAN && info->charsetFamily == U_CHARSET_FAMILY && info->dataFormat[0] == PNAME_SIG_0 && info->dataFormat[1] == PNAME_SIG_1 && info->dataFormat[2] == PNAME_SIG_2 && info->dataFormat[3] == PNAME_SIG_3 && info->formatVersion[0] == PNAME_FORMAT_VERSION; } UBool pname_cleanup() { if (UDATA) { udata_close(UDATA); UDATA = NULL; } PNAME = NULL; return TRUE; } U_CDECL_END /** * Load the property names data. Caller should check that data is * not loaded BEFORE calling this function. Returns TRUE if the load * succeeds. */ static UBool _load() { UErrorCode ec = U_ZERO_ERROR; UDataMemory* data = udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME, isAcceptable, 0, &ec); if (U_SUCCESS(ec)) { umtx_lock(NULL); if (UDATA == NULL) { UDATA = data; PNAME = (const PropertyAliases*) udata_getMemory(UDATA); data = NULL; } umtx_unlock(NULL); } if (data) { udata_close(data); } return PNAME!=NULL; } /** * Inline function that expands to code that does a lazy load of the * property names data. If the data is already loaded, avoids an * unnecessary function call. If the data is not loaded, call _load() * to load it, and return TRUE if the load succeeds. */ static inline UBool load() { umtx_lock(NULL); UBool f = (PNAME!=NULL); umtx_unlock(NULL); return f || _load(); } //---------------------------------------------------------------------- // Public API implementation // The C API is just a thin wrapper. Each function obtains a pointer // to the singleton PropertyAliases, and calls the appropriate method // on it. If it cannot obtain a pointer, because valid data is not // available, then it returns NULL or UCHAR_INVALID_CODE. U_CAPI const char* U_EXPORT2 u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice) { return load() ? 
PNAME->getPropertyName(property, nameChoice) : NULL; } U_CAPI UProperty U_EXPORT2 u_getPropertyEnum(const char* alias) { UProperty p = load() ? (UProperty) PNAME->getPropertyEnum(alias) : UCHAR_INVALID_CODE; return p; } U_CAPI const char* U_EXPORT2 u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice) { return load() ? PNAME->getPropertyValueName(property, value, nameChoice) : NULL; } U_CAPI int32_t U_EXPORT2 u_getPropertyValueEnum(UProperty property, const char* alias) { return load() ? PNAME->getPropertyValueEnum(property, alias) : UCHAR_INVALID_CODE; } /* data swapping ------------------------------------------------------------ */ /* * Sub-structure-swappers use the temp array (which is as large as the * actual data) for intermediate storage, * as well as to indicate if a particular structure has been swapped already. * The temp array is initially reset to all 0. * pos is the byte offset of the sub-structure in the inBytes/outBytes/temp arrays. */ int32_t EnumToOffset::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const EnumToOffset *inMap; EnumToOffset *outMap, *tempMap; int32_t size; tempMap=(EnumToOffset *)(temp+pos); if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) { /* this map was swapped already */ size=tempMap->getSize(); return size; } inMap=(const EnumToOffset *)(inBytes+pos); outMap=(EnumToOffset *)(outBytes+pos); tempMap->enumStart=udata_readInt32(ds, inMap->enumStart); tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(lengthenumStart, tempMap->enumLimit, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap enumStart and enumLimit */ ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode); /* swap _offsetArray[] */ ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset), 
outMap->getOffsetArray(), pErrorCode); } return size; } int32_t NonContiguousEnumToOffset::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const NonContiguousEnumToOffset *inMap; NonContiguousEnumToOffset *outMap, *tempMap; int32_t size; tempMap=(NonContiguousEnumToOffset *)(temp+pos); if(tempMap->count!=0) { /* this map was swapped already */ size=tempMap->getSize(); return size; } inMap=(const NonContiguousEnumToOffset *)(inBytes+pos); outMap=(NonContiguousEnumToOffset *)(outBytes+pos); tempMap->count=udata_readInt32(ds, inMap->count); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(lengthcount, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap count and _enumArray[] */ length=(1+tempMap->count)*sizeof(EnumValue); ds->swapArray32(ds, inMap, length, outMap, pErrorCode); /* swap _offsetArray[] */ pos+=length; ds->swapArray16(ds, inBytes+pos, tempMap->count*sizeof(Offset), outBytes+pos, pErrorCode); } return size; } struct NameAndIndex { Offset name, index; }; typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2); struct CompareContext { const char *chars; PropNameCompareFn *propCompare; }; static int32_t upname_compareRows(const void *context, const void *left, const void *right) { CompareContext *cmp=(CompareContext *)context; return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name, cmp->chars+((const NameAndIndex *)right)->name); } int32_t NameToEnum::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const NameToEnum *inMap; NameToEnum *outMap, *tempMap; const EnumValue *inEnumArray; EnumValue *outEnumArray; const Offset *inNameArray; Offset *outNameArray; NameAndIndex *sortArray; CompareContext cmp; int32_t i, size, oldIndex; tempMap=(NameToEnum *)(temp+pos); if(tempMap->count!=0) { /* this map was 
swapped already */ size=tempMap->getSize(); return size; } inMap=(const NameToEnum *)(inBytes+pos); outMap=(NameToEnum *)(outBytes+pos); tempMap->count=udata_readInt32(ds, inMap->count); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(lengthcount, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap count */ ds->swapArray32(ds, inMap, 4, outMap, pErrorCode); inEnumArray=inMap->getEnumArray(); outEnumArray=outMap->getEnumArray(); inNameArray=(const Offset *)(inEnumArray+tempMap->count); outNameArray=(Offset *)(outEnumArray+tempMap->count); if(ds->inCharset==ds->outCharset) { /* no need to sort, just swap the enum/name arrays */ ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode); ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode); return size; } /* * The name and enum arrays are sorted by names and must be resorted * if inCharset!=outCharset. * We use the corresponding part of the temp array to sort an array * of pairs of name offsets and sorting indexes. * Then the sorting indexes are used to permutate-swap the name and enum arrays. * * The outBytes must already contain the swapped strings. */ sortArray=(NameAndIndex *)tempMap->getEnumArray(); for(i=0; icount; ++i) { sortArray[i].name=udata_readInt16(ds, inNameArray[i]); sortArray[i].index=(Offset)i; } /* * use a stable sort to avoid shuffling of equal strings, * which makes testing harder */ cmp.chars=(const char *)outBytes; cmp.propCompare= ds->outCharset==U_ASCII_FAMILY ? 
uprv_compareASCIIPropertyNames : uprv_compareEBCDICPropertyNames; uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex), upname_compareRows, &cmp, TRUE, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n", tempMap->count, u_errorName(*pErrorCode)); return 0; } /* copy/swap/permutate _enumArray[] and _nameArray[] */ if(inEnumArray!=outEnumArray) { for(i=0; icount; ++i) { oldIndex=sortArray[i].index; ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode); ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode); } } else { /* * in-place swapping: need to permutate into a temporary array * and then copy back to not destroy the data */ EnumValue *tempEnumArray; Offset *oldIndexes; /* write name offsets directly from sortArray */ for(i=0; icount; ++i) { ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name); } /* * compress the oldIndexes into a separate array to make space for tempEnumArray * the tempMap _nameArray becomes oldIndexes[], getting the index * values from the 2D sortArray[], * while sortArray=tempMap _enumArray[] becomes tempEnumArray[] * this saves us allocating more memory * * it works because sizeof(NameAndIndex)<=sizeof(EnumValue) * and because the nameArray[] can be used for oldIndexes[] */ tempEnumArray=(EnumValue *)sortArray; oldIndexes=(Offset *)(sortArray+tempMap->count); /* copy sortArray[].index values into oldIndexes[] */ for(i=0; icount; ++i) { oldIndexes[i]=sortArray[i].index; } /* permutate inEnumArray[] into tempEnumArray[] */ for(i=0; icount; ++i) { ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode); } /* copy tempEnumArray[] to outEnumArray[] */ uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4); } } return size; } int32_t PropertyAliases::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, UErrorCode *pErrorCode) { const 
PropertyAliases *inAliases; PropertyAliases *outAliases; PropertyAliases aliases; const ValueMap *inValueMaps; ValueMap *outValueMaps; ValueMap valueMap; uint8_t *temp; int32_t i; inAliases=(const PropertyAliases *)inBytes; outAliases=(PropertyAliases *)outBytes; /* read the input PropertyAliases - all 16-bit values */ for(i=0; ireadUInt16(((const uint16_t *)inBytes)[i]); } if(length>=0) { if(lengthswapArray16(ds, inAliases, sizeof(PropertyAliases), outAliases, pErrorCode); /* swap the name groups */ ds->swapArray16(ds, inBytes+aliases.nameGroupPool_offset, aliases.stringPool_offset-aliases.nameGroupPool_offset, outBytes+aliases.nameGroupPool_offset, pErrorCode); /* swap the strings */ udata_swapInvStringBlock(ds, inBytes+aliases.stringPool_offset, aliases.total_size-aliases.stringPool_offset, outBytes+aliases.stringPool_offset, pErrorCode); /* * alloc uint8_t temp[total_size] and reset it * swap each top-level struct, put at least the count fields into temp * use subclass-specific swap() functions * enumerate value maps, for each * if temp does not have count!=0 yet * read count, put it into temp * swap the array(s) * resort strings in name->enum maps * swap value maps */ temp=(uint8_t *)uprv_malloc(aliases.total_size); if(temp==NULL) { udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n", aliases.total_size); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } uprv_memset(temp, 0, aliases.total_size); /* swap properties->name groups map */ NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes, temp, aliases.enumToName_offset, pErrorCode); /* swap name->properties map */ NameToEnum::swap(ds, inBytes, length, outBytes, temp, aliases.nameToEnum_offset, pErrorCode); /* swap properties->value maps map */ NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes, temp, aliases.enumToValue_offset, pErrorCode); /* enumerate all ValueMaps and swap them */ inValueMaps=(const ValueMap *)(inBytes+aliases.valueMap_offset); 
outValueMaps=(ValueMap *)(outBytes+aliases.valueMap_offset); for(i=0; iswapArray16(ds, inValueMaps, aliases.valueMap_count*sizeof(ValueMap), outValueMaps, pErrorCode); /* name groups and strings were swapped above */ /* release temp */ uprv_free(temp); } return aliases.total_size; } U_CAPI int32_t U_EXPORT2 upname_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x61 && pInfo->dataFormat[3]==0x6d && pInfo->formatVersion[0]==1 )) { udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; outBytes=(uint8_t *)outData+headerSize; if(length>=0) { length-=headerSize; if(length