scuffed-code/icu4c/source/common/propname.cpp
2009-11-21 22:04:55 +00:00

753 lines
25 KiB
C++

/*
**********************************************************************
* Copyright (c) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
**********************************************************************
*/
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "ucln_cmn.h"
#include "uarrsort.h"
U_CDECL_BEGIN
/**
* Get the next non-ignorable ASCII character from a property name
* and lowercases it.
* @return ((advance count for the name)<<8)|character
*/
static inline int32_t
getASCIIPropertyNameChar(const char *name) {
int32_t i;
char c;
/* Ignore delimiters '-', '_', and ASCII White_Space */
for(i=0;
(c=name[i++])==0x2d || c==0x5f ||
c==0x20 || (0x09<=c && c<=0x0d);
) {}
if(c!=0) {
return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
} else {
return i<<8;
}
}
/**
* Get the next non-ignorable EBCDIC character from a property name
* and lowercases it.
* @return ((advance count for the name)<<8)|character
*/
static inline int32_t
getEBCDICPropertyNameChar(const char *name) {
int32_t i;
char c;
/* Ignore delimiters '-', '_', and EBCDIC White_Space */
for(i=0;
(c=name[i++])==0x60 || c==0x6d ||
c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
) {}
if(c!=0) {
return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
} else {
return i<<8;
}
}
/**
* Unicode property names and property value names are compared "loosely".
*
* UCD.html 4.0.1 says:
* For all property names, property value names, and for property values for
* Enumerated, Binary, or Catalog properties, use the following
* loose matching rule:
*
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
*
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
int32_t rc, r1, r2;
for(;;) {
r1=getASCIIPropertyNameChar(name1);
r2=getASCIIPropertyNameChar(name2);
/* If we reach the ends of both strings then they match */
if(((r1|r2)&0xff)==0) {
return 0;
}
/* Compare the lowercased characters */
if(r1!=r2) {
rc=(r1&0xff)-(r2&0xff);
if(rc!=0) {
return rc;
}
}
name1+=r1>>8;
name2+=r2>>8;
}
}
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
int32_t rc, r1, r2;
for(;;) {
r1=getEBCDICPropertyNameChar(name1);
r2=getEBCDICPropertyNameChar(name2);
/* If we reach the ends of both strings then they match */
if(((r1|r2)&0xff)==0) {
return 0;
}
/* Compare the lowercased characters */
if(r1!=r2) {
rc=(r1&0xff)-(r2&0xff);
if(rc!=0) {
return rc;
}
}
name1+=r1>>8;
name2+=r2>>8;
}
}
U_CDECL_END
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------
// PropertyAliases implementation
const char*
PropertyAliases::chooseNameInGroup(Offset offset,
UPropertyNameChoice choice) const {
int32_t c = choice;
if (!offset || c < 0) {
return NULL;
}
const Offset* p = (const Offset*) getPointer(offset);
while (c-- > 0) {
if (*p++ < 0) return NULL;
}
Offset a = *p;
if (a < 0) a = -a;
return (const char*) getPointerNull(a);
}
const ValueMap*
PropertyAliases::getValueMap(EnumValue prop) const {
NonContiguousEnumToOffset* e2o = (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
Offset a = e2o->getOffset(prop);
return (const ValueMap*) (a ? getPointerNull(a) : NULL);
}
inline const char*
PropertyAliases::getPropertyName(EnumValue prop,
UPropertyNameChoice choice) const {
NonContiguousEnumToOffset* e2n = (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
return chooseNameInGroup(e2n->getOffset(prop), choice);
}
inline EnumValue
PropertyAliases::getPropertyEnum(const char* alias) const {
NameToEnum* n2e = (NameToEnum*) getPointer(nameToEnum_offset);
return n2e->getEnum(alias, *this);
}
inline const char*
PropertyAliases::getPropertyValueName(EnumValue prop,
EnumValue value,
UPropertyNameChoice choice) const {
const ValueMap* vm = getValueMap(prop);
if (!vm) return NULL;
Offset a;
if (vm->enumToName_offset) {
a = ((EnumToOffset*) getPointer(vm->enumToName_offset))->
getOffset(value);
} else {
a = ((NonContiguousEnumToOffset*) getPointer(vm->ncEnumToName_offset))->
getOffset(value);
}
return chooseNameInGroup(a, choice);
}
inline EnumValue
PropertyAliases::getPropertyValueEnum(EnumValue prop,
const char* alias) const {
const ValueMap* vm = getValueMap(prop);
if (!vm) return UCHAR_INVALID_CODE;
NameToEnum* n2e = (NameToEnum*) getPointer(vm->nameToEnum_offset);
return n2e->getEnum(alias, *this);
}
U_NAMESPACE_END
U_NAMESPACE_USE
//----------------------------------------------------------------------
// UDataMemory structures
static const PropertyAliases* PNAME = NULL;
static UDataMemory* UDATA = NULL;
//----------------------------------------------------------------------
// UDataMemory loading/unloading
/**
* udata callback to verify the zone data.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV
isPNameAcceptable(void* /*context*/,
const char* /*type*/, const char* /*name*/,
const UDataInfo* info) {
return
info->size >= sizeof(UDataInfo) &&
info->isBigEndian == U_IS_BIG_ENDIAN &&
info->charsetFamily == U_CHARSET_FAMILY &&
info->dataFormat[0] == PNAME_SIG_0 &&
info->dataFormat[1] == PNAME_SIG_1 &&
info->dataFormat[2] == PNAME_SIG_2 &&
info->dataFormat[3] == PNAME_SIG_3 &&
info->formatVersion[0] == PNAME_FORMAT_VERSION;
}
static UBool U_CALLCONV pname_cleanup(void) {
if (UDATA) {
udata_close(UDATA);
UDATA = NULL;
}
PNAME = NULL;
return TRUE;
}
U_CDECL_END
/**
* Load the property names data. Caller should check that data is
* not loaded BEFORE calling this function. Returns TRUE if the load
* succeeds.
*/
static UBool _load() {
UErrorCode ec = U_ZERO_ERROR;
UDataMemory* data =
udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME,
isPNameAcceptable, 0, &ec);
if (U_SUCCESS(ec)) {
umtx_lock(NULL);
if (UDATA == NULL) {
UDATA = data;
PNAME = (const PropertyAliases*) udata_getMemory(UDATA);
ucln_common_registerCleanup(UCLN_COMMON_PNAME, pname_cleanup);
data = NULL;
}
umtx_unlock(NULL);
}
if (data) {
udata_close(data);
}
return PNAME!=NULL;
}
/**
* Inline function that expands to code that does a lazy load of the
* property names data. If the data is already loaded, avoids an
* unnecessary function call. If the data is not loaded, call _load()
* to load it, and return TRUE if the load succeeds.
*/
static inline UBool load() {
UBool f;
UMTX_CHECK(NULL, (PNAME!=NULL), f);
return f || _load();
}
//----------------------------------------------------------------------
// Public API implementation
// The C API is just a thin wrapper. Each function obtains a pointer
// to the singleton PropertyAliases, and calls the appropriate method
// on it. If it cannot obtain a pointer, because valid data is not
// available, then it returns NULL or UCHAR_INVALID_CODE.
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
UPropertyNameChoice nameChoice) {
return load() ? PNAME->getPropertyName(property, nameChoice)
: NULL;
}
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
UProperty p = load() ? (UProperty) PNAME->getPropertyEnum(alias)
: UCHAR_INVALID_CODE;
return p;
}
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
int32_t value,
UPropertyNameChoice nameChoice) {
return load() ? PNAME->getPropertyValueName(property, value, nameChoice)
: NULL;
}
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
const char* alias) {
return load() ? PNAME->getPropertyValueEnum(property, alias)
: (int32_t)UCHAR_INVALID_CODE;
}
/* data swapping ------------------------------------------------------------ */
/*
* Sub-structure-swappers use the temp array (which is as large as the
* actual data) for intermediate storage,
* as well as to indicate if a particular structure has been swapped already.
* The temp array is initially reset to all 0.
* pos is the byte offset of the sub-structure in the inBytes/outBytes/temp arrays.
*/
int32_t
EnumToOffset::swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
uint8_t *temp, int32_t pos,
UErrorCode *pErrorCode) {
const EnumToOffset *inMap;
EnumToOffset *outMap, *tempMap;
int32_t size;
tempMap=(EnumToOffset *)(temp+pos);
if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) {
/* this map was swapped already */
size=tempMap->getSize();
return size;
}
inMap=(const EnumToOffset *)(inBytes+pos);
outMap=(EnumToOffset *)(outBytes+pos);
tempMap->enumStart=udata_readInt32(ds, inMap->enumStart);
tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit);
size=tempMap->getSize();
if(length>=0) {
if(length<(pos+size)) {
if(length<(int32_t)sizeof(PropertyAliases)) {
udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n"
" for pnames.icu EnumToOffset{%d..%d} at %d\n",
length, tempMap->enumStart, tempMap->enumLimit, pos);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
/* swap enumStart and enumLimit */
ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode);
/* swap _offsetArray[] */
ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset),
outMap->getOffsetArray(), pErrorCode);
}
return size;
}
int32_t
NonContiguousEnumToOffset::swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
uint8_t *temp, int32_t pos,
UErrorCode *pErrorCode) {
const NonContiguousEnumToOffset *inMap;
NonContiguousEnumToOffset *outMap, *tempMap;
int32_t size;
tempMap=(NonContiguousEnumToOffset *)(temp+pos);
if(tempMap->count!=0) {
/* this map was swapped already */
size=tempMap->getSize();
return size;
}
inMap=(const NonContiguousEnumToOffset *)(inBytes+pos);
outMap=(NonContiguousEnumToOffset *)(outBytes+pos);
tempMap->count=udata_readInt32(ds, inMap->count);
size=tempMap->getSize();
if(length>=0) {
if(length<(pos+size)) {
if(length<(int32_t)sizeof(PropertyAliases)) {
udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n"
" for pnames.icu NonContiguousEnumToOffset[%d] at %d\n",
length, tempMap->count, pos);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
/* swap count and _enumArray[] */
length=(1+tempMap->count)*sizeof(EnumValue);
ds->swapArray32(ds, inMap, length,
outMap, pErrorCode);
/* swap _offsetArray[] */
pos+=length;
ds->swapArray16(ds, inBytes+pos, tempMap->count*sizeof(Offset),
outBytes+pos, pErrorCode);
}
return size;
}
struct NameAndIndex {
Offset name, index;
};
U_CDECL_BEGIN
typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2);
struct CompareContext {
const char *chars;
PropNameCompareFn *propCompare;
};
static int32_t U_CALLCONV
upname_compareRows(const void *context, const void *left, const void *right) {
CompareContext *cmp=(CompareContext *)context;
return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name,
cmp->chars+((const NameAndIndex *)right)->name);
}
U_CDECL_END
int32_t
NameToEnum::swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
uint8_t *temp, int32_t pos,
UErrorCode *pErrorCode) {
const NameToEnum *inMap;
NameToEnum *outMap, *tempMap;
const EnumValue *inEnumArray;
EnumValue *outEnumArray;
const Offset *inNameArray;
Offset *outNameArray;
NameAndIndex *sortArray;
CompareContext cmp;
int32_t i, size, oldIndex;
tempMap=(NameToEnum *)(temp+pos);
if(tempMap->count!=0) {
/* this map was swapped already */
size=tempMap->getSize();
return size;
}
inMap=(const NameToEnum *)(inBytes+pos);
outMap=(NameToEnum *)(outBytes+pos);
tempMap->count=udata_readInt32(ds, inMap->count);
size=tempMap->getSize();
if(length>=0) {
if(length<(pos+size)) {
if(length<(int32_t)sizeof(PropertyAliases)) {
udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
" for pnames.icu NameToEnum[%d] at %d\n",
length, tempMap->count, pos);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
/* swap count */
ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);
inEnumArray=inMap->getEnumArray();
outEnumArray=outMap->getEnumArray();
inNameArray=(const Offset *)(inEnumArray+tempMap->count);
outNameArray=(Offset *)(outEnumArray+tempMap->count);
if(ds->inCharset==ds->outCharset) {
/* no need to sort, just swap the enum/name arrays */
ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
return size;
}
/*
* The name and enum arrays are sorted by names and must be resorted
* if inCharset!=outCharset.
* We use the corresponding part of the temp array to sort an array
* of pairs of name offsets and sorting indexes.
* Then the sorting indexes are used to permutate-swap the name and enum arrays.
*
* The outBytes must already contain the swapped strings.
*/
sortArray=(NameAndIndex *)tempMap->getEnumArray();
for(i=0; i<tempMap->count; ++i) {
sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
sortArray[i].index=(Offset)i;
}
/*
* use a stable sort to avoid shuffling of equal strings,
* which makes testing harder
*/
cmp.chars=(const char *)outBytes;
if (ds->outCharset==U_ASCII_FAMILY) {
cmp.propCompare=uprv_compareASCIIPropertyNames;
}
else {
cmp.propCompare=uprv_compareEBCDICPropertyNames;
}
uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
upname_compareRows, &cmp,
TRUE, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
tempMap->count);
return 0;
}
/* copy/swap/permutate _enumArray[] and _nameArray[] */
if(inEnumArray!=outEnumArray) {
for(i=0; i<tempMap->count; ++i) {
oldIndex=sortArray[i].index;
ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
}
} else {
/*
* in-place swapping: need to permutate into a temporary array
* and then copy back to not destroy the data
*/
EnumValue *tempEnumArray;
Offset *oldIndexes;
/* write name offsets directly from sortArray */
for(i=0; i<tempMap->count; ++i) {
ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name);
}
/*
* compress the oldIndexes into a separate array to make space for tempEnumArray
* the tempMap _nameArray becomes oldIndexes[], getting the index
* values from the 2D sortArray[],
* while sortArray=tempMap _enumArray[] becomes tempEnumArray[]
* this saves us allocating more memory
*
* it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
* and because the nameArray[] can be used for oldIndexes[]
*/
tempEnumArray=(EnumValue *)sortArray;
oldIndexes=(Offset *)(sortArray+tempMap->count);
/* copy sortArray[].index values into oldIndexes[] */
for(i=0; i<tempMap->count; ++i) {
oldIndexes[i]=sortArray[i].index;
}
/* permutate inEnumArray[] into tempEnumArray[] */
for(i=0; i<tempMap->count; ++i) {
ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode);
}
/* copy tempEnumArray[] to outEnumArray[] */
uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4);
}
}
return size;
}
int32_t
PropertyAliases::swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
UErrorCode *pErrorCode) {
const PropertyAliases *inAliases;
PropertyAliases *outAliases;
PropertyAliases aliases;
const ValueMap *inValueMaps;
ValueMap *outValueMaps;
ValueMap valueMap;
int32_t i;
inAliases=(const PropertyAliases *)inBytes;
outAliases=(PropertyAliases *)outBytes;
/* read the input PropertyAliases - all 16-bit values */
for(i=0; i<(int32_t)sizeof(PropertyAliases)/2; ++i) {
((uint16_t *)&aliases)[i]=ds->readUInt16(((const uint16_t *)inBytes)[i]);
}
if(length>=0) {
if(length<aliases.total_size) {
udata_printError(ds, "upname_swap(): too few bytes (%d after header) for all of pnames.icu\n",
length);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
/* copy the data for inaccessible bytes */
if(inBytes!=outBytes) {
uprv_memcpy(outBytes, inBytes, aliases.total_size);
}
/* swap the PropertyAliases class fields */
ds->swapArray16(ds, inAliases, sizeof(PropertyAliases), outAliases, pErrorCode);
/* swap the name groups */
ds->swapArray16(ds, inBytes+aliases.nameGroupPool_offset,
aliases.stringPool_offset-aliases.nameGroupPool_offset,
outBytes+aliases.nameGroupPool_offset, pErrorCode);
/* swap the strings */
udata_swapInvStringBlock(ds, inBytes+aliases.stringPool_offset,
aliases.total_size-aliases.stringPool_offset,
outBytes+aliases.stringPool_offset, pErrorCode);
/*
* alloc uint8_t temp[total_size] and reset it
* swap each top-level struct, put at least the count fields into temp
* use subclass-specific swap() functions
* enumerate value maps, for each
* if temp does not have count!=0 yet
* read count, put it into temp
* swap the array(s)
* resort strings in name->enum maps
* swap value maps
*/
LocalMemory<uint8_t> temp;
if(temp.allocateInsteadAndReset(aliases.total_size)==NULL) {
udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n",
aliases.total_size);
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return 0;
}
/* swap properties->name groups map */
NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
temp.getAlias(), aliases.enumToName_offset, pErrorCode);
/* swap name->properties map */
NameToEnum::swap(ds, inBytes, length, outBytes,
temp.getAlias(), aliases.nameToEnum_offset, pErrorCode);
/* swap properties->value maps map */
NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
temp.getAlias(), aliases.enumToValue_offset, pErrorCode);
/* enumerate all ValueMaps and swap them */
inValueMaps=(const ValueMap *)(inBytes+aliases.valueMap_offset);
outValueMaps=(ValueMap *)(outBytes+aliases.valueMap_offset);
for(i=0; i<aliases.valueMap_count; ++i) {
valueMap.enumToName_offset=udata_readInt16(ds, inValueMaps[i].enumToName_offset);
valueMap.ncEnumToName_offset=udata_readInt16(ds, inValueMaps[i].ncEnumToName_offset);
valueMap.nameToEnum_offset=udata_readInt16(ds, inValueMaps[i].nameToEnum_offset);
if(valueMap.enumToName_offset!=0) {
EnumToOffset::swap(ds, inBytes, length, outBytes,
temp.getAlias(), valueMap.enumToName_offset,
pErrorCode);
} else if(valueMap.ncEnumToName_offset!=0) {
NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
temp.getAlias(), valueMap.ncEnumToName_offset,
pErrorCode);
}
if(valueMap.nameToEnum_offset!=0) {
NameToEnum::swap(ds, inBytes, length, outBytes,
temp.getAlias(), valueMap.nameToEnum_offset,
pErrorCode);
}
}
/* swap the ValueMaps array itself */
ds->swapArray16(ds, inValueMaps, aliases.valueMap_count*sizeof(ValueMap),
outValueMaps, pErrorCode);
/* name groups and strings were swapped above */
}
return aliases.total_size;
}
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode) {
const UDataInfo *pInfo;
int32_t headerSize;
const uint8_t *inBytes;
uint8_t *outBytes;
/* udata_swapDataHeader checks the arguments */
headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
/* check data format and format version */
pInfo=(const UDataInfo *)((const char *)inData+4);
if(!(
pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */
pInfo->dataFormat[1]==0x6e &&
pInfo->dataFormat[2]==0x61 &&
pInfo->dataFormat[3]==0x6d &&
pInfo->formatVersion[0]==1
)) {
udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3],
pInfo->formatVersion[0]);
*pErrorCode=U_UNSUPPORTED_ERROR;
return 0;
}
inBytes=(const uint8_t *)inData+headerSize;
outBytes=(uint8_t *)outData+headerSize;
if(length>=0) {
length-=headerSize;
if(length<(int32_t)sizeof(PropertyAliases)) {
udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
length);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
return headerSize+PropertyAliases::swap(ds, inBytes, length, outBytes, pErrorCode);
}
//eof