/* ******************************************************************************* * * Copyright (C) 1999, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: unames.c * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 1999oct04 * created by: Markus W. Scherer */ /* set import/export definitions */ #ifndef U_COMMON_IMPLEMENTATION # define U_COMMON_IMPLEMENTATION #endif #include "unicode/utypes.h" #include "umutex.h" #include "cmemory.h" #include "unicode/uchar.h" #include "unicode/udata.h" /* prototypes --------------------------------------------------------------- */ #define DATA_NAME "unames" #define DATA_TYPE "dat" #define GROUP_SHIFT 5 #define LINES_PER_GROUP (1UL<<GROUP_SHIFT) #define GROUP_MASK (LINES_PER_GROUP-1) typedef struct { uint16_t groupMSB, offsetHigh, offsetLow; /* avoid padding */ } Group; typedef struct { uint32_t start, end; uint8_t type, variant; uint16_t size; } AlgorithmicRange; typedef struct { uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset; } UCharNames; static UDataMemory *uCharNamesData=NULL; static UCharNames *uCharNames=NULL; static UBool isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); static uint16_t getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength); static uint16_t expandGroupName(UCharNames *names, Group *group, uint16_t lineNumber, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength); static uint16_t expandName(UCharNames *names, uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength); static uint16_t getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength); /* public API --------------------------------------------------------------- */ U_CAPI UTextOffset U_EXPORT2 u_charName(uint32_t code, UCharNameChoice nameChoice, char *buffer, UTextOffset bufferLength, UErrorCode *pErrorCode) { AlgorithmicRange *algRange; uint32_t *p; uint32_t i; /* check the argument values */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || buffer==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } if(code>0x10ffff) { return 0; } /* load UCharNames from file if necessary */ if(uCharNames==NULL) { UCharNames *names; UDataMemory *data; /* open the data outside the mutex block */ data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); if(U_FAILURE(*pErrorCode)) { return 0; } names=(UCharNames *)udata_getMemory(data); /* in the mutex block, set the data for this process */ { umtx_lock(NULL); if(uCharNames==NULL) { uCharNames=names; uCharNamesData=data; data=NULL; names=NULL; } umtx_unlock(NULL); } /* if a different thread set it first, then close the extra data */ if(data!=NULL) { udata_close(data); /* NULL if it was set correctly */ } } /* try algorithmic names first */ p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); i=*p; algRange=(AlgorithmicRange *)(p+1); while(i>0) { if(algRange->start<=code && code<=algRange->end) { return getAlgName(algRange, code, nameChoice, buffer, (uint16_t)bufferLength); } algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); --i; } /* normal character name */ return getName(uCharNames, code, nameChoice, buffer, (uint16_t)bufferLength); } /* ### TBD */ U_CAPI UChar32 U_EXPORT2 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } *pErrorCode=U_UNSUPPORTED_ERROR; return 0xffff; } U_CAPI void U_EXPORT2 u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return; } *pErrorCode=U_UNSUPPORTED_ERROR; } /* implementation ----------------------------------------------------------- */ static UBool isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo) { return (UBool)( pInfo->size>=20 && pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY && pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x61 && pInfo->dataFormat[3]==0x6d && pInfo->formatVersion[0]==1); } static uint16_t getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength) { uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), start=0, limit=*(uint16_t *)((char *)names+names->groupsOffset), number; Group *groups=(Group *)((char *)names+names->groupsOffset+2); /* binary search for the group of names that contains the one for code */ while(start<limit-1) { number=(uint16_t)((start+limit)/2); if(groupMSB<groups[number].groupMSB) { limit=number; } else { start=number; } } if(groupMSB==groups[start].groupMSB) { return expandGroupName(names, groups+start, (uint16_t)(code&GROUP_MASK), nameChoice, buffer, bufferLength); } else { /* group not found */ /* zero-terminate */ if(bufferLength>0) { *buffer=0; } return 0; } } static uint16_t expandGroupName(UCharNames *names, Group *group, uint16_t lineNumber, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength) { uint8_t *s=(uint8_t *)names+names->groupStringOffset+ (group->offsetHigh<<16|group->offsetLow); /* read the length of this string and get the group strings offset */ uint16_t i=0, offset=0, length=0, nameOffset=0, nameLength=0; uint8_t lengthByte; /* all 32 lengths must be read to get the offset of the first group string */ while(i<LINES_PER_GROUP) { lengthByte=*s++; /* read even nibble - MSBs of lengthByte */ if(length>=12) { /* double-nibble length spread across two bytes */ length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); lengthByte&=0xf; } else if((lengthByte&0xf0)>=0xc0) { /* double-nibble length spread across this one byte */ length=(uint16_t)((lengthByte&0x3f)+12); } else { /* single-nibble length in MSBs */ length=(uint16_t)(lengthByte>>4); lengthByte&=0xf; } if(i==lineNumber) { nameOffset=offset; nameLength=length; } offset+=length; ++i; /* read odd nibble - LSBs of lengthByte */ if((lengthByte&0xf0)==0) { /* this nibble was not consumed for a double-nibble length above */ length=lengthByte; if(length<12) { /* single-nibble length in LSBs */ if(i==lineNumber) { nameOffset=offset; nameLength=length; } offset+=length; ++i; } } else { length=0; /* prevent double-nibble detection in the next iteration */ } } return expandName(names, s+nameOffset, nameLength, nameChoice, buffer, bufferLength); } #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ if((bufferLength)>0) { \ *(buffer)++=c; \ --(bufferLength); \ } \ ++(bufferPos); \ } static uint16_t expandName(UCharNames *names, uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength) { uint16_t *tokens=(uint16_t *)names+8; uint16_t token, tokenCount=*tokens++, bufferPos=0; uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; uint8_t c; if(nameChoice!=U_UNICODE_CHAR_NAME) { /* skip the modern name */ while(nameLength>0) { --nameLength; if(*name++==';') { break; } } } /* write each letter directly, and write a token word per token */ while(nameLength>0) { --nameLength; c=*name++; if(c==';') { /* finished */ break; } if(c>=tokenCount) { /* implicit letter */ WRITE_CHAR(buffer, bufferLength, bufferPos, c); } else { token=tokens[c]; if(token==(uint16_t)(-2)) { /* this is a lead byte for a double-byte token */ token=tokens[c<<8|*name++]; --nameLength; } if(token==(uint16_t)(-1)) { /* explicit letter */ WRITE_CHAR(buffer, bufferLength, bufferPos, c); } else { /* write token word */ uint8_t *tokenString=tokenStrings+token; while((c=*tokenString++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } } } } /* zero-terminate */ if(bufferLength>0) { *buffer=0; } return bufferPos; } static uint16_t getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength) { uint16_t bufferPos=0; switch(range->type) { case 0: { /* name = prefix hex-digits */ char *s=(char *)(range+1); char c; uint16_t i, count; /* copy prefix */ while((c=*s++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } /* write hexadecimal code point value */ count=range->variant; /* zero-terminate */ if(count<bufferLength) { buffer[count]=0; } for(i=count; i>0;) { if(--i<bufferLength) { c=(char)(code&0xf); if(c<10) { c+='0'; } else { c+='A'-10; } buffer[i]=c; } code>>=4; } bufferPos+=count; break; } case 1: { /* name = prefix factorized-elements */ uint16_t *factors=(uint16_t *)(range+1); char *s=(char *)(factors+range->variant); char c; uint16_t indeces[8]; uint16_t i, count, factor; /* copy prefix */ while((c=*s++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } /* write elements according to the factors */ code-=range->start; /* * the factorized elements are determined by modulo arithmetic * with the factors of this algorithm * * note that for fewer operations, count is decremented here */ count=(uint16_t)(range->variant-1); for(i=count; i>0; --i) { factor=factors[i]; indeces[i]=(uint16_t)(code%factor); code/=factor; } /* * we don't need to calculate the last modulus because start<=code<=end * guarantees here that code<=factors[0] */ indeces[0]=(uint16_t)code; /* write each element */ for(;;) { /* skip indeces[i] strings */ factor=indeces[i]; while(factor>0) { while(*s++!=0) {} --factor; } /* write element */ while((c=*s++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } /* we do not need to perform the rest of this loop for i==count - break here */ if(i>=count) { break; } /* skip the rest of the strings for this factors[i] */ factor=(uint16_t)(factors[i]-indeces[i]-1); while(factor>0) { while(*s++!=0) {} --factor; } ++i; } /* zero-terminate */ if(bufferLength>0) { *buffer=0; } break; } default: /* undefined type */ /* zero-terminate */ if(bufferLength>0) { *buffer=0; } break; } return bufferPos; }