/* ****************************************************************************** * * Copyright (C) 1999-2001, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: unames.c * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 1999oct04 * created by: Markus W. Scherer */ /* set import/export definitions */ #ifndef U_COMMON_IMPLEMENTATION # define U_COMMON_IMPLEMENTATION #endif #include "unicode/utypes.h" #include "unicode/uchar.h" #include "unicode/udata.h" #include "unicode/utf.h" #include "ustr_imp.h" #include "umutex.h" #include "cmemory.h" #include "cstring.h" #include "ucln_cmn.h" /* prototypes ------------------------------------------------------------- */ static const char DATA_NAME[] = "unames"; static const char DATA_TYPE[] = "dat"; #define GROUP_SHIFT 5 #define LINES_PER_GROUP (1UL<=U_CHAR_NAME_CHOICE_COUNT || bufferLength<0 || (bufferLength>0 && buffer==NULL) ) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { return u_terminateChars(buffer, bufferLength, 0, pErrorCode); } length=0; /* try algorithmic names first */ p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); i=*p; algRange=(AlgorithmicRange *)(p+1); while(i>0) { if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); break; } algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); --i; } if(i==0) { if (nameChoice == U_EXTENDED_CHAR_NAME) { length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength); if (!length) { /* extended character name */ length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength); } } else { /* normal character name */ length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, 
(uint16_t)bufferLength); } } return u_terminateChars(buffer, bufferLength, length, pErrorCode); } U_CAPI UChar32 U_EXPORT2 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode) { char upper[120], lower[120]; FindName findName; AlgorithmicRange *algRange; uint32_t *p; uint32_t i; UChar32 cp = 0; char c0; UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return error; } if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return error; } if(!isDataLoaded(pErrorCode)) { return error; } /* construct the uppercase and lowercase of the name first */ for(i=0; i') { for (--i; lower[i] && lower[i] != '-'; --i); if (lower[i] == '-') { /* We've got a category. */ uint32_t cIdx; lower[i] = 0; for (++i; lower[i] != '>'; ++i) { if (lower[i] >= '0' && lower[i] <= '9') { cp = (cp << 4) + lower[i] - '0'; } else if (lower[i] >= 'a' && lower[i] <= 'f') { cp = (cp << 4) + lower[i] - 'a' + 10; } else { *pErrorCode = U_ILLEGAL_CHAR_FOUND; return error; } } /* Now validate the category name. We could use a binary search, or a trie, if we really wanted to. 
*/ for (lower[i] = 0, cIdx = 0; cIdx < sizeof(charCatNames) / sizeof(*charCatNames); ++cIdx) { if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { if (getCharCat(cp) == cIdx) { return cp; } break; } } } } } *pErrorCode = U_ILLEGAL_CHAR_FOUND; return error; } /* try algorithmic names now */ p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); i=*p; algRange=(AlgorithmicRange *)(p+1); while(i>0) { if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { return cp; } algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); --i; } /* normal character name */ findName.otherName=upper; findName.code=error; enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); if (findName.code == error) { *pErrorCode = U_ILLEGAL_CHAR_FOUND; } return findName.code; } U_CAPI void U_EXPORT2 u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode) { AlgorithmicRange *algRange; uint32_t *p; uint32_t i; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return; } if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; } if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { limit = UCHAR_MAX_VALUE + 1; } if((uint32_t)start>=(uint32_t)limit) { return; } if(!isDataLoaded(pErrorCode)) { return; } /* interleave the data-driven ones with the algorithmic ones */ /* iterate over all algorithmic ranges; assume that they are in ascending order */ p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); i=*p; algRange=(AlgorithmicRange *)(p+1); while(i>0) { /* enumerate the character names before the current algorithmic range */ /* here: startstart) { if((uint32_t)limit<=algRange->start) { enumNames(uCharNames, start, limit, fn, context, nameChoice); return; } if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { return; } start=(UChar32)algRange->start; } /* enumerate the character names in the 
current algorithmic range */ /* here: algRange->start<=startend) { if((uint32_t)limit<=(algRange->end+1)) { enumAlgNames(algRange, start, limit, fn, context, nameChoice); return; } if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { return; } start=(UChar32)algRange->end+1; } /* continue to the next algorithmic range (here: startsize); --i; } /* enumerate the character names after the last algorithmic range */ enumNames(uCharNames, start, limit, fn, context, nameChoice); } /* implementation ----------------------------------------------------------- */ UBool unames_cleanup() { if(uCharNamesData) { udata_close(uCharNamesData); uCharNamesData = NULL; } if(uCharNames) { uCharNames = NULL; } return TRUE; } static UBool isDataLoaded(UErrorCode *pErrorCode) { /* load UCharNames from file if necessary */ if(uCharNames==NULL) { UCharNames *names; UDataMemory *data; /* open the data outside the mutex block */ data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); if(U_FAILURE(*pErrorCode)) { return FALSE; } names=(UCharNames *)udata_getMemory(data); /* in the mutex block, set the data for this process */ { umtx_lock(NULL); if(uCharNames==NULL) { uCharNames=names; uCharNamesData=data; data=NULL; names=NULL; } umtx_unlock(NULL); } /* if a different thread set it first, then close the extra data */ if(data!=NULL) { udata_close(data); /* NULL if it was set correctly */ } } return TRUE; } static UBool isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo) { return (UBool)( pInfo->size>=20 && pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY && pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x61 && pInfo->dataFormat[3]==0x6d && pInfo->formatVersion[0]==1); } /* * getGroup() does a binary search for the group that contains the * Unicode code point "code". 
* The return value is always a valid Group* that may contain "code" * or else is the highest group before "code". * If the lowest group is after "code", then that one is returned. */ static Group * getGroup(UCharNames *names, uint32_t code) { uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), start=0, limit=*(uint16_t *)((char *)names+names->groupsOffset), number; Group *groups=(Group *)((char *)names+names->groupsOffset+2); /* binary search for the group of names that contains the one for code */ while(start>GROUP_SHIFT)==group->groupMSB) { return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice, buffer, bufferLength); } else { /* group not found */ /* zero-terminate */ if(bufferLength>0) { *buffer=0; } return 0; } } /* * expandGroupLengths() reads a block of compressed lengths of 32 strings and * expands them into offsets and lengths for each string. * Lengths are stored with a variable-width encoding in consecutive nibbles: * If a nibble<0xc, then it is the length itself (0=empty string). * If a nibble>=0xc, then it forms a length value with the following nibble. * Calculation see below. * The offsets and lengths arrays must be at least 33 (one more) long because * there is no check here at the end if the last nibble is still used. 
*/ static const uint8_t * expandGroupLengths(const uint8_t *s, uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { /* read the lengths of the 32 strings in this group and get each string's offset */ uint16_t i=0, offset=0, length=0; uint8_t lengthByte; /* all 32 lengths must be read to get the offset of the first group string */ while(i=12) { /* double-nibble length spread across two bytes */ length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); lengthByte&=0xf; } else if((lengthByte /* &0xf0 */)>=0xc0) { /* double-nibble length spread across this one byte */ length=(uint16_t)((lengthByte&0x3f)+12); } else { /* single-nibble length in MSBs */ length=(uint16_t)(lengthByte>>4); lengthByte&=0xf; } *offsets++=offset; *lengths++=length; offset+=length; ++i; /* read odd nibble - LSBs of lengthByte */ if((lengthByte&0xf0)==0) { /* this nibble was not consumed for a double-nibble length above */ length=lengthByte; if(length<12) { /* single-nibble length in LSBs */ *offsets++=offset; *lengths++=length; offset+=length; ++i; } } else { length=0; /* prevent double-nibble detection in the next iteration */ } } /* now, s is at the first group string */ return s; } static uint16_t expandGroupName(UCharNames *names, Group *group, uint16_t lineNumber, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength) { uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; const uint8_t *s=(uint8_t *)names+names->groupStringOffset+ (group->offsetHigh<<16|group->offsetLow); s=expandGroupLengths(s, offsets, lengths); return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice, buffer, bufferLength); } #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ if((bufferLength)>0) { \ *(buffer)++=c; \ --(bufferLength); \ } \ ++(bufferPos); \ } /* * Important: expandName() and compareName() are almost the same - * apply fixes to both. 
*/
/*
 * Expands one stored name into plain characters.
 *
 * names         the loaded names data; the token table and the token strings
 *               are taken from it
 * name          the stored bytes of the name
 * nameLength    number of stored bytes
 * nameChoice    which of the stored names is wanted
 * buffer        output; receives as many characters as fit and is
 *               zero-terminated if there is room left for the terminator
 * bufferLength  capacity of buffer
 *
 * Each stored byte is either a character by itself or selects a token word.
 * A table entry of 0xfffe marks a lead byte of a double-byte token, an entry
 * of 0xffff marks a byte that stands for itself, anything else is the offset
 * of the token word in the token strings. A ';' character separates the
 * modern name from the Unicode 1.0 name.
 *
 * Returns the length of the complete name, even if it did not fit.
 */
static uint16_t
expandName(UCharNames *names,
           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
           char *buffer, uint16_t bufferLength) {
    /* the token table starts 8 uint16_t units into the data: first its count, then the entries */
    uint16_t *tokenTable=(uint16_t *)names+8;
    const uint16_t tokenCount=*tokenTable++;
    const uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    /* 0xffff if the semicolon byte value is a character, not a token number */
    const uint16_t semicolonEntry=
        (uint8_t)';'>=tokenCount ? (uint16_t)(-1) : tokenTable[(uint8_t)';'];
    uint16_t entry, written=0;
    uint8_t b;

    if(nameChoice==U_UNICODE_10_CHAR_NAME) {
        if(semicolonEntry!=(uint16_t)(-1)) {
            /*
             * the semicolon byte value is a token number, so only modern
             * names are stored and there is no Unicode 1.0 name to return
             */
            nameLength=0;
        } else {
            /* step over the modern name, up to and including its semicolon */
            for(; nameLength>0;) {
                b=*name++;
                --nameLength;
                if(b==';') {
                    break;
                }
            }
        }
    }

    /* letters are copied as they are, token numbers are replaced by their words */
    while(nameLength>0) {
        b=*name++;
        --nameLength;

        if(b>=tokenCount) {
            /* beyond the token table: implicit letter, or the end of this name */
            if(b==';') {
                break;
            }
            WRITE_CHAR(buffer, bufferLength, written, b);
            continue;
        }

        entry=tokenTable[b];
        if(entry==(uint16_t)(-2)) {
            /* lead byte of a double-byte token: the next byte completes the index */
            entry=tokenTable[b<<8|*name++];
            --nameLength;
        }

        if(entry!=(uint16_t)(-1)) {
            /* a real token: append its word */
            const uint8_t *word=tokenStrings+entry;
            while((b=*word++)!=0) {
                WRITE_CHAR(buffer, bufferLength, written, b);
            }
            continue;
        }

        if(b!=';') {
            /* explicit letter */
            WRITE_CHAR(buffer, bufferLength, written, b);
            continue;
        }

        /*
         * A semicolon ends the name. For extended names, if nothing has been
         * written yet (no modern name), skip it and go on with the
         * Unicode 1.0 name that follows.
         */
        if(written==0 && nameChoice==U_EXTENDED_CHAR_NAME &&
           semicolonEntry==(uint16_t)(-1)) {
            continue;
        }
        break;
    }

    /* zero-terminate */
    if(bufferLength>0) {
        *buffer=0;
    }

    return written;
}

/*
 * compareName() is almost the same as expandName() except that it compares
 * the currently expanded name to an input name.
* It returns the match/no match result as soon as possible. */ static UBool compareName(UCharNames *names, const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, const char *otherName) { uint16_t *tokens=(uint16_t *)names+8; uint16_t token, tokenCount=*tokens++; uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; uint8_t c; const char *origOtherName = otherName; if(nameChoice==U_UNICODE_10_CHAR_NAME) { /* * skip the modern name if it is not requested _and_ * if the semicolon byte value is a character, not a token number */ if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { while(nameLength>0) { --nameLength; if(*name++==';') { break; } } } else { /* * the semicolon byte value is a token number, therefore * only modern names are stored in unames.dat and there is no * such requested Unicode 1.0 name here */ nameLength=0; } } /* compare each letter directly, and compare a token word per token */ while(nameLength>0) { --nameLength; c=*name++; if(c>=tokenCount) { if(c!=';') { /* implicit letter */ if((char)c!=*otherName++) { return FALSE; } } else { /* finished */ break; } } else { token=tokens[c]; if(token==(uint16_t)(-2)) { /* this is a lead byte for a double-byte token */ token=tokens[c<<8|*name++]; --nameLength; } if(token==(uint16_t)(-1)) { if(c!=';') { /* explicit letter */ if((char)c!=*otherName++) { return FALSE; } } else { /* stop, but skip the semicolon if we are seeking extended names and there was no 2.0 name but there is a 1.0 name. */ if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { continue; } } /* finished */ break; } } else { /* write token word */ uint8_t *tokenString=tokenStrings+token; while((c=*tokenString++)!=0) { if((char)c!=*otherName++) { return FALSE; } } } } } /* complete match? 
*/ return (UBool)(*otherName==0); } /* * enumGroupNames() enumerates all the names in a 32-group * and either calls the enumerator function or finds a given input name. */ static UBool enumGroupNames(UCharNames *names, Group *group, UChar32 start, UChar32 end, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice) { uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; const uint8_t *s=(uint8_t *)names+names->groupStringOffset+ (group->offsetHigh<<16|group->offsetLow); s=expandGroupLengths(s, offsets, lengths); if(fn!=DO_FIND_NAME) { char buffer[200]; uint16_t length; while(start<=end) { length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer)); if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; } /* here, we assume that the buffer is large enough */ if(length>0) { if(!fn(context, start, nameChoice, buffer, length)) { return FALSE; } } ++start; } } else { const char *otherName=((FindName *)context)->otherName; while(start<=end) { if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) { ((FindName *)context)->code=start; return FALSE; } ++start; } } return TRUE; } /* * enumExtNames enumerate extended names. * It only needs to do it if it is called with a real function and not * with the dummy DO_FIND_NAME, because u_charFromName() does a check * for extended names by itself. 
*/ static UBool enumExtNames(UChar32 start, UChar32 end, UEnumCharNamesFn *fn, void *context) { if(fn!=DO_FIND_NAME) { char buffer[200]; uint16_t length; while(start<=end) { buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; /* here, we assume that the buffer is large enough */ if(length>0) { if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { return FALSE; } } ++start; } } return TRUE; } static UBool enumNames(UCharNames *names, UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice) { uint16_t startGroupMSB, endGroupMSB, groupCount; Group *group, *groupLimit; startGroupMSB=(uint16_t)(start>>GROUP_SHIFT); endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT); /* find the group that contains start, or the highest before it */ group=getGroup(names, start); if(startGroupMSB==endGroupMSB) { if(startGroupMSB==group->groupMSB) { /* if start and limit-1 are in the same group, then enumerate only in that one */ return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice); } } else { groupCount=*(uint16_t *)((char *)names+names->groupsOffset); groupLimit=(Group *)((char *)names+names->groupsOffset+2)+groupCount; if(startGroupMSB==group->groupMSB) { /* enumerate characters in the partial start group */ if((start&GROUP_MASK)!=0) { if(!enumGroupNames(names, group, start, ((UChar32)startGroupMSB<group->groupMSB) { /* make sure that we start enumerating with the first group after start */ if (group + 1 < groupLimit && (group + 1)->groupMSB > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT; if (end > limit) { end = limit; } if (!enumExtNames(start, end - 1, fn, context)) { return FALSE; } } ++group; } /* enumerate entire groups between the start- and end-groups */ while(groupgroupMSBgroupMSB<groupMSB > group->groupMSB + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT; if (end > limit) { end = limit; } if 
(!enumExtNames((group->groupMSB + 1) << GROUP_SHIFT, end - 1, fn, context)) { return FALSE; } } ++group; } /* enumerate within the end group (group->groupMSB==endGroupMSB) */ if(groupgroupMSB==endGroupMSB) { return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice); } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) { UChar32 next = ((group - 1)->groupMSB + 1) << GROUP_SHIFT; if (next > start) { start = next; } } else { return TRUE; } } /* we have not found a group, which means everything is made of extended names. */ if (nameChoice == U_EXTENDED_CHAR_NAME) { if (limit > UCHAR_MAX_VALUE + 1) { limit = UCHAR_MAX_VALUE + 1; } return enumExtNames(start, limit - 1, fn, context); } return TRUE; } /* * Important: * Parts of findAlgName() are almost the same as some of getAlgName(). * Fixes must be applied to both. */ static uint16_t getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength) { uint16_t bufferPos=0; /* * Do not write algorithmic Unicode 1.0 names because * Unihan names are the same as the modern ones, * extension A was only introduced with Unicode 3.0, and * the Hangul syllable block was moved and changed around Unicode 1.1.5. 
*/ if(nameChoice==U_UNICODE_10_CHAR_NAME) { /* zero-terminate */ if(bufferLength>0) { *buffer=0; } return 0; } switch(range->type) { case 0: { /* name = prefix hex-digits */ const char *s=(const char *)(range+1); char c; uint16_t i, count; /* copy prefix */ while((c=*s++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } /* write hexadecimal code point value */ count=range->variant; /* zero-terminate */ if(count0;) { if(--i>=4; } bufferPos+=count; break; } case 1: { /* name = prefix factorized-elements */ uint16_t indexes[8]; const uint16_t *factors=(const uint16_t *)(range+1); uint16_t count=range->variant; const char *s=(const char *)(factors+count); char c; /* copy prefix */ while((c=*s++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } bufferPos+=writeFactorSuffix(factors, count, s, code-range->start, indexes, NULL, NULL, buffer, bufferLength); break; } default: /* undefined type */ /* zero-terminate */ if(bufferLength>0) { *buffer=0; } break; } return bufferPos; } static uint16_t writeFactorSuffix(const uint16_t *factors, uint16_t count, const char *s, /* suffix elements */ uint32_t code, uint16_t indexes[8], /* output fields from here */ const char *elementBases[8], const char *elements[8], char *buffer, uint16_t bufferLength) { uint16_t i, factor, bufferPos=0; char c; /* write elements according to the factors */ /* * the factorized elements are determined by modulo arithmetic * with the factors of this algorithm * * note that for fewer operations, count is decremented here */ --count; for(i=count; i>0; --i) { factor=factors[i]; indexes[i]=(uint16_t)(code%factor); code/=factor; } /* * we don't need to calculate the last modulus because start<=code<=end * guarantees here that code<=factors[0] */ indexes[0]=(uint16_t)code; /* write each element */ for(;;) { if(elementBases!=NULL) { *elementBases++=s; } /* skip indexes[i] strings */ factor=indexes[i]; while(factor>0) { while(*s++!=0) {} --factor; } if(elements!=NULL) { *elements++=s; } /* write 
element */ while((c=*s++)!=0) { WRITE_CHAR(buffer, bufferLength, bufferPos, c); } /* we do not need to perform the rest of this loop for i==count - break here */ if(i>=count) { break; } /* skip the rest of the strings for this factors[i] */ factor=(uint16_t)(factors[i]-indexes[i]-1); while(factor>0) { while(*s++!=0) {} --factor; } ++i; } /* zero-terminate */ if(bufferLength>0) { *buffer=0; } return bufferPos; } /* * Important: enumAlgNames() and findAlgName() are almost the same. * Any fix must be applied to both. */ static UBool enumAlgNames(AlgorithmicRange *range, UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice) { char buffer[200]; uint16_t length; if(nameChoice==U_UNICODE_10_CHAR_NAME) { return TRUE; } switch(range->type) { case 0: { char *s, *end; char c; /* get the full name of the start character */ length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer)); if(length<=0) { return TRUE; } /* call the enumerator function with this first character */ if(!fn(context, start, nameChoice, buffer, length)) { return FALSE; } /* go to the end of the name; all these names have the same length */ end=buffer; while(*end!=0) { ++end; } /* enumerate the rest of the names */ while(++startvariant; const char *s=(const char *)(factors+count); char *suffix, *t; uint16_t prefixLength, i, index; char c; /* name = prefix factorized-elements */ /* copy prefix */ suffix=buffer; prefixLength=0; while((c=*s++)!=0) { *suffix++=c; ++prefixLength; } /* append the suffix of the start character */ length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count, s, (uint32_t)start-range->start, indexes, elementBases, elements, suffix, (uint16_t)(sizeof(buffer)-prefixLength))); /* call the enumerator function with this first character */ if(!fn(context, start, nameChoice, buffer, length)) { return FALSE; } /* enumerate the rest of the names */ while(++starttype) { case 0: { /* name = prefix hex-digits */ const char *s=(const 
char *)(range+1); char c; uint16_t i, count; /* compare prefix */ while((c=*s++)!=0) { if((char)c!=*otherName++) { return 0xffff; } } /* read hexadecimal code point value */ count=range->variant; code=0; for(i=0; istart<=(uint32_t)code && (uint32_t)code<=range->end) { return code; } break; } case 1: { char buffer[64]; uint16_t indexes[8]; const char *elementBases[8], *elements[8]; const uint16_t *factors=(const uint16_t *)(range+1); uint16_t count=range->variant; const char *s=(const char *)(factors+count), *t; UChar32 start, limit; uint16_t i, index; char c; /* name = prefix factorized-elements */ /* compare prefix */ while((c=*s++)!=0) { if((char)c!=*otherName++) { return 0xffff; } } start=(UChar32)range->start; limit=(UChar32)(range->end+1); /* initialize the suffix elements for enumeration; indexes should all be set to 0 */ writeFactorSuffix(factors, count, s, 0, indexes, elementBases, elements, buffer, sizeof(buffer)); /* compare the first suffix */ if(0==uprv_strcmp(otherName, buffer)) { return start; } /* enumerate and compare the rest of the suffixes */ while(++start= sizeof(charCatNames) / sizeof(*charCatNames)) { return "unknown"; } else { return charCatNames[cat]; } } static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { const char *catname = getCharCatName(code); uint16_t length = 0; UChar32 cp; int ndigits, i; WRITE_CHAR(buffer, bufferLength, length, '<'); while (catname[length - 1]) { WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]); } WRITE_CHAR(buffer, bufferLength, length, '-'); for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4) ; if (ndigits < 4) ndigits = 4; for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { uint8_t v = (uint8_t)(cp & 0xf); buffer[--i] = (v < 10 ? 
'0' + v : 'A' + v - 10); } buffer += ndigits; length += ndigits; WRITE_CHAR(buffer, bufferLength, length, '>'); return length; } /* * Hey, Emacs, please set the following: * * Local Variables: * indent-tabs-mode: nil * End: * */