c69c164be8
basic build now compiles w/o a single warning on Linux. One with --enable-strict is a different matter... X-SVN-Rev: 1124
447 lines
12 KiB
C
447 lines
12 KiB
C
/*
|
|
*******************************************************************************
|
|
*
|
|
* Copyright (C) 1999, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*
|
|
*******************************************************************************
|
|
* file name: unames.c
|
|
* encoding: US-ASCII
|
|
* tab size: 8 (not used)
|
|
* indentation:4
|
|
*
|
|
* created on: 1999oct04
|
|
* created by: Markus W. Scherer
|
|
*/
|
|
|
|
/* set import/export definitions */
|
|
#ifndef U_COMMON_IMPLEMENTATION
|
|
# define U_COMMON_IMPLEMENTATION
|
|
#endif
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "umutex.h"
|
|
#include "cmemory.h"
|
|
#include "unicode/uchar.h"
|
|
#include "unicode/udata.h"
|
|
|
|
/* prototypes --------------------------------------------------------------- */
|
|
|
|
#define DATA_NAME "unames"
|
|
#define DATA_TYPE "dat"
|
|
|
|
#define GROUP_SHIFT 5
|
|
#define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
|
|
#define GROUP_MASK (LINES_PER_GROUP-1)
|
|
|
|
typedef struct {
|
|
uint16_t groupMSB,
|
|
offsetHigh, offsetLow; /* avoid padding */
|
|
} Group;
|
|
|
|
typedef struct {
|
|
uint32_t start, end;
|
|
uint8_t type, variant;
|
|
uint16_t size;
|
|
} AlgorithmicRange;
|
|
|
|
typedef struct {
|
|
uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
|
|
} UCharNames;
|
|
|
|
static UDataMemory *uCharNamesData=NULL;
|
|
static UCharNames *uCharNames=NULL;
|
|
|
|
static bool_t
|
|
isAcceptable(void *context,
|
|
const char *type, const char *name,
|
|
UDataInfo *pInfo);
|
|
|
|
static uint16_t
|
|
getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength);
|
|
|
|
static uint16_t
|
|
expandGroupName(UCharNames *names, Group *group,
|
|
uint16_t lineNumber, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength);
|
|
|
|
static uint16_t
|
|
expandName(UCharNames *names,
|
|
uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength);
|
|
|
|
static uint16_t
|
|
getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength);
|
|
|
|
/* public API --------------------------------------------------------------- */
|
|
|
|
U_CAPI UTextOffset U_EXPORT2
|
|
u_charName(uint32_t code, UCharNameChoice nameChoice,
|
|
char *buffer, UTextOffset bufferLength,
|
|
UErrorCode *pErrorCode) {
|
|
AlgorithmicRange *algRange;
|
|
uint32_t *p;
|
|
uint32_t i;
|
|
|
|
/* check the argument values */
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
return 0;
|
|
} else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || buffer==NULL) {
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
if(code>0x10ffff) {
|
|
return 0;
|
|
}
|
|
|
|
/* load UCharNames from file if necessary */
|
|
if(uCharNames==NULL) {
|
|
UCharNames *names;
|
|
UDataMemory *data;
|
|
|
|
/* open the data outside the mutex block */
|
|
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
return 0;
|
|
}
|
|
|
|
names=(UCharNames *)udata_getMemory(data);
|
|
|
|
/* in the mutex block, set the data for this process */
|
|
{
|
|
umtx_lock(NULL);
|
|
if(uCharNames==NULL) {
|
|
uCharNames=names;
|
|
uCharNamesData=data;
|
|
data=NULL;
|
|
names=NULL;
|
|
}
|
|
umtx_unlock(NULL);
|
|
}
|
|
|
|
/* if a different thread set it first, then close the extra data */
|
|
if(data!=NULL) {
|
|
udata_close(data); /* NULL if it was set correctly */
|
|
}
|
|
}
|
|
|
|
/* try algorithmic names first */
|
|
p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
|
|
i=*p;
|
|
algRange=(AlgorithmicRange *)(p+1);
|
|
while(i>0) {
|
|
if(algRange->start<=code && code<=algRange->end) {
|
|
return getAlgName(algRange, code, nameChoice, buffer, (uint16_t)bufferLength);
|
|
}
|
|
algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
|
|
--i;
|
|
}
|
|
|
|
/* normal character name */
|
|
return getName(uCharNames, code, nameChoice, buffer, (uint16_t)bufferLength);
|
|
}
|
|
|
|
/* implementation ----------------------------------------------------------- */
|
|
|
|
static bool_t
|
|
isAcceptable(void *context,
|
|
const char *type, const char *name,
|
|
UDataInfo *pInfo) {
|
|
return
|
|
pInfo->size>=20 &&
|
|
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
|
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
|
pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
|
|
pInfo->dataFormat[1]==0x6e &&
|
|
pInfo->dataFormat[2]==0x61 &&
|
|
pInfo->dataFormat[3]==0x6d &&
|
|
pInfo->formatVersion[0]==1;
|
|
}
|
|
|
|
static uint16_t
|
|
getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength) {
|
|
uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
|
|
start=0,
|
|
limit=*(uint16_t *)((char *)names+names->groupsOffset),
|
|
number;
|
|
Group *groups=(Group *)((char *)names+names->groupsOffset+2);
|
|
|
|
/* binary search for the group of names that contains the one for code */
|
|
while(start<limit-1) {
|
|
number=(start+limit)/2;
|
|
if(groupMSB<groups[number].groupMSB) {
|
|
limit=number;
|
|
} else {
|
|
start=number;
|
|
}
|
|
}
|
|
|
|
if(groupMSB==groups[start].groupMSB) {
|
|
return expandGroupName(names, groups+start, (uint16_t)(code&GROUP_MASK), nameChoice,
|
|
buffer, bufferLength);
|
|
} else {
|
|
/* group not found */
|
|
/* zero-terminate */
|
|
if(bufferLength>0) {
|
|
*buffer=0;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static uint16_t
|
|
expandGroupName(UCharNames *names, Group *group,
|
|
uint16_t lineNumber, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength) {
|
|
uint8_t *s=(uint8_t *)names+names->groupStringOffset+
|
|
(group->offsetHigh<<16|group->offsetLow);
|
|
|
|
/* read the length of this string and get the group strings offset */
|
|
uint16_t i=0, offset=0, length=0, nameOffset=0, nameLength=0;
|
|
uint8_t lengthByte;
|
|
|
|
/* all 32 lengths must be read to get the offset of the first group string */
|
|
while(i<LINES_PER_GROUP) {
|
|
lengthByte=*s++;
|
|
|
|
/* read even nibble - MSBs of lengthByte */
|
|
if(length>=12) {
|
|
/* double-nibble length spread across two bytes */
|
|
length=((length&0x3)<<4|lengthByte>>4)+12;
|
|
lengthByte&=0xf;
|
|
} else if((lengthByte&0xf0)>=0xc0) {
|
|
/* double-nibble length spread across this one byte */
|
|
length=(lengthByte&0x3f)+12;
|
|
} else {
|
|
/* single-nibble length in MSBs */
|
|
length=lengthByte>>4;
|
|
lengthByte&=0xf;
|
|
}
|
|
|
|
if(i==lineNumber) {
|
|
nameOffset=offset;
|
|
nameLength=length;
|
|
}
|
|
|
|
offset+=length;
|
|
++i;
|
|
|
|
/* read odd nibble - LSBs of lengthByte */
|
|
if((lengthByte&0xf0)==0) {
|
|
/* this nibble was not consumed for a double-nibble length above */
|
|
length=lengthByte;
|
|
if(length<12) {
|
|
/* single-nibble length in LSBs */
|
|
if(i==lineNumber) {
|
|
nameOffset=offset;
|
|
nameLength=length;
|
|
}
|
|
|
|
offset+=length;
|
|
++i;
|
|
}
|
|
} else {
|
|
length=0; /* prevent double-nibble detection in the next iteration */
|
|
}
|
|
}
|
|
|
|
return expandName(names, s+nameOffset, nameLength, nameChoice,
|
|
buffer, bufferLength);
|
|
}
|
|
|
|
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
|
|
if((bufferLength)>0) { \
|
|
*(buffer)++=c; \
|
|
--(bufferLength); \
|
|
} \
|
|
++(bufferPos); \
|
|
}
|
|
|
|
static uint16_t
|
|
expandName(UCharNames *names,
|
|
uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength) {
|
|
uint16_t *tokens=(uint16_t *)names+8;
|
|
uint16_t token, tokenCount=*tokens++, bufferPos=0;
|
|
uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
|
|
uint8_t c;
|
|
|
|
if(nameChoice!=U_UNICODE_CHAR_NAME) {
|
|
/* skip the modern name */
|
|
while(nameLength>0) {
|
|
--nameLength;
|
|
if(*name++==';') {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* write each letter directly, and write a token word per token */
|
|
while(nameLength>0) {
|
|
--nameLength;
|
|
c=*name++;
|
|
|
|
if(c==';') {
|
|
/* finished */
|
|
break;
|
|
}
|
|
|
|
if(c>=tokenCount) {
|
|
/* implicit letter */
|
|
WRITE_CHAR(buffer, bufferLength, bufferPos, c);
|
|
} else {
|
|
token=tokens[c];
|
|
if(token==(uint16_t)(-2)) {
|
|
/* this is a lead byte for a double-byte token */
|
|
token=tokens[c<<8|*name++];
|
|
--nameLength;
|
|
}
|
|
if(token==(uint16_t)(-1)) {
|
|
/* explicit letter */
|
|
WRITE_CHAR(buffer, bufferLength, bufferPos, c);
|
|
} else {
|
|
/* write token word */
|
|
uint8_t *tokenString=tokenStrings+token;
|
|
while((c=*tokenString++)!=0) {
|
|
WRITE_CHAR(buffer, bufferLength, bufferPos, c);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* zero-terminate */
|
|
if(bufferLength>0) {
|
|
*buffer=0;
|
|
}
|
|
|
|
return bufferPos;
|
|
}
|
|
|
|
static uint16_t
|
|
getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
|
|
char *buffer, uint16_t bufferLength) {
|
|
uint16_t bufferPos=0;
|
|
|
|
switch(range->type) {
|
|
case 0: {
|
|
/* name = prefix hex-digits */
|
|
char *s=(char *)(range+1);
|
|
char c;
|
|
|
|
uint16_t i, count;
|
|
|
|
/* copy prefix */
|
|
while((c=*s++)!=0) {
|
|
WRITE_CHAR(buffer, bufferLength, bufferPos, c);
|
|
}
|
|
|
|
/* write hexadecimal code point value */
|
|
count=range->variant;
|
|
|
|
/* zero-terminate */
|
|
if(count<bufferLength) {
|
|
buffer[count]=0;
|
|
}
|
|
|
|
for(i=count; i>0;) {
|
|
if(--i<bufferLength) {
|
|
c=(char)code&0xf;
|
|
if(c<10) {
|
|
c+='0';
|
|
} else {
|
|
c+='A'-10;
|
|
}
|
|
buffer[i]=c;
|
|
}
|
|
code>>=4;
|
|
}
|
|
|
|
bufferPos+=count;
|
|
break;
|
|
}
|
|
case 1: {
|
|
/* name = prefix factorized-elements */
|
|
uint16_t *factors=(uint16_t *)(range+1);
|
|
char *s=(char *)(factors+range->variant);
|
|
char c;
|
|
|
|
uint16_t indeces[8];
|
|
uint16_t i, count, factor;
|
|
|
|
/* copy prefix */
|
|
while((c=*s++)!=0) {
|
|
WRITE_CHAR(buffer, bufferLength, bufferPos, c);
|
|
}
|
|
|
|
/* write elements according to the factors */
|
|
code-=range->start;
|
|
|
|
/*
|
|
* the factorized elements are determined by modulo arithmetic
|
|
* with the factors of this algorithm
|
|
*
|
|
* note that for fewer operations, count is decremented here
|
|
*/
|
|
count=range->variant-1;
|
|
for(i=count; i>0; --i) {
|
|
factor=factors[i];
|
|
indeces[i]=(uint16_t)(code%factor);
|
|
code/=factor;
|
|
}
|
|
/*
|
|
* we don't need to calculate the last modulus because start<=code<=end
|
|
* guarantees here that code<=factors[0]
|
|
*/
|
|
indeces[0]=(uint16_t)code;
|
|
|
|
/* write each element */
|
|
for(;;) {
|
|
/* skip indeces[i] strings */
|
|
factor=indeces[i];
|
|
while(factor>0) {
|
|
while(*s++!=0) {}
|
|
--factor;
|
|
}
|
|
|
|
/* write element */
|
|
while((c=*s++)!=0) {
|
|
WRITE_CHAR(buffer, bufferLength, bufferPos, c);
|
|
}
|
|
|
|
/* we do not need to perform the rest of this loop for i==count - break here */
|
|
if(i>=count) {
|
|
break;
|
|
}
|
|
|
|
/* skip the rest of the strings for this factors[i] */
|
|
factor=factors[i]-indeces[i]-1;
|
|
while(factor>0) {
|
|
while(*s++!=0) {}
|
|
--factor;
|
|
}
|
|
|
|
++i;
|
|
}
|
|
|
|
/* zero-terminate */
|
|
if(bufferLength>0) {
|
|
*buffer=0;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
/* undefined type */
|
|
/* zero-terminate */
|
|
if(bufferLength>0) {
|
|
*buffer=0;
|
|
}
|
|
break;
|
|
}
|
|
|
|
return bufferPos;
|
|
}
|