ICU-4287 move bidi/shaping properties to ubidi.icu
X-SVN-Rev: 17055
This commit is contained in:
parent
c159da9c5e
commit
739b6a5220
9
.gitignore
vendored
9
.gitignore
vendored
@ -240,6 +240,15 @@ icu4c/source/tools/dumpce/*.html
|
||||
icu4c/source/tools/dumpce/*.o
|
||||
icu4c/source/tools/dumpce/Makefile
|
||||
icu4c/source/tools/dumpce/dumpce
|
||||
icu4c/source/tools/genbidi/*.d
|
||||
icu4c/source/tools/genbidi/*.o
|
||||
icu4c/source/tools/genbidi/*.pdb
|
||||
icu4c/source/tools/genbidi/*.plg
|
||||
icu4c/source/tools/genbidi/Debug
|
||||
icu4c/source/tools/genbidi/Makefile
|
||||
icu4c/source/tools/genbidi/Release
|
||||
icu4c/source/tools/genbidi/genbidi
|
||||
icu4c/source/tools/genbidi/genbidi.[0-9]
|
||||
icu4c/source/tools/genbrk/*.d
|
||||
icu4c/source/tools/genbrk/*.o
|
||||
icu4c/source/tools/genbrk/*.pdb
|
||||
|
@ -198,6 +198,24 @@ Package=<4>
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "genbidi"=..\tools\genbidi\genbidi.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name common
|
||||
End Project Dependency
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name toolutil
|
||||
End Project Dependency
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "genbrk"=..\tools\genbrk\genbrk.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
|
@ -140,6 +140,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makedata", "..\data\makedat
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C} = {F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3} = {6F744648-D15F-478A-90C6-58E353B5DDB3}
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61} = {DB312A49-12A9-4E07-9E96-451DC2D8FF61}
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62} = {DB312A49-12A9-4E07-9E96-451DC2D8FF62}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891} = {77C78066-746F-4EA6-B3FE-B8C8A4A97891}
|
||||
{203EC78A-0531-43F0-A636-285439BDE025} = {203EC78A-0531-43F0-A636-285439BDE025}
|
||||
@ -202,6 +203,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iotest", "..\test\iotest\io
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genbidi", "..\tools\genbidi\genbidi.vcproj", "{DB312A49-12A9-4E07-9E96-451DC2D8FF62}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencase", "..\tools\gencase\gencase.vcproj", "{DB312A49-12A9-4E07-9E96-451DC2D8FF61}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
@ -338,6 +345,10 @@ Global
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug.Build.0 = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release.ActiveCfg = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release.Build.0 = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug.ActiveCfg = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug.Build.0 = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release.ActiveCfg = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
EndGlobalSection
|
||||
|
@ -2538,6 +2538,14 @@ InputPath=.\unicode\symtable.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ubidi_props.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ubidi_props.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucase.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
509
icu4c/source/common/ubidi_props.c
Normal file
509
icu4c/source/common/ubidi_props.c
Normal file
@ -0,0 +1,509 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ubidi_props.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004dec30
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Low-level Unicode bidi/shaping properties access.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/udata.h" /* UDataInfo */
|
||||
#include "ucmndata.h" /* DataHeader */
|
||||
#include "udatamem.h"
|
||||
#include "umutex.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
#include "utrie.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "ucln_cmn.h"
|
||||
|
||||
/*
 * In-memory view of one set of bidi/shaping properties data.
 * The pointers refer into the loaded or caller-supplied binary data;
 * only the struct itself is heap-allocated (see ubidi_openData()).
 */
struct UBiDiProps {
|
||||
UDataMemory *mem; /* data handle from udata_openChoice(); stays NULL when opened via ubidi_openBinary() */
|
||||
const int32_t *indexes; /* indexes[] at the start of the data, addressed with the UBIDI_IX_ constants */
|
||||
const uint32_t *mirrors; /* mirror table that follows the serialized trie; indexes[UBIDI_IX_MIRROR_LENGTH] entries */
|
||||
|
||||
UTrie trie; /* unserialized trie mapping a code point to its 32-bit properties word */
|
||||
uint8_t formatVersion[4]; /* copied from UDataInfo by isAcceptable() */
|
||||
};
|
||||
|
||||
/* data loading etc. -------------------------------------------------------- */
|
||||
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
const UDataInfo *pInfo) {
|
||||
if(
|
||||
pInfo->size>=20 &&
|
||||
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||||
pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
|
||||
pInfo->dataFormat[1]==UBIDI_FMT_1 &&
|
||||
pInfo->dataFormat[2]==UBIDI_FMT_2 &&
|
||||
pInfo->dataFormat[3]==UBIDI_FMT_3 &&
|
||||
pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
) {
|
||||
UBiDiProps *bdp=(UBiDiProps *)context;
|
||||
uprv_memcpy(bdp->formatVersion, pInfo->formatVersion, 4);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static UBiDiProps *
|
||||
ubidi_openData(UBiDiProps *bdpProto,
|
||||
const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
|
||||
UBiDiProps *bdp;
|
||||
int32_t size;
|
||||
|
||||
bdpProto->indexes=(const int32_t *)bin;
|
||||
if( (length>=0 && length<16*4) ||
|
||||
bdpProto->indexes[UBIDI_IX_INDEX_TOP]<16
|
||||
) {
|
||||
/* length or indexes[] too short for minimum indexes[] length of 16 */
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
size=bdpProto->indexes[UBIDI_IX_INDEX_TOP]*4;
|
||||
if(length>=0) {
|
||||
if(length>=size && length>=bdpProto->indexes[UBIDI_IX_LENGTH]) {
|
||||
length-=size;
|
||||
} else {
|
||||
/* length too short for indexes[] or for the whole data length */
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
bin+=size;
|
||||
/* from here on, assume that the sizes of the items fit into the total length */
|
||||
|
||||
/* unserialize the trie, after indexes[] */
|
||||
size=bdpProto->indexes[UBIDI_IX_TRIE_SIZE];
|
||||
utrie_unserialize(&bdpProto->trie, bin, size, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
bin+=size;
|
||||
|
||||
/* get mirrors[] */
|
||||
size=4*bdpProto->indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
bdpProto->mirrors=(const uint32_t *)bin;
|
||||
bin+=size;
|
||||
|
||||
/* allocate, copy, and return the new UBiDiProps */
|
||||
bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps));
|
||||
if(bdp==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
} else {
|
||||
uprv_memcpy(bdp, bdpProto, sizeof(UBiDiProps));
|
||||
return bdp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Load the data item UBIDI_DATA_NAME "." UBIDI_DATA_TYPE through
 * udata_openChoice() and wrap it in a new UBiDiProps.
 *
 * Returns NULL with *pErrorCode set if the data cannot be opened or parsed;
 * in the latter case the data handle is closed again before returning.
 * A successful result owns the data handle; ubidi_closeProps() releases both.
 */
U_CAPI UBiDiProps * U_EXPORT2
ubidi_openProps(UErrorCode *pErrorCode) {
    UBiDiProps proto={ NULL };
    UBiDiProps *opened;

    /* isAcceptable() records the format version in proto */
    proto.mem=udata_openChoice(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, isAcceptable, &proto, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    opened=ubidi_openData(
                &proto,
                udata_getMemory(proto.mem),
                udata_getLength(proto.mem),
                pErrorCode);
    if(!U_FAILURE(*pErrorCode)) {
        return opened;
    }

    /* parsing failed: nothing took ownership of the data handle */
    udata_close(proto.mem);
    return NULL;
}
|
||||
|
||||
/*
 * Create a UBiDiProps from caller-supplied binary data that starts with a
 * standard data header (magic bytes 0xda 0x27 followed by a UDataInfo).
 *
 * bin         start of the data header; must not be NULL
 * length      number of bytes at bin, or negative if unknown, in which case
 *             no length checks are made
 * pErrorCode  in/out; nothing is done if it already indicates a failure.
 *             Set to U_ILLEGAL_ARGUMENT_ERROR for bin==NULL and to
 *             U_INVALID_FORMAT_ERROR for a short or unrecognized header.
 *
 * Returns the new object, or NULL on error. The result points into bin
 * (see ubidi_openData()) and has no data handle attached.
 */
U_CAPI UBiDiProps * U_EXPORT2
ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
    UBiDiProps bdpProto={ NULL };
    const DataHeader *hdr;
    int32_t headerSize;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(bin==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* check the header */
    if(length>=0 && length<20) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return NULL;
    }
    hdr=(const DataHeader *)bin;
    if(
        !(hdr->dataHeader.magic1==0xda && hdr->dataHeader.magic2==0x27 &&
          hdr->info.isBigEndian==U_IS_BIG_ENDIAN &&
          isAcceptable(&bdpProto, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &hdr->info))
    ) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return NULL;
    }

    headerSize=hdr->dataHeader.headerSize;
    if(length>=0) {
        /*
         * A buffer shorter than its own header must be rejected here:
         * subtracting would make length negative, and ubidi_openData()
         * interprets a negative length as "unknown" and skips every
         * bounds check.
         */
        if(length<headerSize) {
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return NULL;
        }
        length-=headerSize;
    }
    bin+=headerSize;

    return ubidi_openData(&bdpProto, bin, length, pErrorCode);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_closeProps(UBiDiProps *bdp) {
|
||||
if(bdp!=NULL) {
|
||||
udata_close(bdp->mem);
|
||||
uprv_free(bdp);
|
||||
}
|
||||
}
|
||||
|
||||
/* UBiDiProps singleton ----------------------------------------------------- */
|
||||
|
||||
static UBiDiProps *gBdp=NULL;
|
||||
static UErrorCode gErrorCode=U_ZERO_ERROR;
|
||||
static int8_t gHaveData=0;
|
||||
|
||||
static UBool U_CALLCONV
|
||||
ubidi_cleanup(void) {
|
||||
ubidi_closeProps(gBdp);
|
||||
gBdp=NULL;
|
||||
gErrorCode=U_ZERO_ERROR;
|
||||
gHaveData=0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
 * Return the shared UBiDiProps instance, loading it on first use.
 *
 * pErrorCode  in/out; nothing is done if it already indicates a failure.
 *             If loading fails, now or in an earlier call, it receives the
 *             load error and NULL is returned.
 *
 * The returned object is owned by this module and is closed by
 * ubidi_cleanup(); callers must not pass it to ubidi_closeProps().
 *
 * All writes to the singleton state happen under the global mutex
 * (umtx_lock(NULL)). UMTX_CHECK(NULL, ...) is presumably the matching
 * guarded read -- confirm against umutex.h.
 */
U_CAPI UBiDiProps * U_EXPORT2
ubidi_getSingleton(UErrorCode *pErrorCode) {
    int8_t haveData;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    UMTX_CHECK(NULL, gHaveData, haveData);

    if(haveData>0) {
        /* data was loaded */
        return gBdp;
    } else if(haveData<0) {
        /* data loading failed */
        *pErrorCode=gErrorCode;
        return NULL;
    } else /* haveData==0 */ {
        /* load the data */
        UBiDiProps *bdp=ubidi_openProps(pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            /*
             * Record the failure under the mutex, error code first, so that
             * a reader which sees gHaveData<0 also sees the matching
             * gErrorCode. Do not overwrite the state if another thread has
             * published a result in the meantime.
             */
            umtx_lock(NULL);
            if(gHaveData==0) {
                gErrorCode=*pErrorCode;
                gHaveData=-1;
            }
            umtx_unlock(NULL);
            return NULL;
        }

        /* set the static variables */
        umtx_lock(NULL);
        if(gBdp==NULL) {
            gBdp=bdp;
            bdp=NULL;
            gHaveData=1;
            ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
        }
        umtx_unlock(NULL);

        /* non-NULL only if another thread won the race; discard our copy */
        ubidi_closeProps(bdp);
        return gBdp;
    }
}
|
||||
|
||||
/* Unicode bidi/shaping data swapping --------------------------------------- */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubidi_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UDataInfo *pInfo;
|
||||
int32_t headerSize;
|
||||
|
||||
const uint8_t *inBytes;
|
||||
uint8_t *outBytes;
|
||||
|
||||
const int32_t *inIndexes;
|
||||
int32_t indexes[16];
|
||||
|
||||
int32_t i, offset, count, size;
|
||||
|
||||
/* udata_swapDataHeader checks the arguments */
|
||||
headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check data format and format version */
|
||||
pInfo=(const UDataInfo *)((const char *)inData+4);
|
||||
if(!(
|
||||
pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
|
||||
pInfo->dataFormat[1]==UBIDI_FMT_1 &&
|
||||
pInfo->dataFormat[2]==UBIDI_FMT_2 &&
|
||||
pInfo->dataFormat[3]==UBIDI_FMT_3 &&
|
||||
pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
)) {
|
||||
udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
||||
pInfo->formatVersion[0]);
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
inBytes=(const uint8_t *)inData+headerSize;
|
||||
outBytes=(uint8_t *)outData+headerSize;
|
||||
|
||||
inIndexes=(const int32_t *)inBytes;
|
||||
|
||||
if(length>=0) {
|
||||
length-=headerSize;
|
||||
if(length<16*4) {
|
||||
udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
|
||||
length);
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
|
||||
for(i=0; i<16; ++i) {
|
||||
indexes[i]=udata_readInt32(ds, inIndexes[i]);
|
||||
}
|
||||
|
||||
/* get the total length of the data */
|
||||
size=indexes[UBIDI_IX_LENGTH];
|
||||
|
||||
if(length>=0) {
|
||||
if(length<size) {
|
||||
udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
|
||||
length);
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* copy the data for inaccessible bytes */
|
||||
if(inBytes!=outBytes) {
|
||||
uprv_memcpy(outBytes, inBytes, size);
|
||||
}
|
||||
|
||||
offset=0;
|
||||
|
||||
/* swap the int32_t indexes[] */
|
||||
count=indexes[UBIDI_IX_INDEX_TOP]*4;
|
||||
ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
|
||||
offset+=count;
|
||||
|
||||
/* swap the UTrie */
|
||||
count=indexes[UBIDI_IX_TRIE_SIZE];
|
||||
utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
|
||||
offset+=count;
|
||||
|
||||
/* swap the uint32_t mirrors[] */
|
||||
count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
|
||||
ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
|
||||
offset+=count;
|
||||
|
||||
U_ASSERT(offset==size);
|
||||
}
|
||||
|
||||
return headerSize+size;
|
||||
}
|
||||
|
||||
/* set of property starts for UnicodeSet ------------------------------------ */
|
||||
|
||||
static UBool U_CALLCONV
|
||||
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
|
||||
/* add the start code point to the USet */
|
||||
const USetAdder *sa=(const USetAdder *)context;
|
||||
sa->add(sa->set, start);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
int32_t i, length;
|
||||
UChar32 c;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* add the start code point of each same-value range of the trie */
|
||||
utrie_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
|
||||
|
||||
/* add the code points from the bidi mirroring table */
|
||||
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
for(i=0; i<length; ++i) {
|
||||
c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
|
||||
sa->addRange(sa->set, c, c+1);
|
||||
}
|
||||
|
||||
/* add code points with hardcoded properties, plus the ones following them */
|
||||
|
||||
/* (none right now) */
|
||||
}
|
||||
|
||||
/* data access primitives --------------------------------------------------- */
|
||||
|
||||
/*
 * Fetch the 32-bit properties word for code point c from bdp's trie into
 * result.
 * UTRIE_GET32() itself validates c.
 * The expansion deliberately has no trailing semicolon: the invocation
 * supplies it, so that GET_PROPS(...); is exactly one statement.
 */
#define GET_PROPS(bdp, c, result) \
    UTRIE_GET32(&(bdp)->trie, c, result)
|
||||
|
||||
/* property access functions ------------------------------------------------ */
|
||||
|
||||
U_CFUNC int32_t
|
||||
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
|
||||
int32_t max;
|
||||
|
||||
if(bdp==NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_CLASS:
|
||||
return (max&UBIDI_CLASS_MASK);
|
||||
case UCHAR_JOINING_GROUP:
|
||||
return (max&UBIDI_JG_MASK)>>UBIDI_JG_SHIFT;
|
||||
case UCHAR_JOINING_TYPE:
|
||||
return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT;
|
||||
default:
|
||||
return -1; /* undefined */
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UCharDirection U_EXPORT2
|
||||
ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(bdp, c, props);
|
||||
return (UCharDirection)UBIDI_GET_CLASS(props);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(bdp, c, props);
|
||||
return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
|
||||
}
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
int32_t delta;
|
||||
|
||||
GET_PROPS(bdp, c, props);
|
||||
delta=((int32_t)props)>>UBIDI_MIRROR_DELTA_SHIFT;
|
||||
if(delta!=UBIDI_ESC_MIRROR_DELTA) {
|
||||
return c+delta;
|
||||
} else {
|
||||
/* look for mirror code point in the mirrors[] table */
|
||||
const uint32_t *mirrors;
|
||||
uint32_t m;
|
||||
int32_t i, length;
|
||||
UChar32 c2;
|
||||
|
||||
mirrors=bdp->mirrors;
|
||||
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
|
||||
/* linear search */
|
||||
for(i=0; i<length; ++i) {
|
||||
m=mirrors[i];
|
||||
c2=UBIDI_GET_MIRROR_CODE_POINT(m);
|
||||
if(c==c2) {
|
||||
/* found c, return its mirror code point using the index in m */
|
||||
return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]);
|
||||
} else if(c<c2) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* c not found, return it itself */
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(bdp, c, props);
|
||||
return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(bdp, c, props);
|
||||
return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
|
||||
}
|
||||
|
||||
U_CAPI UJoiningType U_EXPORT2
|
||||
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(bdp, c, props);
|
||||
return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
|
||||
}
|
||||
|
||||
U_CAPI UJoiningGroup U_EXPORT2
|
||||
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(bdp, c, props);
|
||||
return (UJoiningGroup)((props&UBIDI_JG_MASK)>>UBIDI_JG_SHIFT);
|
||||
}
|
||||
|
||||
/* public API (see uchar.h) ------------------------------------------------- */
|
||||
|
||||
U_CAPI UCharDirection U_EXPORT2
|
||||
u_charDirection(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
|
||||
if(bdp!=NULL) {
|
||||
return ubidi_getClass(bdp, c);
|
||||
} else {
|
||||
return U_LEFT_TO_RIGHT;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isMirrored(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
|
||||
return (UBool)(bdp!=NULL && ubidi_isMirrored(bdp, c));
|
||||
}
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_charMirror(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
|
||||
if(bdp!=NULL) {
|
||||
return ubidi_getMirror(bdp, c);
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
147
icu4c/source/common/ubidi_props.h
Normal file
147
icu4c/source/common/ubidi_props.h
Normal file
@ -0,0 +1,147 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ubidi_props.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004dec30
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Low-level Unicode bidi/shaping properties access.
|
||||
*/
|
||||
|
||||
#ifndef __UBIDI_PROPS_H__
#define __UBIDI_PROPS_H__

#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "uset_imp.h"
#include "udataswp.h"

U_CDECL_BEGIN

/* library API -------------------------------------------------------------- */

/* Opaque here; the members are defined in ubidi_props.c. */
struct UBiDiProps;
typedef struct UBiDiProps UBiDiProps;

/*
 * Load the UBIDI_DATA_NAME "." UBIDI_DATA_TYPE data item and return a new
 * object for it, or NULL with *pErrorCode set on failure.
 * Release the result with ubidi_closeProps().
 */
U_CAPI UBiDiProps * U_EXPORT2
ubidi_openProps(UErrorCode *pErrorCode);

/*
 * Create an object from binary data that starts with a data header.
 * length is the number of bytes at bin, or negative if unknown.
 * Returns NULL with *pErrorCode set on failure.
 */
U_CAPI UBiDiProps * U_EXPORT2
ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode);

/* Close an object from ubidi_openProps()/ubidi_openBinary(); NULL is ignored. */
U_CAPI void U_EXPORT2
ubidi_closeProps(UBiDiProps *bdp);


/*
 * Return the shared instance, loading it on first use.
 * The instance is closed by library cleanup; callers must not close it.
 */
U_CAPI UBiDiProps * U_EXPORT2
ubidi_getSingleton(UErrorCode *pErrorCode);


/*
 * Swap bidi/shaping data with the given UDataSwapper.
 * Returns the total size (header plus data), or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
ubidi_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode);

/* Add the code points at which bidi/shaping property values may change. */
U_CAPI void U_EXPORT2
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode);

/* property access functions */

/*
 * Maximum stored value for UCHAR_BIDI_CLASS, UCHAR_JOINING_GROUP or
 * UCHAR_JOINING_TYPE; -1 for other properties or a NULL bdp.
 */
U_CFUNC int32_t
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which);

U_CAPI UCharDirection U_EXPORT2
ubidi_getClass(const UBiDiProps *bdp, UChar32 c);

U_CAPI UBool U_EXPORT2
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c);

/* Returns c itself if no mirror code point is stored for it. */
U_CAPI UChar32 U_EXPORT2
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c);

U_CAPI UBool U_EXPORT2
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c);

U_CAPI UBool U_EXPORT2
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c);

U_CAPI UJoiningType U_EXPORT2
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c);

U_CAPI UJoiningGroup U_EXPORT2
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c);

/* file definitions --------------------------------------------------------- */

#define UBIDI_DATA_NAME "ubidi"
#define UBIDI_DATA_TYPE "icu"

/* format "BiDi" */
#define UBIDI_FMT_0 0x42
#define UBIDI_FMT_1 0x69
#define UBIDI_FMT_2 0x44
#define UBIDI_FMT_3 0x69

/* indexes into indexes[] */
enum {
    UBIDI_IX_INDEX_TOP,         /* number of int32_t entries in indexes[] */
    UBIDI_IX_LENGTH,            /* total length of the data in bytes */
    UBIDI_IX_TRIE_SIZE,         /* size of the serialized trie in bytes */
    UBIDI_IX_MIRROR_LENGTH,     /* number of uint32_t entries in mirrors[] */

    UBIDI_MAX_VALUES_INDEX=15,  /* maximum property values, packed like a properties word */
    UBIDI_IX_TOP=16
};

/* definitions for 32-bit bidi/shaping properties word ---------------------- */

enum {
 /* UBIDI_CLASS_SHIFT=0, */     /* bidi class: 5 bits (4..0) */
    UBIDI_JT_SHIFT=5,           /* joining type: 3 bits (7..5) */
    UBIDI_JG_SHIFT=8,           /* joining group: 6 bits (13..8) */

    /* UBIDI__SHIFT=14, reserved: 12 bits (25..14) */

    UBIDI_JOIN_CONTROL_SHIFT=26,
    UBIDI_BIDI_CONTROL_SHIFT=27,

    UBIDI_IS_MIRRORED_SHIFT=28, /* 'is mirrored' */
    UBIDI_MIRROR_DELTA_SHIFT=29 /* bidi mirroring delta: 3 bits (31..29) */
};

#define UBIDI_CLASS_MASK        0x0000001f
#define UBIDI_JT_MASK           0x000000e0
#define UBIDI_JG_MASK           0x00003f00

#define UBIDI_GET_CLASS(props) ((props)&UBIDI_CLASS_MASK)
#define UBIDI_GET_FLAG(props, shift) (((props)>>(shift))&1)

/* values of the signed 3-bit mirroring delta */
enum {
    UBIDI_ESC_MIRROR_DELTA=-4,  /* escape: the mirror code point is stored in mirrors[] */
    UBIDI_MIN_MIRROR_DELTA=-3,
    UBIDI_MAX_MIRROR_DELTA=3
};

/* definitions for 32-bit mirror table entry -------------------------------- */

enum {
    /* the source Unicode code point takes 21 bits (20..0) */
    UBIDI_MIRROR_INDEX_SHIFT=21,
    UBIDI_MAX_MIRROR_INDEX=0x7ff
};

/* the whole expansion is parenthesized so that it is safe inside any expression */
#define UBIDI_GET_MIRROR_CODE_POINT(m) ((UChar32)((m)&0x1fffff))

/* index of the mirrors[] entry whose code point is the mirror image */
#define UBIDI_GET_MIRROR_INDEX(m) ((m)>>UBIDI_MIRROR_INDEX_SHIFT)

U_CDECL_END

#endif
|
3
icu4c/source/configure
vendored
3
icu4c/source/configure
vendored
@ -4075,6 +4075,7 @@ case "${host}" in
|
||||
tools/gennames/gennames tools/gentest/gentest \
|
||||
tools/pkgdata/pkgdata tools/genprops/genprops \
|
||||
tools/gencase/gencase \
|
||||
tools/genbidi/genbidi \
|
||||
tools/gensprep/gensprep
|
||||
do
|
||||
if test -f $file.c; then
|
||||
@ -4236,6 +4237,7 @@ trap 'rm -fr `echo "icudefs.mk \
|
||||
tools/gennorm/Makefile \
|
||||
tools/genprops/Makefile \
|
||||
tools/gencase/Makefile \
|
||||
tools/genbidi/Makefile \
|
||||
tools/genpname/Makefile \
|
||||
tools/genbrk/Makefile \
|
||||
tools/gensprep/Makefile \
|
||||
@ -4440,6 +4442,7 @@ CONFIG_FILES=\${CONFIG_FILES-"icudefs.mk \
|
||||
tools/gennorm/Makefile \
|
||||
tools/genprops/Makefile \
|
||||
tools/gencase/Makefile \
|
||||
tools/genbidi/Makefile \
|
||||
tools/genpname/Makefile \
|
||||
tools/genbrk/Makefile \
|
||||
tools/gensprep/Makefile \
|
||||
|
@ -932,6 +932,7 @@ case "${host}" in
|
||||
tools/gennames/gennames tools/gentest/gentest \
|
||||
tools/pkgdata/pkgdata tools/genprops/genprops \
|
||||
tools/gencase/gencase \
|
||||
tools/genbidi/genbidi \
|
||||
tools/gensprep/gensprep
|
||||
do
|
||||
if test -f $file.c; then
|
||||
@ -995,6 +996,7 @@ AC_OUTPUT([icudefs.mk \
|
||||
tools/gennorm/Makefile \
|
||||
tools/genprops/Makefile \
|
||||
tools/gencase/Makefile \
|
||||
tools/genbidi/Makefile \
|
||||
tools/genpname/Makefile \
|
||||
tools/genbrk/Makefile \
|
||||
tools/gensprep/Makefile \
|
||||
|
@ -163,7 +163,7 @@ package390: $(OUTTMPDIR)/icudata390.lst $(OUTTMPDIR)/icudata.lst ./icupkg.inc pa
|
||||
# X_FILES = full paths (for dependency)
|
||||
|
||||
## DAT files - Misc. data files.
|
||||
DAT_FILES_SHORT=uprops.icu ucase.icu pnames.icu unames.icu unorm.icu cnvalias.icu ucadata.icu invuca.icu uidna.spp
|
||||
DAT_FILES_SHORT=uprops.icu ucase.icu ubidi.icu pnames.icu unames.icu unorm.icu cnvalias.icu ucadata.icu invuca.icu uidna.spp
|
||||
DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%)
|
||||
|
||||
## BRK files
|
||||
@ -285,6 +285,10 @@ $(BUILD_DIRS):
|
||||
$(BUILDDIR)/uprops.icu: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/BidiMirroring.txt $(BINDIR)/genprops$(EXEEXT) $(BUILDDIR)/$(ICUDT)pnames.icu
|
||||
$(INVOKE) $(BINDIR)/genprops -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION)
|
||||
|
||||
# ubidi.icu
|
||||
$(BUILDDIR)/ubidi.icu: $(UNICODEDATADIR)/UnicodeData.txt $(BINDIR)/genbidi$(EXEEXT)
|
||||
$(INVOKE) $(BINDIR)/genbidi -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION)
|
||||
|
||||
# ucase.icu
|
||||
$(BUILDDIR)/ucase.icu: $(UNICODEDATADIR)/UnicodeData.txt $(BINDIR)/gencase$(EXEEXT)
|
||||
$(INVOKE) $(BINDIR)/gencase -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION)
|
||||
@ -412,7 +416,7 @@ icupkg.inc: $(srcdir)/icupkg.inc.in $(top_builddir)/config.status
|
||||
###########
|
||||
########### 390 (z/OS) support
|
||||
UCMFILES390=ebcdic-xml-us.ucm ibm-37_P100-1995.ucm ibm-1047_P100-1995.ucm ibm-4909_P100-1999.ucm
|
||||
ALLFILES390=uprops.icu ucase.icu pnames.icu unorm.icu cnvalias.icu $(UCMFILES390:.ucm=.cnv)
|
||||
ALLFILES390=uprops.icu ucase.icu ubidi.icu pnames.icu unorm.icu cnvalias.icu $(UCMFILES390:.ucm=.cnv)
|
||||
|
||||
$(OUTTMPDIR)/icudata390.lst: $(SRCLISTDEPS)
|
||||
@echo "generating $@ (list of 390 data files)"
|
||||
|
@ -319,13 +319,14 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat"
|
||||
# move the .dll and .lib files to their final destination afterwards.
|
||||
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
|
||||
#
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\ucase.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\ucase.icu" "$(ICUBLD)\ubidi.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
@echo Building icu data
|
||||
cd "$(ICUBLD)"
|
||||
@"$(ICUP)\bin\pkgdata" -f -e $(U_ICUDATA_NAME) -v $(ICU_PACKAGE_MODE) -M"PKGDATA_LDFLAGS=/base:0x4ad00000" -c -p $(ICUPKG) -T "$(ICUTMP)" -L $(U_ICUDATA_NAME) -d "$(ICUBLD)" -s . <<"$(ICUTMP)\pkgdatain.txt"
|
||||
unorm.icu
|
||||
uprops.icu
|
||||
ucase.icu
|
||||
ubidi.icu
|
||||
pnames.icu
|
||||
unames.icu
|
||||
ucadata.icu
|
||||
@ -500,6 +501,11 @@ res_index {
|
||||
@echo Creating data file for Unicode Character Properties
|
||||
@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -u $(UNICODE_VERSION) -i "$(ICUBLD)" -s "$(ICUUNIDATA)" -d "$(ICUBLD)"
|
||||
|
||||
# Targets for ubidi.icu
|
||||
"$(ICUBLD)\ubidi.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\genbidi\$(CFG)\genbidi.exe"
|
||||
@echo Creating data file for Unicode BiDi/Shaping Properties
|
||||
@"$(ICUTOOLS)\genbidi\$(CFG)\genbidi" -u $(UNICODE_VERSION) -i "$(ICUBLD)" -s "$(ICUUNIDATA)" -d "$(ICUBLD)"
|
||||
|
||||
# Targets for ucase.icu
|
||||
"$(ICUBLD)\ucase.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gencase\$(CFG)\gencase.exe"
|
||||
@echo Creating data file for Unicode Case Mapping Properties
|
||||
@ -528,6 +534,6 @@ res_index {
|
||||
|
||||
$(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
|
||||
|
||||
$(MISC_SOURCE) $(RB_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\uprops.icu" "$(ICUBLD)\ucase.icu" "$(ICUBLD)\unorm.icu"
|
||||
$(MISC_SOURCE) $(RB_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\uprops.icu" "$(ICUBLD)\ucase.icu" "$(ICUBLD)\ubidi.icu" "$(ICUBLD)\unorm.icu"
|
||||
|
||||
$(BRK_SOURCE) : "$(ICUBLD)\uprops.icu" "$(ICUBLD)\ucase.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu"
|
||||
$(BRK_SOURCE) : "$(ICUBLD)\uprops.icu" "$(ICUBLD)\ucase.icu" "$(ICUBLD)\ubidi.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu"
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "putilimp.h"
|
||||
#include "uparse.h"
|
||||
#include "ucase.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "uprops.h"
|
||||
#include "uset_imp.h"
|
||||
#include "usc_impl.h"
|
||||
@ -58,6 +59,7 @@ static void TestPropertyNames(void);
|
||||
static void TestPropertyValues(void);
|
||||
static void TestConsistency(void);
|
||||
static void TestUCase(void);
|
||||
static void TestUBiDiProps(void);
|
||||
|
||||
/* internal methods used */
|
||||
static int32_t MakeProp(char* str);
|
||||
@ -147,6 +149,7 @@ void addUnicodeTest(TestNode** root)
|
||||
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
|
||||
addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
|
||||
addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
|
||||
addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
|
||||
}
|
||||
|
||||
/*==================================================== */
|
||||
@ -1764,6 +1767,7 @@ TestMirroring() {
|
||||
|
||||
log_verbose("Testing u_charMirror()\n");
|
||||
if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
|
||||
u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
|
||||
u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab
|
||||
)
|
||||
) {
|
||||
@ -2843,3 +2847,34 @@ static void TestUCase() {
|
||||
ucase_close(csp);
|
||||
udata_close(pData);
|
||||
}
|
||||
|
||||
/* API coverage for ubidi_props.c */
|
||||
static void TestUBiDiProps() {
|
||||
UDataMemory *pData;
|
||||
UBiDiProps *bdp;
|
||||
UErrorCode errorCode;
|
||||
|
||||
/* coverage for ubidi_openBinary() */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
|
||||
u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
|
||||
u_errorName(errorCode));
|
||||
udata_close(pData);
|
||||
return;
|
||||
}
|
||||
|
||||
if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
|
||||
log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
|
||||
}
|
||||
|
||||
ubidi_closeProps(bdp);
|
||||
udata_close(pData);
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ include $(top_builddir)/icudefs.mk
|
||||
subdir = tools
|
||||
|
||||
SUBDIRS = ctestfw toolutil makeconv genrb genuca genbrk \
|
||||
genprops gencase gennames genpname gennorm gencmn gencnval gensprep genccode pkgdata \
|
||||
genprops gencase genbidi gennames genpname gennorm gencmn gencnval gensprep genccode pkgdata \
|
||||
gentest icuswap
|
||||
|
||||
## List of phony targets
|
||||
|
9
icu4c/source/tools/genbidi/.cvsignore
Normal file
9
icu4c/source/tools/genbidi/.cvsignore
Normal file
@ -0,0 +1,9 @@
|
||||
Debug
|
||||
Release
|
||||
Makefile
|
||||
*.d
|
||||
*.o
|
||||
*.pdb
|
||||
*.plg
|
||||
genbidi
|
||||
genbidi.[0-9]
|
100
icu4c/source/tools/genbidi/Makefile.in
Normal file
100
icu4c/source/tools/genbidi/Makefile.in
Normal file
@ -0,0 +1,100 @@
|
||||
## Makefile.in for ICU - tools/genbidi
|
||||
## Copyright (c) 1999-2004, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
## Steven R. Loomis
|
||||
|
||||
## Source directory information
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = ../..
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
##
|
||||
|
||||
TARGET_STUB_NAME = genbidi
|
||||
|
||||
SECTION = 8
|
||||
|
||||
MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
|
||||
|
||||
## Build directory information
|
||||
subdir = tools/genbidi
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS) $(MAN_FILES)
|
||||
|
||||
## Target information
|
||||
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
|
||||
|
||||
CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
|
||||
LIBS = $(LIBICUTOOLUTIL) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
|
||||
|
||||
OBJECTS = genbidi.o store.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local dist dist-local check \
|
||||
check-local install-man
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
all-local: $(TARGET) $(MAN_FILES)
|
||||
|
||||
install-local: all-local install-man
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
|
||||
$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
|
||||
|
||||
# man page
|
||||
install-man: $(MAN_FILES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
|
||||
%.$(SECTION): $(srcdir)/%.$(SECTION).in
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
# build postscript and pdf formats
|
||||
#$(TARGET).ps: $(TARGET).$(SECTION)
|
||||
# groff -man < $< > $@
|
||||
|
||||
#$(TARGET).pdf: $(TARGET).ps
|
||||
# ps2pdf $< $@
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(TARGET) $(OBJECTS)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(TARGET) : $(OBJECTS)
|
||||
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
128
icu4c/source/tools/genbidi/genbidi.8.in
Normal file
128
icu4c/source/tools/genbidi/genbidi.8.in
Normal file
@ -0,0 +1,128 @@
|
||||
.\" Hey, Emacs! This is -*-nroff-*- you know...
|
||||
.\"
|
||||
.\" genbidi.8: manual page for the genbidi utility
|
||||
.\"
|
||||
.\" Copyright (C) 2004 IBM, Inc. and others.
|
||||
.\"
|
||||
.TH GENBIDI 8 "30 December 2004" "ICU MANPAGE" "ICU @VERSION@ Manual"
|
||||
.SH NAME
|
||||
.B genbidi
|
||||
\- compile bidi/shaping properties from the Unicode Character Database
|
||||
.SH SYNOPSIS
|
||||
.B genbidi
|
||||
[
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
]
|
||||
[
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
]
|
||||
[
|
||||
.BI "\-u\fP, \fB\-\-unicode" " version"
|
||||
]
|
||||
[
|
||||
.BI "\-c\fP, \fB\-\-copyright"
|
||||
]
|
||||
[
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
]
|
||||
[
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
]
|
||||
[
|
||||
.BI "\-i\fP, \fB\-\-icudatadir" " path"
|
||||
]
|
||||
[
|
||||
.I suffix
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.B genbidi
|
||||
reads some of the Unicode Character Database files and compiles their
|
||||
information into a binary form.
|
||||
The resulting file,
|
||||
.BR ubidi.icu ,
|
||||
can then be read directly by ICU, or used by
|
||||
.BR pkgdata (8)
|
||||
for incorporation into a larger archive or library.
|
||||
.LP
|
||||
The files read by
|
||||
.B genbidi
|
||||
are described in the
|
||||
.B FILES
|
||||
section. If
|
||||
.I suffix
|
||||
is passed on the command line, the names of these files will actually
|
||||
be changed to include a dash followed by
|
||||
.I suffix
|
||||
in their basename. For example, the file
|
||||
.B UnicodeData.txt
|
||||
would be looked for under the name
|
||||
.BR UnicodeData\-\fIsuffix\fP.txt .
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
Print help about usage and exit.
|
||||
.TP
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
Display extra informative messages during execution.
|
||||
.TP
|
||||
.BI "\-u\fP, \fB\-\-unicode" " version"
|
||||
Specify which
|
||||
.I version
|
||||
of Unicode the Unicode Character Database refers to.
|
||||
Defaults to
|
||||
.BR 3.0.0 .
|
||||
.TP
|
||||
.BI "\-c\fP, \fB\-\-copyright"
|
||||
Include a copyright notice into the binary data.
|
||||
.TP
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
Set the source directory to
|
||||
.IR source .
|
||||
The default source directory is the current working directory.
|
||||
.TP
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
Set the destination directory to
|
||||
.IR destination .
|
||||
The default destination directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.TP
|
||||
.BI "\-i\fP, \fB\-\-icudatadir" " path"
|
||||
Set the directory for loading ICU data files to
|
||||
.IR path .
|
||||
The default ICU data directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.SH ENVIRONMENT
|
||||
.TP 10
|
||||
.B ICU_DATA
|
||||
Specifies the directory containing ICU data. Defaults to
|
||||
.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
|
||||
Some tools in ICU depend on the presence of the trailing slash. It is thus
|
||||
important to make sure that it is present if
|
||||
.B ICU_DATA
|
||||
is set.
|
||||
.SH FILES
|
||||
The following files are read by
|
||||
.B genbidi
|
||||
and are looked for in the
|
||||
.I source
|
||||
directory.
|
||||
.TP 20
|
||||
.B UnicodeData.txt
|
||||
The main file in the Unicode Character Database. Contains character
|
||||
properties, combining classes information, decompositions, names,
|
||||
etc.\|.\|..
|
||||
.TP
|
||||
.B PropList.txt
|
||||
Listing of auxiliary binary character properties.
|
||||
.TP
|
||||
.B DerivedJoiningGroup.txt
|
||||
Unicode Joining_Group property for Arabic shaping.
|
||||
.TP
|
||||
.B DerivedJoiningType.txt
|
||||
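Unicode Joining_Type property for Arabic shaping.
.TP
.B BidiMirroring.txt
Pairs of code points and their mirror-image code points.
.TP
.B DerivedBidiClass.txt
Bidi_Class values, used to set the class of code points that are
not listed in UnicodeData.txt.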
|
||||
.SH VERSION
|
||||
@VERSION@
|
||||
.SH COPYRIGHT
|
||||
Copyright (C) 2004 IBM, Inc. and others.
|
||||
.SH SEE ALSO
|
||||
.BR pkgdata (8)
|
706
icu4c/source/tools/genbidi/genbidi.c
Normal file
706
icu4c/source/tools/genbidi/genbidi.c
Normal file
@ -0,0 +1,706 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: genbidi.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004dec30
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This program reads several of the Unicode character database text files,
|
||||
* parses them, and extracts the bidi/shaping properties for each character.
|
||||
* It then writes a binary file containing the properties
|
||||
* that is designed to be used directly for random-access to
|
||||
* the properties of each Unicode character.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uclean.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uarrsort.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "uparse.h"
|
||||
#include "propsvec.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "genbidi.h"
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
uint32_t *pv;
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
static UBool
|
||||
isToken(const char *token, const char *s);
|
||||
|
||||
static void
|
||||
parseBidiMirroring(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseDB(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
/* miscellaneous ------------------------------------------------------------ */
|
||||
|
||||
/* TODO: more common code, move functions to uparse.h|c */
|
||||
|
||||
/*
 * Trim a field in place.
 * Skips leading whitespace, cuts trailing spaces and tabs by writing a NUL
 * terminator, and returns the new start of the field.
 * limit points just past the last character of the field and must be writable.
 */
static char *
trimTerminateField(char *s, char *limit) {
    char *end=limit;

    /* advance past leading whitespace */
    s=(char *)u_skipWhitespace(s);

    /* back up over trailing spaces and tabs, never crossing the new start */
    for(; end>s; --end) {
        char last=*(end-1);
        if(last!=' ' && last!='\t') {
            break;
        }
    }
    *end=0;

    return s;
}
|
||||
|
||||
static void
|
||||
parseTwoFieldFile(char *filename, char *basename,
|
||||
const char *ucdFile, const char *suffix,
|
||||
UParseLineFn *lineFn,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, ucdFile, suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
bidiClassLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* parse files with single enumerated properties ---------------------------- */
|
||||
|
||||
/* TODO: more common code, move functions to uparse.h|c */
|
||||
|
||||
struct SingleEnum {
|
||||
const char *ucdFile, *propName;
|
||||
UProperty prop;
|
||||
int32_t vecWord, vecShift;
|
||||
uint32_t vecMask;
|
||||
};
|
||||
typedef struct SingleEnum SingleEnum;
|
||||
|
||||
static void
|
||||
parseSingleEnumFile(char *filename, char *basename, const char *suffix,
|
||||
const SingleEnum *sen,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static const SingleEnum jtSingleEnum={
|
||||
"DerivedJoiningType", "joining type",
|
||||
UCHAR_JOINING_TYPE,
|
||||
0, UBIDI_JT_SHIFT, UBIDI_JT_MASK
|
||||
};
|
||||
|
||||
static const SingleEnum jgSingleEnum={
|
||||
"DerivedJoiningGroup", "joining group",
|
||||
UCHAR_JOINING_GROUP,
|
||||
0, UBIDI_JG_SHIFT, UBIDI_JG_MASK
|
||||
};
|
||||
|
||||
static void U_CALLCONV
|
||||
singleEnumLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
const SingleEnum *sen;
|
||||
char *s;
|
||||
uint32_t start, limit, uv;
|
||||
int32_t value;
|
||||
|
||||
sen=(const SingleEnum *)context;
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse property alias */
|
||||
s=trimTerminateField(fields[1][0], fields[1][1]);
|
||||
value=u_getPropertyValueEnum(sen->prop, s);
|
||||
if(value<0) {
|
||||
if(sen->prop==UCHAR_BLOCK) {
|
||||
if(isToken("Greek", s)) {
|
||||
value=UBLOCK_GREEK; /* Unicode 3.2 renames this to "Greek and Coptic" */
|
||||
} else if(isToken("Combining Marks for Symbols", s)) {
|
||||
value=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */
|
||||
} else if(isToken("Private Use", s)) {
|
||||
value=UBLOCK_PRIVATE_USE; /* Unicode 3.2 renames this to "Private Use Area" */
|
||||
}
|
||||
}
|
||||
}
|
||||
if(value<0) {
|
||||
fprintf(stderr, "genbidi error: unknown %s name in %s.txt field 1 at %s\n",
|
||||
sen->propName, sen->ucdFile, s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
uv=(uint32_t)(value<<sen->vecShift);
|
||||
if((uv&sen->vecMask)!=uv) {
|
||||
fprintf(stderr, "genbidi error: %s value overflow (0x%x) at %s\n",
|
||||
sen->propName, (int)uv, s);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, limit, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set %s code: %s\n",
|
||||
sen->propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parseSingleEnumFile(char *filename, char *basename, const char *suffix,
|
||||
const SingleEnum *sen,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, sen->ucdFile, suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 2, singleEnumLineFn, (void *)sen, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing %s.txt: %s\n", sen->ucdFile, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
/* parse files with multiple binary properties ------------------------------ */
|
||||
|
||||
/* TODO: more common code, move functions to uparse.h|c */
|
||||
|
||||
/* TODO: similar to genprops/props2.c but not the same; same as in gencase/gencase.c */
|
||||
|
||||
/* One binary property and the bits that are set for code points that have it. */
typedef struct Binary {
    const char *propName;   /* property name as it appears in the UCD file */
    int32_t vecWord;        /* word index passed to upvec_setValue() */
    uint32_t vecValue;      /* value stored for code points with the property */
    uint32_t vecMask;       /* bits affected by the store; must not be 0 */
} Binary;
|
||||
|
||||
struct Binaries {
|
||||
const char *ucdFile;
|
||||
const Binary *binaries;
|
||||
int32_t binariesCount;
|
||||
};
|
||||
typedef struct Binaries Binaries;
|
||||
|
||||
static const Binary
|
||||
propListNames[]={
|
||||
{ "Bidi_Control", 0, U_MASK(UBIDI_BIDI_CONTROL_SHIFT), U_MASK(UBIDI_BIDI_CONTROL_SHIFT) },
|
||||
{ "Join_Control", 0, U_MASK(UBIDI_JOIN_CONTROL_SHIFT), U_MASK(UBIDI_JOIN_CONTROL_SHIFT) }
|
||||
};
|
||||
|
||||
static const Binaries
|
||||
propListBinaries={
|
||||
"PropList", propListNames, LENGTHOF(propListNames)
|
||||
};
|
||||
|
||||
static void U_CALLCONV
|
||||
binariesLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
const Binaries *bin;
|
||||
char *s;
|
||||
uint32_t start, limit;
|
||||
int32_t i;
|
||||
|
||||
bin=(const Binaries *)context;
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse binary property name */
|
||||
s=(char *)u_skipWhitespace(fields[1][0]);
|
||||
for(i=0;; ++i) {
|
||||
if(i==bin->binariesCount) {
|
||||
/* ignore unrecognized properties */
|
||||
return;
|
||||
}
|
||||
if(isToken(bin->binaries[i].propName, s)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(bin->binaries[i].vecMask==0) {
|
||||
fprintf(stderr, "genbidi error: mask value %d==0 for %s %s\n",
|
||||
(int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, limit, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
|
||||
bin->binaries[i].propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parseBinariesFile(char *filename, char *basename, const char *suffix,
|
||||
const Binaries *bin,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, bin->ucdFile, suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
enum {
|
||||
HELP_H,
|
||||
HELP_QUESTION_MARK,
|
||||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
SOURCEDIR,
|
||||
UNICODE_VERSION,
|
||||
ICUDATADIR
|
||||
};
|
||||
|
||||
/* Keep these values in sync with the above enums */
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR,
|
||||
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
|
||||
UOPTION_ICUDATADIR
|
||||
};
|
||||
|
||||
extern int
|
||||
main(int argc, char* argv[]) {
|
||||
char filename[300];
|
||||
const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
|
||||
char *basename=NULL;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
U_MAIN_INIT_ARGS(argc, argv);
|
||||
|
||||
/* preset then read command line options */
|
||||
options[DESTDIR].value=u_getDataDirectory();
|
||||
options[SOURCEDIR].value="";
|
||||
options[UNICODE_VERSION].value="";
|
||||
options[ICUDATADIR].value=u_getDataDirectory();
|
||||
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
if(argc<0) {
|
||||
fprintf(stderr,
|
||||
"error in command line argument \"%s\"\n",
|
||||
argv[-argc]);
|
||||
}
|
||||
if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
||||
/*
|
||||
* Broken into chucks because the C89 standard says the minimum
|
||||
* required supported string length is 509 bytes.
|
||||
*/
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-options] [suffix]\n"
|
||||
"\n"
|
||||
"read the UnicodeData.txt file and other Unicode properties files and\n"
|
||||
"create a binary file " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE " with the bidi/shaping properties\n"
|
||||
"\n",
|
||||
argv[0]);
|
||||
fprintf(stderr,
|
||||
"Options:\n"
|
||||
"\t-h or -? or --help this usage text\n"
|
||||
"\t-v or --verbose verbose output\n"
|
||||
"\t-c or --copyright include a copyright notice\n"
|
||||
"\t-u or --unicode Unicode version, followed by the version like 3.0.0\n");
|
||||
fprintf(stderr,
|
||||
"\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t-s or --sourcedir source directory, followed by the path\n"
|
||||
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
|
||||
"\t followed by path, defaults to %s\n"
|
||||
"\tsuffix suffix that is to be appended with a '-'\n"
|
||||
"\t to the source file basenames before opening;\n"
|
||||
"\t 'genbidi new' will read UnicodeData-new.txt etc.\n",
|
||||
u_getDataDirectory());
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
/* get the options values */
|
||||
beVerbose=options[VERBOSE].doesOccur;
|
||||
haveCopyright=options[COPYRIGHT].doesOccur;
|
||||
srcDir=options[SOURCEDIR].value;
|
||||
destDir=options[DESTDIR].value;
|
||||
|
||||
if(argc>=2) {
|
||||
suffix=argv[1];
|
||||
} else {
|
||||
suffix=NULL;
|
||||
}
|
||||
|
||||
if(options[UNICODE_VERSION].doesOccur) {
|
||||
setUnicodeVersion(options[UNICODE_VERSION].value);
|
||||
}
|
||||
/* else use the default dataVersion in store.c */
|
||||
|
||||
if (options[ICUDATADIR].doesOccur) {
|
||||
u_setDataDirectory(options[ICUDATADIR].value);
|
||||
}
|
||||
|
||||
/* prepare the filename beginning with the source dir */
|
||||
uprv_strcpy(filename, srcDir);
|
||||
basename=filename+uprv_strlen(filename);
|
||||
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
|
||||
*basename++=U_FILE_SEP_CHAR;
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
pv=upvec_open(1, 10000);
|
||||
|
||||
/* process BidiMirroring.txt */
|
||||
writeUCDFilename(basename, "BidiMirroring", suffix);
|
||||
parseBidiMirroring(filename, &errorCode);
|
||||
|
||||
/* process additional properties files */
|
||||
*basename=0;
|
||||
|
||||
parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode);
|
||||
|
||||
parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, &errorCode);
|
||||
|
||||
parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, &errorCode);
|
||||
|
||||
/* process UnicodeData.txt */
|
||||
writeUCDFilename(basename, "UnicodeData", suffix);
|
||||
parseDB(filename, &errorCode);
|
||||
|
||||
/* set proper bidi class for unassigned code points (Cn) */
|
||||
parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, &errorCode);
|
||||
|
||||
/* process parsed data */
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
/* write the properties data file */
|
||||
generateData(destDir);
|
||||
}
|
||||
|
||||
u_cleanup();
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
/*
 * Write "<filename>[-<suffix>].txt" to basename.
 * suffix may be NULL, in which case no "-suffix" part is written.
 * The caller must provide a large enough buffer; no bounds are checked here.
 */
U_CFUNC void
writeUCDFilename(char *basename, const char *filename, const char *suffix) {
    char *dest=basename;

    uprv_strcpy(dest, filename);
    dest+=uprv_strlen(filename);

    if(suffix!=NULL) {
        *dest++='-';
        uprv_strcpy(dest, suffix);
        dest+=uprv_strlen(suffix);
    }

    uprv_strcpy(dest, ".txt");
}
|
||||
|
||||
/* TODO: move to toolutil */
|
||||
static UBool
|
||||
isToken(const char *token, const char *s) {
|
||||
const char *z;
|
||||
int32_t j;
|
||||
|
||||
s=u_skipWhitespace(s);
|
||||
for(j=0;; ++j) {
|
||||
if(token[j]!=0) {
|
||||
if(s[j]!=token[j]) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
z=u_skipWhitespace(s+j);
|
||||
if(*z==';' || *z==0) {
|
||||
return TRUE;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
|
||||
const char *t, *z;
|
||||
int32_t i, j;
|
||||
|
||||
s=u_skipWhitespace(s);
|
||||
for(i=0; i<countTokens; ++i) {
|
||||
t=tokens[i];
|
||||
if(t!=NULL) {
|
||||
for(j=0;; ++j) {
|
||||
if(t[j]!=0) {
|
||||
if(s[j]!=t[j]) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
z=u_skipWhitespace(s+j);
|
||||
if(*z==';' || *z==0 || *z=='#' || *z=='\r' || *z=='\n') {
|
||||
return i;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* parser for BidiMirroring.txt --------------------------------------------- */
|
||||
|
||||
static void U_CALLCONV
|
||||
mirrorLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *end;
|
||||
UChar32 src, mirror;
|
||||
|
||||
src=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 0 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
mirror=(UChar32)uprv_strtoul(fields[1][0], &end, 16);
|
||||
if(end<=fields[1][0] || end!=fields[1][1]) {
|
||||
fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 1 at %s\n", fields[1][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
addMirror(src, mirror);
|
||||
}
|
||||
|
||||
static void
|
||||
parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 2, mirrorLineFn, NULL, pErrorCode);
|
||||
}
|
||||
|
||||
/* parser for UnicodeData.txt ----------------------------------------------- */
|
||||
|
||||
const char *const
|
||||
bidiNames[U_CHAR_DIRECTION_COUNT]={
|
||||
"L", "R", "EN", "ES", "ET", "AN", "CS", "B", "S",
|
||||
"WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN"
|
||||
};
|
||||
|
||||
static void U_CALLCONV
|
||||
unicodeDataLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *end;
|
||||
UErrorCode errorCode;
|
||||
UChar32 c;
|
||||
int32_t i;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
|
||||
/* get the character code, field 0 */
|
||||
c=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
fprintf(stderr, "genbidi: syntax error in field 0 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get BiDi class, field 4 */
|
||||
i=getTokenIndex(bidiNames, U_CHAR_DIRECTION_COUNT, fields[4][0]);
|
||||
if(i>=0) {
|
||||
if(!upvec_setValue(pv, c, c+1, 0, (uint32_t)i, UBIDI_CLASS_MASK, &errorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set bidi class for U+%04lx, code: %s\n",
|
||||
(long)c, u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "genbidi: unknown BiDi class \"%s\" at U+%04lx\n",
|
||||
fields[4][0], (long)c);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get Mirrored flag, field 9 */
|
||||
if(*fields[9][0]=='Y') {
|
||||
if(!upvec_setValue(pv, c, c+1, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
|
||||
(long)c, u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
} else if(fields[9][1]-fields[9][0]!=1 || *fields[9][0]!='N') {
|
||||
fprintf(stderr, "genbidi: syntax error in field 9 at U+%04lx\n",
|
||||
(long)c);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parseDB(const char *filename, UErrorCode *pErrorCode) {
|
||||
/* default Bidi classes for unassigned code points */
|
||||
static const UChar32 defaultBidi[][3]={ /* { start, end, class } */
|
||||
/* R: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF */
|
||||
{ 0x0590, 0x05FF, U_RIGHT_TO_LEFT },
|
||||
{ 0x07C0, 0x08FF, U_RIGHT_TO_LEFT },
|
||||
{ 0xFB1D, 0xFB4F, U_RIGHT_TO_LEFT },
|
||||
{ 0x10800, 0x10FFF, U_RIGHT_TO_LEFT },
|
||||
|
||||
/* AL: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE */
|
||||
{ 0x0600, 0x07BF, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xFB50, 0xFDCF, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xFDF0, 0xFDFF, U_RIGHT_TO_LEFT_ARABIC },
|
||||
{ 0xFE70, 0xFEFE, U_RIGHT_TO_LEFT_ARABIC }
|
||||
|
||||
/* L otherwise */
|
||||
};
|
||||
|
||||
char *fields[15][2];
|
||||
UChar32 start, end;
|
||||
int32_t i;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set default Bidi classes for unassigned code points.
|
||||
* See the documentation for Bidi_Class in UCD.html in the Unicode data.
|
||||
* http://www.unicode.org/Public/
|
||||
*/
|
||||
for(i=0; i<LENGTHOF(defaultBidi); ++i) {
|
||||
start=defaultBidi[i][0];
|
||||
end=defaultBidi[i][1];
|
||||
if(!upvec_setValue(pv, start, end+1, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
|
||||
(long)start, (long)end, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* DerivedBidiClass.txt ----------------------------------------------------- */
|
||||
|
||||
static void U_CALLCONV
|
||||
bidiClassLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *s;
|
||||
uint32_t oldStart, start, limit, value, props32;
|
||||
UBool didSet;
|
||||
|
||||
/* get the code point range */
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse bidi class */
|
||||
s=trimTerminateField(fields[1][0], fields[1][1]);
|
||||
value=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, s);
|
||||
if((int32_t)value<0) {
|
||||
fprintf(stderr, "genbidi error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
didSet=FALSE;
|
||||
oldStart=start;
|
||||
for(; start<limit; ++start) {
|
||||
props32=upvec_getValue(pv, start, 0);
|
||||
|
||||
/* ignore if this bidi class is already set */
|
||||
if(value==UBIDI_GET_CLASS(props32)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* set only if the bidi class is 0 (L);
|
||||
* intended to set only for unassigned code points (Cn)
|
||||
* but we don't have general categories (like Cn) available here
|
||||
*/
|
||||
if(UBIDI_GET_CLASS(props32)!=0) {
|
||||
/* error if this one contradicts what we parsed from UnicodeData.txt */
|
||||
fprintf(stderr, "genbidi error: different bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* set bidi class for Cn according to DerivedBidiClass.txt */
|
||||
didSet=TRUE;
|
||||
}
|
||||
|
||||
if(didSet) {
|
||||
if(beVerbose) {
|
||||
printf("setting U+%04x..U+%04x bidi class %d\n", (int)oldStart, (int)limit-1, (int)value);
|
||||
}
|
||||
if(!upvec_setValue(pv, oldStart, limit, 0, value, UBIDI_CLASS_MASK, pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n",
|
||||
(int)oldStart, (int)limit-1, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
194
icu4c/source/tools/genbidi/genbidi.dsp
Normal file
194
icu4c/source/tools/genbidi/genbidi.dsp
Normal file
@ -0,0 +1,194 @@
|
||||
# Microsoft Developer Studio Project File - Name="genbidi" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=genbidi - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "genbidi.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "genbidi.mak" CFG="genbidi - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "genbidi - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "genbidi - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "genbidi - Win64 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "genbidi - Win64 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "genbidi - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD CPP /nologo /G6 /MD /Za /W3 /GX /O2 /I "..\toolutil" /I "..\..\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib\Release" /libpath:"..\..\..\lib"
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Release\genbidi.exe
|
||||
InputPath=.\Release\genbidi.exe
|
||||
InputName=genbidi
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "genbidi - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD CPP /nologo /G6 /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\toolutil" /I "..\..\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /GZ /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 icuucd.lib icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib\Debug" /libpath:"..\..\..\lib"
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Debug\genbidi.exe
|
||||
InputPath=.\Debug\genbidi.exe
|
||||
InputName=genbidi
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "genbidi - Win64 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD CPP /nologo /MD /Za /W3 /GX /Zi /O2 /I "..\toolutil" /I "..\..\common" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
|
||||
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib\Release" /libpath:"..\..\..\lib" /machine:IA64
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Release\genbidi.exe
|
||||
InputPath=.\Release\genbidi.exe
|
||||
InputName=genbidi
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "genbidi - Win64 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /Zi /Od /I "..\toolutil" /I "..\..\common" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GZ /Wp64 /Zm600 /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
|
||||
# ADD LINK32 icuucd.lib icutud.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib\Debug" /libpath:"..\..\..\lib" /machine:IA64
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Debug\genbidi.exe
|
||||
InputPath=.\Debug\genbidi.exe
|
||||
InputName=genbidi
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "genbidi - Win32 Release"
|
||||
# Name "genbidi - Win32 Debug"
|
||||
# Name "genbidi - Win64 Release"
|
||||
# Name "genbidi - Win64 Debug"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\genbidi.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\genbidi.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\store.c
|
||||
# End Source File
|
||||
# End Target
|
||||
# End Project
|
47
icu4c/source/tools/genbidi/genbidi.h
Normal file
47
icu4c/source/tools/genbidi/genbidi.h
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: genbidi.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004dec30
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __GENBIDI_H__
|
||||
#define __GENBIDI_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/* genbidi ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * Global flags.
 * beVerbose: when set, the tool prints progress and size statistics to stdout.
 * haveCopyright: when set, generateData() passes U_COPYRIGHT_STRING to
 * udata_create(); otherwise it passes NULL.
 * NOTE(review): both are presumably defined and set in genbidi.c - confirm.
 */
|
||||
extern UBool beVerbose, haveCopyright;
|
||||
|
||||
/*
 * properties vectors in genbidi.c
 * Accessed through upvec_getValue()/upvec_setValue()/upvec_getRow();
 * column 0 of each row holds the bidi/shaping properties word that
 * generateData() copies into the trie.
 */
|
||||
extern uint32_t *pv;
|
||||
|
||||
/* prototypes */
|
||||
/*
 * NOTE(review): the definition is not part of store.c; presumably this
 * composes a UCD file path in basename from filename and suffix - confirm
 * against genbidi.c.
 */
U_CFUNC void
|
||||
writeUCDFilename(char *basename, const char *filename, const char *suffix);
|
||||
|
||||
/* Parse the version string v and store it as the dataVersion of the output file header. */
extern void
|
||||
setUnicodeVersion(const char *v);
|
||||
|
||||
/*
 * Record that code point src has the bidi mirroring code point mirror.
 * A mapping of a code point to itself is ignored.
 */
extern void
|
||||
addMirror(UChar32 src, UChar32 mirror);
|
||||
|
||||
/*
 * Build the mirroring table and the properties trie and write the
 * data file into the directory dataDir; exits the process on failure.
 */
extern void
|
||||
generateData(const char *dataDir);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
167
icu4c/source/tools/genbidi/genbidi.vcproj
Normal file
167
icu4c/source/tools/genbidi/genbidi.vcproj
Normal file
@ -0,0 +1,167 @@
|
||||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="7.10"
|
||||
Name="genbidi"
|
||||
ProjectGUID="{DB312A49-12A9-4E07-9E96-451DC2D8FF62}"
|
||||
SccProjectName=""
|
||||
SccLocalPath="">
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"/>
|
||||
</Platforms>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory=".\Release"
|
||||
IntermediateDirectory=".\Release"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
InlineFunctionExpansion="2"
|
||||
ImproveFloatingPointConsistency="TRUE"
|
||||
AdditionalIncludeDirectories="..\toolutil,..\..\common"
|
||||
PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
|
||||
StringPooling="TRUE"
|
||||
RuntimeLibrary="2"
|
||||
EnableFunctionLevelLinking="TRUE"
|
||||
DisableLanguageExtensions="TRUE"
|
||||
PrecompiledHeaderFile=".\Release/genbidi.pch"
|
||||
AssemblerListingLocation=".\Release/"
|
||||
ObjectFile=".\Release/"
|
||||
ProgramDataBaseFileName=".\Release/"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin
|
||||
"
|
||||
Outputs="..\..\..\bin\$(TargetFileName)"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile=".\Release/genbidi.exe"
|
||||
LinkIncremental="1"
|
||||
SuppressStartupBanner="TRUE"
|
||||
ProgramDatabaseFile=".\Release/genbidi.pdb"
|
||||
SubSystem="1"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TypeLibraryName=".\Release/genbidi.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
<Tool
|
||||
Name="VCManagedWrapperGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory=".\Debug"
|
||||
IntermediateDirectory=".\Debug"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
ImproveFloatingPointConsistency="TRUE"
|
||||
OptimizeForProcessor="2"
|
||||
AdditionalIncludeDirectories="..\toolutil,..\..\common"
|
||||
PreprocessorDefinitions="WIN32,_DEBUG,_CONSOLE"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
DisableLanguageExtensions="TRUE"
|
||||
PrecompiledHeaderFile=".\Debug/genbidi.pch"
|
||||
AssemblerListingLocation=".\Debug/"
|
||||
ObjectFile=".\Debug/"
|
||||
ProgramDataBaseFileName=".\Debug/"
|
||||
BrowseInformation="1"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
DebugInformationFormat="4"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin
|
||||
"
|
||||
Outputs="..\..\..\bin\$(TargetFileName)"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile=".\Debug/genbidi.exe"
|
||||
LinkIncremental="2"
|
||||
SuppressStartupBanner="TRUE"
|
||||
GenerateDebugInformation="TRUE"
|
||||
ProgramDatabaseFile=".\Debug/genbidi.pdb"
|
||||
SubSystem="1"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TypeLibraryName=".\Debug/genbidi.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
<Tool
|
||||
Name="VCManagedWrapperGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="c;cpp;rc">
|
||||
<File
|
||||
RelativePath=".\genbidi.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\store.c">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h">
|
||||
<File
|
||||
RelativePath=".\genbidi.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Resource Files"
|
||||
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
356
icu4c/source/tools/genbidi/store.c
Normal file
356
icu4c/source/tools/genbidi/store.c
Normal file
@ -0,0 +1,356 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: store.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004dec30
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Store Unicode bidi/shaping properties efficiently for
|
||||
* random access.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "utrie.h"
|
||||
#include "uarrsort.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unewdata.h"
|
||||
#include "propsvec.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "genbidi.h"
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* Unicode bidi/shaping properties file format ---------------------------------
|
||||
|
||||
The file format prepared and written here contains several data
|
||||
structures that store indexes or data.
|
||||
|
||||
Before the data contents described below, there are the headers required by
|
||||
the udata API for loading ICU data. Especially, a UDataInfo structure
|
||||
precedes the actual data. It contains platform properties values and the
|
||||
file format version.
|
||||
|
||||
The following is a description of format version 1.0 .
|
||||
|
||||
The file contains the following structures:
|
||||
|
||||
const int32_t indexes[i0] with values i0, i1, ...:
|
||||
(see UBIDI_IX_... constants for names of indexes)
|
||||
|
||||
i0 indexLength; -- length of indexes[] (UBIDI_IX_TOP)
|
||||
i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
|
||||
i2 trieSize; -- size in bytes of the bidi/shaping properties trie
|
||||
i3 mirrorLength; -- length in uint32_t of the bidi mirroring array
|
||||
|
||||
i4..i14 reservedIndexes; -- reserved values; 0 for now
|
||||
|
||||
i15 maxValues; -- maximum code values for enumerated properties
|
||||
|
||||
Serialized trie, see utrie.h;
|
||||
|
||||
const uint32_t mirrors[mirrorLength];
|
||||
|
||||
|
||||
Trie data word:
|
||||
Bits
|
||||
31..29 signed delta to bidi mirroring code point
|
||||
(add delta to input code point)
|
||||
0 no such code point (source maps to itself)
|
||||
-3..-1, 1..3 delta
|
||||
-4 look in mirrors table
|
||||
28 is mirrored
|
||||
27 Bidi_Control
|
||||
26 Join_Control
|
||||
25..14 reserved (0)
|
||||
13.. 8 Joining_Group
|
||||
7.. 5 Joining_Type
|
||||
4.. 0 BiDi category
|
||||
|
||||
|
||||
Mirrors:
|
||||
Stores some of the bidi mirroring data, where each code point maps to
|
||||
at most one other.
|
||||
Most code points do not have a mirroring code point; most that do have a signed
|
||||
delta stored in the trie data value. Only those where the delta does not fit
|
||||
into the trie data are stored in this table.
|
||||
|
||||
Logically, this is a two-column table with source and mirror code points.
|
||||
|
||||
Physically, the table is compressed by taking advantage of the fact that each
|
||||
mirror code point is also a source code point
|
||||
(each of them is a mirror of the other).
|
||||
Therefore, both logical columns contain the same set of code points, which needs
|
||||
to be stored only once.
|
||||
|
||||
The table stores source code points, and also for each the index of its mirror
|
||||
code point in the same table, in a simple array of uint32_t.
|
||||
Bits
|
||||
31..21 index to mirror code point (unsigned)
|
||||
20.. 0 source code point
|
||||
|
||||
The table is sorted by source code points.
|
||||
----------------------------------------------------------------------------- */
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
static UDataInfo dataInfo={
|
||||
sizeof(UDataInfo),
|
||||
0,
|
||||
|
||||
U_IS_BIG_ENDIAN,
|
||||
U_CHARSET_FAMILY,
|
||||
U_SIZEOF_UCHAR,
|
||||
0,
|
||||
|
||||
/* dataFormat="BiDi" */
|
||||
{ UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
|
||||
{ 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
|
||||
{ 4, 0, 1, 0 } /* dataVersion */
|
||||
};
|
||||
|
||||
/* long-distance mirroring pairs [source, mirror] collected by addMirror(), and their count */
|
||||
static uint32_t mirrors[UBIDI_MAX_MIRROR_INDEX+1][2];
|
||||
static uint16_t mirrorTop=0;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
setUnicodeVersion(const char *v) {
|
||||
UVersionInfo version;
|
||||
u_versionFromString(version, v);
|
||||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
}
|
||||
|
||||
/* bidi mirroring table ----------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
addMirror(UChar32 src, UChar32 mirror) {
|
||||
UErrorCode errorCode;
|
||||
int32_t delta;
|
||||
|
||||
delta=mirror-src;
|
||||
if(delta==0) {
|
||||
return; /* mapping to self=no mapping */
|
||||
}
|
||||
|
||||
if(delta<UBIDI_MIN_MIRROR_DELTA || UBIDI_MAX_MIRROR_DELTA<delta) {
|
||||
/* delta does not fit into the trie properties value, store in the mirrors[] table */
|
||||
if(mirrorTop==LENGTHOF(mirrors)) {
|
||||
fprintf(stderr, "genbidi error: too many long-distance mirroring mappings\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
|
||||
/* possible: search the table so far and see if src is already listed */
|
||||
|
||||
mirrors[mirrorTop][0]=(uint32_t)src;
|
||||
mirrors[mirrorTop][1]=(uint32_t)mirror;
|
||||
++mirrorTop;
|
||||
|
||||
/* set an escape marker in src's properties */
|
||||
delta=UBIDI_ESC_MIRROR_DELTA;
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(
|
||||
!upvec_setValue(
|
||||
pv, src, src+1, 0,
|
||||
(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT, (uint32_t)(-1)<<UBIDI_MIRROR_DELTA_SHIFT,
|
||||
&errorCode)
|
||||
) {
|
||||
fprintf(stderr, "genbidi error: unable to set mirroring delta, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
compareMirror(const void *context, const void *left, const void *right) {
|
||||
UChar32 l, r;
|
||||
|
||||
l=UBIDI_GET_MIRROR_CODE_POINT(((const uint32_t *)left)[0]);
|
||||
r=UBIDI_GET_MIRROR_CODE_POINT(((const uint32_t *)right)[0]);
|
||||
return l-r;
|
||||
}
|
||||
|
||||
static void
|
||||
makeMirror() {
|
||||
uint32_t *reducedMirror;
|
||||
UErrorCode errorCode;
|
||||
int32_t i, j, start, limit, step;
|
||||
uint32_t c;
|
||||
|
||||
/* sort the mirroring table by source code points */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
uprv_sortArray(mirrors, mirrorTop, 8,
|
||||
compareMirror, NULL, FALSE, &errorCode);
|
||||
|
||||
/*
|
||||
* reduce the 2-column table to a single column
|
||||
* by putting the index to the mirror entry into the source entry
|
||||
*
|
||||
* first:
|
||||
* find each mirror code point in the source column and set each other's indexes
|
||||
*
|
||||
* second:
|
||||
* reduce the table, combine the source code points with their indexes
|
||||
* and store as a simple array of uint32_t
|
||||
*/
|
||||
for(i=0; i<mirrorTop; ++i) {
|
||||
c=mirrors[i][1]; /* mirror code point */
|
||||
if(c>0x1fffff) {
|
||||
continue; /* this entry already has an index */
|
||||
}
|
||||
|
||||
/* search for the mirror code point in the source column */
|
||||
if(c<mirrors[i][0]) {
|
||||
/* search before i */
|
||||
start=i-1;
|
||||
limit=-1;
|
||||
step=-1;
|
||||
} else {
|
||||
start=i+1;
|
||||
limit=mirrorTop;
|
||||
step=1;
|
||||
}
|
||||
|
||||
for(j=start;; j+=step) {
|
||||
if(j==limit) {
|
||||
fprintf(stderr,
|
||||
"genbidi error: bidi mirror does not roundtrip - %04lx->%04lx->?\n",
|
||||
(long)mirrors[i][0], (long)mirrors[i][1]);
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
if(c==mirrors[j][0]) {
|
||||
/*
|
||||
* found the mirror code point c in the source column,
|
||||
* set both entries' indexes to each other
|
||||
*/
|
||||
if(UBIDI_GET_MIRROR_CODE_POINT(mirrors[i][0])!=UBIDI_GET_MIRROR_CODE_POINT(mirrors[j][1])) {
|
||||
/* roundtrip check fails */
|
||||
fprintf(stderr,
|
||||
"genbidi error: bidi mirrors do not roundtrip - %04lx->%04lx->%04lx\n",
|
||||
(long)mirrors[i][0], (long)mirrors[i][1], (long)mirrors[j][1]);
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
mirrors[i][1]|=(uint32_t)j<<UBIDI_MIRROR_INDEX_SHIFT;
|
||||
mirrors[j][1]|=(uint32_t)i<<UBIDI_MIRROR_INDEX_SHIFT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* now the second step, the actual reduction of the table */
|
||||
reducedMirror=mirrors[0];
|
||||
for(i=0; i<mirrorTop; ++i) {
|
||||
reducedMirror[i]=mirrors[i][0]|(mirrors[i][1]&~0x1fffff);
|
||||
}
|
||||
|
||||
if(U_FAILURE(errorCode)) {
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
/* generate output data ----------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
generateData(const char *dataDir) {
|
||||
static int32_t indexes[UBIDI_IX_TOP]={
|
||||
UBIDI_IX_TOP
|
||||
};
|
||||
static uint8_t trieBlock[40000];
|
||||
|
||||
const uint32_t *row;
|
||||
UChar32 start, limit;
|
||||
int32_t i;
|
||||
|
||||
UNewDataMemory *pData;
|
||||
UNewTrie *pTrie;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
int32_t trieSize;
|
||||
long dataLength;
|
||||
|
||||
makeMirror();
|
||||
|
||||
pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
|
||||
if(pTrie==NULL) {
|
||||
fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
}
|
||||
|
||||
for(i=0; (row=upvec_getRow(pv, i, &start, &limit))!=NULL; ++i) {
|
||||
if(!utrie_setRange32(pTrie, start, limit, *row, TRUE)) {
|
||||
fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, FALSE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
indexes[UBIDI_IX_TRIE_SIZE]=trieSize;
|
||||
indexes[UBIDI_IX_MIRROR_LENGTH]=mirrorTop;
|
||||
indexes[UBIDI_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+4*mirrorTop;
|
||||
|
||||
if(beVerbose) {
|
||||
printf("trie size in bytes: %5d\n", (int)trieSize);
|
||||
printf("size in bytes of mirroring table: %5d\n", 4*mirrorTop);
|
||||
printf("data size: %5d\n", (int)indexes[UBIDI_IX_LENGTH]);
|
||||
}
|
||||
|
||||
indexes[UBIDI_MAX_VALUES_INDEX]=
|
||||
((int32_t)U_CHAR_DIRECTION_COUNT-1)|
|
||||
(((int32_t)U_JT_COUNT-1)<<UBIDI_JT_SHIFT)|
|
||||
(((int32_t)U_JG_COUNT-1)<<UBIDI_JG_SHIFT);
|
||||
|
||||
/* write the data */
|
||||
pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
|
||||
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
udata_writeBlock(pData, indexes, sizeof(indexes));
|
||||
udata_writeBlock(pData, trieBlock, trieSize);
|
||||
udata_writeBlock(pData, mirrors, 4*mirrorTop);
|
||||
|
||||
/* finish up */
|
||||
dataLength=udata_finish(pData, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genbidi: error %d writing the output file\n", errorCode);
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
|
||||
fprintf(stderr, "genbidi: data length %ld != calculated size %d\n",
|
||||
dataLength, (int)indexes[UBIDI_IX_LENGTH]);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
utrie_close(pTrie);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
@ -40,6 +40,7 @@
|
||||
#include "ucnv_io.h"
|
||||
#include "uprops.h"
|
||||
#include "ucase.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "ucol_swp.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unormimp.h"
|
||||
@ -315,6 +316,9 @@ static const struct {
|
||||
{ { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
|
||||
ucase_swap }, /* dataFormat="cAsE" */
|
||||
|
||||
{ { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
|
||||
ubidi_swap }, /* dataFormat="BiDi" */
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
{ { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user