ICU-6453 upvec_ introduce UPropsVectors type, clean up API a bit
X-SVN-Rev: 24866
This commit is contained in:
parent
362d08e00b
commit
35f8ea1402
@ -13,7 +13,7 @@
|
||||
* created on: 2002feb22
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Store additional Unicode character properties in bit set vectors.
|
||||
* Store bits (Unicode character properties) in bit set vectors.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -24,22 +24,81 @@
|
||||
#include "uarrsort.h"
|
||||
#include "propsvec.h"
|
||||
|
||||
struct UPropsVectors {
|
||||
uint32_t *v;
|
||||
int32_t columns; /* number of columns, plus two for start & limit values */
|
||||
int32_t maxRows;
|
||||
int32_t rows;
|
||||
int32_t prevRow; /* search optimization: remember last row seen */
|
||||
UBool isCompacted;
|
||||
};
|
||||
|
||||
#define UPVEC_INITIAL_ROWS (1<<14)
|
||||
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<17)
|
||||
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
|
||||
|
||||
/*
 * Create a properties-vectors builder with the given number of value columns.
 *
 * The new object starts with one row covering all of Unicode (0..0x10ffff)
 * plus one single-code-point row for each special pseudo code point
 * (UPVEC_FIRST_SPECIAL_CP..UPVEC_MAX_CP); all value columns are zero.
 *
 * @param columns    number of value columns per row, must be at least 1
 * @param pErrorCode in/out ICU error code; nothing is done if it already
 *                   indicates a failure.
 *                   Set to U_ILLEGAL_ARGUMENT_ERROR if columns<1,
 *                   or to U_MEMORY_ALLOCATION_ERROR if allocation fails.
 * @return the new object (release it with upvec_close()), or NULL on failure
 */
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
    UPropsVectors *pv;
    uint32_t *v, *row;
    uint32_t cp;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(columns<1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /*
     * Count the range start and limit columns *before* sizing the array:
     * every row occupies columns+2 words, and maxRows below promises room
     * for UPVEC_INITIAL_ROWS such rows. Sizing the allocation with only the
     * value columns would leave the buffer too small for maxRows rows.
     */
    columns+=2;

    pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
    v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
    if(pv==NULL || v==NULL) {
        /* one of the two allocations may have succeeded; release both */
        uprv_free(pv);
        uprv_free(v);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memset(pv, 0, sizeof(UPropsVectors));
    pv->v=v;
    pv->columns=columns;
    pv->maxRows=UPVEC_INITIAL_ROWS;
    pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);

    /* set the all-Unicode row and the special-value rows */
    row=pv->v;
    uprv_memset(row, 0, pv->rows*columns*4);
    row[0]=0;
    row[1]=0x110000;
    row+=columns;
    for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
        row[0]=cp;
        row[1]=cp+1;
        row+=columns;
    }
    return pv;
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_close(UPropsVectors *pv) {
|
||||
if(pv!=NULL) {
|
||||
uprv_free(pv->v);
|
||||
uprv_free(pv);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
_findRow(uint32_t *pv, UChar32 rangeStart) {
|
||||
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
|
||||
uint32_t *row;
|
||||
int32_t *hdr;
|
||||
int32_t columns, i, start, limit, prevRow, rows;
|
||||
|
||||
hdr=(int32_t *)pv;
|
||||
columns=hdr[UPVEC_COLUMNS];
|
||||
limit=hdr[UPVEC_ROWS];
|
||||
prevRow=hdr[UPVEC_PREV_ROW];
|
||||
rows=hdr[UPVEC_ROWS];
|
||||
pv+=UPVEC_HEADER_LENGTH;
|
||||
columns=pv->columns;
|
||||
rows=limit=pv->rows;
|
||||
prevRow=pv->prevRow;
|
||||
|
||||
/* check the vicinity of the last-seen row */
|
||||
if(prevRow<rows) {
|
||||
row=pv+prevRow*columns;
|
||||
row=pv->v+prevRow*columns;
|
||||
if(rangeStart>=(UChar32)row[0]) {
|
||||
if(rangeStart<(UChar32)row[1]) {
|
||||
/* same row as last seen */
|
||||
@ -49,7 +108,7 @@ _findRow(uint32_t *pv, UChar32 rangeStart) {
|
||||
rangeStart>=(UChar32)(row+=columns)[0] && rangeStart<(UChar32)row[1]
|
||||
) {
|
||||
/* next row after the last one */
|
||||
hdr[UPVEC_PREV_ROW]=prevRow;
|
||||
pv->prevRow=prevRow;
|
||||
return row;
|
||||
}
|
||||
}
|
||||
@ -59,11 +118,11 @@ _findRow(uint32_t *pv, UChar32 rangeStart) {
|
||||
start=0;
|
||||
while(start<limit-1) {
|
||||
i=(start+limit)/2;
|
||||
row=pv+i*columns;
|
||||
row=pv->v+i*columns;
|
||||
if(rangeStart<(UChar32)row[0]) {
|
||||
limit=i;
|
||||
} else if(rangeStart<(UChar32)row[1]) {
|
||||
hdr[UPVEC_PREV_ROW]=i;
|
||||
pv->prevRow=i;
|
||||
return row;
|
||||
} else {
|
||||
start=i;
|
||||
@ -71,54 +130,12 @@ _findRow(uint32_t *pv, UChar32 rangeStart) {
|
||||
}
|
||||
|
||||
/* must be found because all ranges together always cover all of Unicode */
|
||||
hdr[UPVEC_PREV_ROW]=start;
|
||||
return pv+start*columns;
|
||||
}
|
||||
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_open(int32_t columns, int32_t maxRows) {
|
||||
uint32_t *pv, *row;
|
||||
uint32_t cp;
|
||||
int32_t length;
|
||||
|
||||
if(columns<1 || maxRows<1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
columns+=2; /* count range start and limit columns */
|
||||
length=UPVEC_HEADER_LENGTH+maxRows*columns;
|
||||
pv=(uint32_t *)uprv_malloc(length*4);
|
||||
if(pv!=NULL) {
|
||||
/* set header */
|
||||
pv[UPVEC_COLUMNS]=(uint32_t)columns;
|
||||
pv[UPVEC_MAXROWS]=(uint32_t)maxRows;
|
||||
pv[UPVEC_ROWS]=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
|
||||
pv[UPVEC_PREV_ROW]=0;
|
||||
|
||||
/* set the all-Unicode row and the special-value rows */
|
||||
row=pv+UPVEC_HEADER_LENGTH;
|
||||
uprv_memset(row, 0, pv[UPVEC_ROWS]*columns*4);
|
||||
row[0]=0;
|
||||
row[1]=0x110000;
|
||||
row+=columns;
|
||||
for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
|
||||
row[0]=cp;
|
||||
row[1]=cp+1;
|
||||
row+=columns;
|
||||
}
|
||||
}
|
||||
return pv;
|
||||
pv->prevRow=start;
|
||||
return pv->v+start*columns;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_close(uint32_t *pv) {
|
||||
if(pv!=NULL) {
|
||||
uprv_free(pv);
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
upvec_setValue(uint32_t *pv,
|
||||
upvec_setValue(UPropsVectors *pv,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t column,
|
||||
uint32_t value, uint32_t mask,
|
||||
@ -129,21 +146,24 @@ upvec_setValue(uint32_t *pv,
|
||||
UBool splitFirstRow, splitLastRow;
|
||||
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return FALSE;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if( pv==NULL ||
|
||||
start<0 || start>end || end>UPVEC_MAX_CP ||
|
||||
column<0 || (uint32_t)(column+1)>=pv[UPVEC_COLUMNS]
|
||||
column<0 || column>=(pv->columns-2)
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
return;
|
||||
}
|
||||
if(pv->isCompacted) {
|
||||
*pErrorCode=U_NO_WRITE_PERMISSION;
|
||||
return;
|
||||
}
|
||||
limit=end+1;
|
||||
|
||||
/* initialize */
|
||||
columns=(int32_t)pv[UPVEC_COLUMNS];
|
||||
columns=pv->columns;
|
||||
column+=2; /* skip range start and limit columns */
|
||||
value&=mask;
|
||||
|
||||
@ -187,21 +207,39 @@ upvec_setValue(uint32_t *pv,
|
||||
if(splitFirstRow || splitLastRow) {
|
||||
int32_t count, rows;
|
||||
|
||||
rows=(int32_t)pv[UPVEC_ROWS];
|
||||
if((rows+splitFirstRow+splitLastRow)>(int32_t)pv[UPVEC_MAXROWS]) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
rows=pv->rows;
|
||||
if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
|
||||
uint32_t *newVectors;
|
||||
int32_t newMaxRows;
|
||||
|
||||
if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
|
||||
newMaxRows=UPVEC_MEDIUM_ROWS;
|
||||
} else if(pv->maxRows<UPVEC_MAX_ROWS) {
|
||||
newMaxRows=UPVEC_MAX_ROWS;
|
||||
} else {
|
||||
/* Implementation bug, or UPVEC_MAX_ROWS too low. */
|
||||
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
|
||||
if(newVectors==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_free(pv->v);
|
||||
pv->v=newVectors;
|
||||
pv->maxRows=newMaxRows;
|
||||
}
|
||||
|
||||
/* count the number of row cells to move after the last row, and move them */
|
||||
count = (int32_t)((pv+UPVEC_HEADER_LENGTH+rows*columns)-(lastRow+columns));
|
||||
count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
|
||||
if(count>0) {
|
||||
uprv_memmove(
|
||||
lastRow+(1+splitFirstRow+splitLastRow)*columns,
|
||||
lastRow+columns,
|
||||
count*4);
|
||||
}
|
||||
pv[UPVEC_ROWS]=rows+splitFirstRow+splitLastRow;
|
||||
pv->rows=rows+splitFirstRow+splitLastRow;
|
||||
|
||||
/* split the first row, and move the firstRow pointer to the second part */
|
||||
if(splitFirstRow) {
|
||||
@ -226,7 +264,7 @@ upvec_setValue(uint32_t *pv,
|
||||
}
|
||||
|
||||
/* set the "row last seen" to the last row for the range */
|
||||
pv[UPVEC_PREV_ROW]=(uint32_t)((lastRow-(pv+UPVEC_HEADER_LENGTH))/columns);
|
||||
pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
|
||||
|
||||
/* set the input value in all remaining rows */
|
||||
firstRow+=column;
|
||||
@ -239,37 +277,36 @@ upvec_setValue(uint32_t *pv,
|
||||
}
|
||||
firstRow+=columns;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
upvec_getValue(uint32_t *pv, UChar32 c, int32_t column) {
|
||||
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
|
||||
uint32_t *row;
|
||||
|
||||
if(pv==NULL || c<0 || c>UPVEC_MAX_CP) {
|
||||
if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) {
|
||||
return 0;
|
||||
}
|
||||
row=_findRow(pv, c);
|
||||
row=_findRow((UPropsVectors *)pv, c);
|
||||
return row[2+column];
|
||||
}
|
||||
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_getRow(uint32_t *pv, int32_t rowIndex,
|
||||
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
|
||||
UChar32 *pRangeStart, UChar32 *pRangeEnd) {
|
||||
uint32_t *row;
|
||||
int32_t columns;
|
||||
|
||||
if(pv==NULL || rowIndex<0 || rowIndex>=(int32_t)pv[UPVEC_ROWS]) {
|
||||
if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
columns=(int32_t)pv[UPVEC_COLUMNS];
|
||||
row=pv+UPVEC_HEADER_LENGTH+rowIndex*columns;
|
||||
columns=pv->columns;
|
||||
row=pv->v+rowIndex*columns;
|
||||
if(pRangeStart!=NULL) {
|
||||
*pRangeStart=row[0];
|
||||
*pRangeStart=(UChar32)row[0];
|
||||
}
|
||||
if(pRangeEnd!=NULL) {
|
||||
*pRangeEnd=row[1]-1;
|
||||
*pRangeEnd=(UChar32)row[1]-1;
|
||||
}
|
||||
return row+2;
|
||||
}
|
||||
@ -277,10 +314,10 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex,
|
||||
static int32_t U_CALLCONV
|
||||
upvec_compareRows(const void *context, const void *l, const void *r) {
|
||||
const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
|
||||
const uint32_t *pv=(const uint32_t *)context;
|
||||
const UPropsVectors *pv=(const UPropsVectors *)context;
|
||||
int32_t i, count, columns;
|
||||
|
||||
count=columns=(int32_t)pv[UPVEC_COLUMNS]; /* includes start/limit columns */
|
||||
count=columns=pv->columns; /* includes start/limit columns */
|
||||
|
||||
/* start comparing after start/limit but wrap around to them */
|
||||
i=2;
|
||||
@ -296,38 +333,38 @@ upvec_compareRows(const void *context, const void *l, const void *r) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
|
||||
uint32_t *row;
|
||||
int32_t i, columns, valueColumns, rows, count;
|
||||
UChar32 start, limit;
|
||||
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(pv==NULL || handler==NULL) {
|
||||
if(handler==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
if(pv->isCompacted) {
|
||||
return;
|
||||
}
|
||||
|
||||
rows=(int32_t)pv[UPVEC_ROWS];
|
||||
if(rows==0) {
|
||||
return 0;
|
||||
}
|
||||
/* Set the flag now: Sorting and compacting destroys the builder data structure. */
|
||||
pv->isCompacted=TRUE;
|
||||
|
||||
row=pv+UPVEC_HEADER_LENGTH;
|
||||
columns=(int32_t)pv[UPVEC_COLUMNS];
|
||||
rows=pv->rows;
|
||||
columns=pv->columns;
|
||||
valueColumns=columns-2; /* not counting start & limit */
|
||||
|
||||
/* sort the properties vectors to find unique vector values */
|
||||
if(rows>1) {
|
||||
uprv_sortArray(row, rows, columns*4,
|
||||
uprv_sortArray(pv->v, rows, columns*4,
|
||||
upvec_compareRows, pv, FALSE, pErrorCode);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -335,6 +372,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
|
||||
* This has to do almost the same work as the compaction below,
|
||||
* to find the indexes where the special-value rows will move.
|
||||
*/
|
||||
row=pv->v;
|
||||
count=-valueColumns;
|
||||
for(i=0; i<rows; ++i) {
|
||||
start=(UChar32)row[0];
|
||||
@ -347,7 +385,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
|
||||
if(start>=UPVEC_FIRST_SPECIAL_CP) {
|
||||
handler(context, start, start, count, row+2, valueColumns, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -361,7 +399,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
|
||||
handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
|
||||
count, row-valueColumns, valueColumns, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -371,7 +409,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
|
||||
* This destroys the Properties Vector structure and replaces it
|
||||
* with an array of just vector values.
|
||||
*/
|
||||
row=pv+UPVEC_HEADER_LENGTH;
|
||||
row=pv->v;
|
||||
count=-valueColumns;
|
||||
for(i=0; i<rows; ++i) {
|
||||
/* fetch these first before memmove() may overwrite them */
|
||||
@ -379,30 +417,53 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
|
||||
limit=(UChar32)row[1];
|
||||
|
||||
/* add a new values vector if it is different from the current one */
|
||||
if(count<0 || 0!=uprv_memcmp(row+2, pv+count, valueColumns*4)) {
|
||||
if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
|
||||
count+=valueColumns;
|
||||
uprv_memmove(pv+count, row+2, valueColumns*4);
|
||||
uprv_memmove(pv->v+count, row+2, valueColumns*4);
|
||||
}
|
||||
|
||||
if(start<UPVEC_FIRST_SPECIAL_CP) {
|
||||
handler(context, start, limit-1, count, pv+count, valueColumns, pErrorCode);
|
||||
handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
row+=columns;
|
||||
}
|
||||
|
||||
/* count is at the beginning of the last vector, add valueColumns to include that last vector */
|
||||
return count+valueColumns;
|
||||
/* count is at the beginning of the last vector, add one to include that last vector */
|
||||
pv->rows=count/valueColumns+1;
|
||||
}
|
||||
|
||||
/*
 * Return the vectors array of a compacted object.
 *
 * @param pv       the properties vectors; must not be NULL
 * @param pRows    if not NULL, receives pv->rows
 * @param pColumns if not NULL, receives the number of value columns
 *                 (pv->columns minus the start & limit columns)
 * @return pv->v, or NULL if pv has not been compacted
 *         (in which case *pRows and *pColumns are left untouched)
 */
U_CAPI uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
    if(pv->isCompacted) {
        if(pRows!=NULL) {
            *pRows=pv->rows;
        }
        if(pColumns!=NULL) {
            *pColumns=pv->columns-2;
        }
        return pv->v;
    }
    return NULL;
}
|
||||
|
||||
/*
 * Compact pv with upvec_compactToUTrie2Handler as the row handler,
 * then freeze the trie collected in the handler context with
 * 16-bit values (UTRIE2_16_VALUE_BITS).
 *
 * @param pv         the properties vectors to compact
 * @param pErrorCode in/out ICU error code
 * @return the frozen trie, or NULL if *pErrorCode indicates a failure
 *         (the trie is closed in that case)
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    UPVecToUTrie2Context builder={ NULL };

    upvec_compact(pv, upvec_compactToUTrie2Handler, &builder, pErrorCode);
    utrie2_freeze(builder.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        /* do not hand out a trie when compaction or freezing failed */
        utrie2_close(builder.trie);
        return NULL;
    }
    return builder.trie;
}
|
||||
|
||||
/*
|
||||
* TODO(markus): Add upvec_compactToUTrie2WithRowIndexes() function that returns
|
||||
* a UTrie2 and does not require the caller to pass in a callback function.
|
||||
*
|
||||
* Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
|
||||
* TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
|
||||
* some 16-bit field and builds and returns a UTrie2.
|
||||
*/
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* created on: 2002feb22
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Store additional Unicode character properties in bit set vectors.
|
||||
* Store bits (Unicode character properties) in bit set vectors.
|
||||
*/
|
||||
|
||||
#ifndef __UPROPSVEC_H__
|
||||
@ -25,11 +25,10 @@
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/*
|
||||
/**
|
||||
* Unicode Properties Vectors associated with code point ranges.
|
||||
* Stored in an array of uint32_t.
|
||||
*
|
||||
* The array starts with a header, then rows of integers store
|
||||
* Rows of uint32_t integers in a contiguous array store
|
||||
* the range limits and the properties vectors.
|
||||
*
|
||||
* In each row, row[0] contains the start code point and
|
||||
@ -41,15 +40,8 @@ U_CDECL_BEGIN
|
||||
* It would be possible to store only one range boundary per row,
|
||||
* but self-contained rows allow to later sort them by contents.
|
||||
*/
|
||||
enum {
|
||||
/* stores number of columns, plus two for start & limit values */
|
||||
UPVEC_COLUMNS,
|
||||
UPVEC_MAXROWS,
|
||||
UPVEC_ROWS,
|
||||
/* search optimization: remember last row seen */
|
||||
UPVEC_PREV_ROW,
|
||||
UPVEC_HEADER_LENGTH
|
||||
};
|
||||
struct UPropsVectors;
|
||||
typedef struct UPropsVectors UPropsVectors;
|
||||
|
||||
/*
|
||||
* Special pseudo code points for storing the initialValue and the errorValue,
|
||||
@ -67,28 +59,39 @@ enum {
|
||||
*/
|
||||
#define UPVEC_START_REAL_VALUES_CP 0x200000
|
||||
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_open(int32_t columns, int32_t maxRows);
|
||||
U_CAPI UPropsVectors * U_EXPORT2
|
||||
upvec_open(int32_t columns, UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_close(uint32_t *pv);
|
||||
upvec_close(UPropsVectors *pv);
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
upvec_setValue(uint32_t *pv,
|
||||
/*
|
||||
* In rows for code points [start..end], select the column,
|
||||
* reset the mask bits and set the value bits (ANDed with the mask).
|
||||
*
|
||||
* Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_setValue(UPropsVectors *pv,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t column,
|
||||
uint32_t value, uint32_t mask,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/*
|
||||
* Logically const but must not be used on the same pv concurrently!
|
||||
* Always returns 0 if called after upvec_compact().
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
upvec_getValue(uint32_t *pv, UChar32 c, int32_t column);
|
||||
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
|
||||
|
||||
/*
|
||||
* pRangeStart and pRangeEnd can be NULL.
|
||||
* @return NULL if rowIndex out of range and for illegal arguments
|
||||
* @return NULL if rowIndex out of range and for illegal arguments,
|
||||
* or if called after upvec_compact()
|
||||
*/
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_getRow(uint32_t *pv, int32_t rowIndex,
|
||||
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
|
||||
UChar32 *pRangeStart, UChar32 *pRangeEnd);
|
||||
|
||||
/*
|
||||
@ -98,7 +101,7 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex,
|
||||
* - store them contiguously from the beginning of the memory
|
||||
* - for each (non-unique) row, call the handler function
|
||||
*
|
||||
* The handler's rowIndex is the uint32_t index of the row in the compacted
|
||||
* The handler's rowIndex is the index of the row in the compacted
|
||||
* memory block.
|
||||
* (Therefore, it starts at 0 increases in increments of the columns value.)
|
||||
*
|
||||
@ -109,19 +112,28 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex,
|
||||
* and the row is arbitrary (but not NULL).
|
||||
* Then, in the second phase, the handler is called for each row of real values.
|
||||
*/
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
typedef void U_CALLCONV
|
||||
UPVecCompactHandler(void *context,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t rowIndex, uint32_t *row, int32_t columns,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CDECL_END
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
|
||||
/*
|
||||
* Get the vectors array after calling upvec_compact().
|
||||
* Returns NULL if called before upvec_compact().
|
||||
*/
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
|
||||
|
||||
/*
|
||||
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
|
||||
* vectors array, and freeze the trie.
|
||||
*/
|
||||
U_CAPI UTrie2 * U_EXPORT2
|
||||
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
|
||||
|
||||
struct UPVecToUTrieContext {
|
||||
UNewTrie *newTrie;
|
||||
|
@ -63,6 +63,7 @@ struct UConverterSelector {
|
||||
|
||||
/* internal function */
|
||||
static void generateSelectorData(UConverterSelector* result,
|
||||
UPropsVectors *upvec,
|
||||
const USet* excludedCodePoints,
|
||||
const UConverterUnicodeSet whichSet,
|
||||
UErrorCode* status);
|
||||
@ -203,7 +204,9 @@ U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList,
|
||||
}
|
||||
|
||||
newSelector->encodingsCount = converterListSize;
|
||||
generateSelectorData(newSelector, excludedCodePoints, whichSet, status);
|
||||
UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status);
|
||||
generateSelectorData(newSelector, upvec, excludedCodePoints, whichSet, status);
|
||||
upvec_close(upvec);
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
// at this point, we know pv and encodings have been allocated. No harm in
|
||||
@ -223,7 +226,7 @@ U_CAPI void ucnvsel_close(UConverterSelector *sel) {
|
||||
}
|
||||
uprv_free(sel->encodings[0]);
|
||||
uprv_free(sel->encodings);
|
||||
upvec_close(sel->pv);
|
||||
uprv_free(sel->pv);
|
||||
utrie2_close(sel->trie);
|
||||
uprv_free(sel);
|
||||
}
|
||||
@ -480,21 +483,19 @@ U_CAPI int32_t ucnvsel_serialize(const UConverterSelector* sel,
|
||||
|
||||
/* internal function! */
|
||||
static void generateSelectorData(UConverterSelector* result,
|
||||
UPropsVectors *upvec,
|
||||
const USet* excludedCodePoints,
|
||||
const UConverterUnicodeSet whichSet,
|
||||
UErrorCode* status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t columns = (result->encodingsCount+31)/32;
|
||||
|
||||
// 66000 as suggested by Markus [I suggest something like 66000 which
|
||||
// exceeds the number of BMP code points. There will be fewer ranges of
|
||||
// combinations of encodings. (I believe there are no encodings that have
|
||||
// interesting mappings for supplementary code points. All encodings either
|
||||
// support all of them or none of them.)]
|
||||
result->pv = upvec_open(columns, 66000); // create for all
|
||||
// unicode codepoints, and have space for all those bits needed!
|
||||
// set errorValue to all-ones
|
||||
for (int32_t col = 0 ; col < columns; col++) {
|
||||
upvec_setValue(result->pv, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
|
||||
upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
|
||||
col, ~0, ~0, status);
|
||||
}
|
||||
|
||||
@ -505,7 +506,6 @@ static void generateSelectorData(UConverterSelector* result,
|
||||
int32_t j;
|
||||
UConverter* test_converter = ucnv_open(result->encodings[i], status);
|
||||
if (U_FAILURE(*status)) {
|
||||
// status will propagate back to user
|
||||
return;
|
||||
}
|
||||
USet* unicode_point_set;
|
||||
@ -513,6 +513,10 @@ static void generateSelectorData(UConverterSelector* result,
|
||||
|
||||
ucnv_getUnicodeSet(test_converter, unicode_point_set,
|
||||
whichSet, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
ucnv_close(test_converter);
|
||||
return;
|
||||
}
|
||||
|
||||
column = i / 32;
|
||||
mask = 1 << (i%32);
|
||||
@ -529,18 +533,17 @@ static void generateSelectorData(UConverterSelector* result,
|
||||
// this will be reached for the converters that fill the set with
|
||||
// strings. Those should be ignored by our system
|
||||
} else {
|
||||
upvec_setValue(result->pv, start_char, end_char, column, ~0, mask,
|
||||
upvec_setValue(upvec, start_char, end_char, column, ~0, mask,
|
||||
status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
ucnv_close(test_converter);
|
||||
uset_close(unicode_point_set);
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// handle excluded encodings! Simply set their values to all 1's in the upvec
|
||||
if (excludedCodePoints) {
|
||||
int32_t item_count = uset_getItemCount(excludedCodePoints);
|
||||
@ -550,30 +553,29 @@ static void generateSelectorData(UConverterSelector* result,
|
||||
|
||||
uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
|
||||
status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
} else {
|
||||
for (int32_t col = 0 ; col < columns; col++) {
|
||||
upvec_setValue(result->pv, start_char, end_char, col, ~0, ~0,
|
||||
status);
|
||||
}
|
||||
for (int32_t col = 0 ; col < columns; col++) {
|
||||
upvec_setValue(upvec, start_char, end_char, col, ~0, ~0,
|
||||
status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// alright. Now, let's put things in the same exact form you'd get when you
|
||||
// unserialize things.
|
||||
UPVecToUTrie2Context toUTrie2={ NULL };
|
||||
result->pvCount = upvec_compact(result->pv, upvec_compactToUTrie2Handler,
|
||||
&toUTrie2, status);
|
||||
result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status);
|
||||
if (U_SUCCESS(*status)) {
|
||||
result->trie = toUTrie2.trie;
|
||||
utrie2_freeze(result->trie, UTRIE2_16_VALUE_BITS, status);
|
||||
uint32_t *memory = upvec_getArray(upvec, &result->pvCount, NULL);
|
||||
result->pvCount *= columns;
|
||||
result->pv = (uint32_t *)uprv_malloc(result->pvCount * 4);
|
||||
if (result->pv == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_memcpy(result->pv, memory, result->pvCount * 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// a bunch of functions for the enumeration thingie! Nothing fancy here. Just
|
||||
// iterate over the selected encodings
|
||||
struct Enumerator {
|
||||
|
@ -39,7 +39,7 @@
|
||||
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
uint32_t *pv;
|
||||
UPropsVectors *pv;
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
@ -169,7 +169,8 @@ singleEnumLineFn(void *context,
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set %s code: %s\n",
|
||||
sen->propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
@ -260,7 +261,8 @@ binariesLineFn(void *context,
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
|
||||
bin->binaries[i].propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
@ -394,7 +396,7 @@ main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
pv=upvec_open(2, 10000);
|
||||
pv=upvec_open(2, &errorCode);
|
||||
|
||||
/* process BidiMirroring.txt */
|
||||
writeUCDFilename(basename, "BidiMirroring", suffix);
|
||||
@ -522,7 +524,8 @@ unicodeDataLineFn(void *context,
|
||||
|
||||
/* get Mirrored flag, field 9 */
|
||||
if(*fields[9][0]=='Y') {
|
||||
if(!upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) {
|
||||
upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
|
||||
(long)c, u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
@ -576,7 +579,8 @@ parseDB(const char *filename, UErrorCode *pErrorCode) {
|
||||
for(i=0; i<LENGTHOF(defaultBidi); ++i) {
|
||||
start=defaultBidi[i][0];
|
||||
end=defaultBidi[i][1];
|
||||
if(!upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
|
||||
(long)start, (long)end, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
@ -614,7 +618,8 @@ bidiClassLineFn(void *context,
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, end, 0, value, UBIDI_CLASS_MASK, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, 0, value, UBIDI_CLASS_MASK, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n",
|
||||
(int)start, (int)end, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2005, International Business Machines
|
||||
* Copyright (C) 2004-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -18,6 +18,7 @@
|
||||
#define __GENBIDI_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "propsvec.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
@ -27,7 +28,7 @@ U_CDECL_BEGIN
|
||||
extern UBool beVerbose, haveCopyright;
|
||||
|
||||
/* properties vectors in genbidi.c */
|
||||
extern uint32_t *pv;
|
||||
extern UPropsVectors *pv;
|
||||
|
||||
/* prototypes */
|
||||
U_CFUNC void
|
||||
|
@ -183,12 +183,11 @@ addMirror(UChar32 src, UChar32 mirror) {
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(
|
||||
!upvec_setValue(
|
||||
upvec_setValue(
|
||||
pv, src, src, 0,
|
||||
(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT, (uint32_t)(-1)<<UBIDI_MIRROR_DELTA_SHIFT,
|
||||
&errorCode)
|
||||
) {
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genbidi error: unable to set mirroring delta, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
|
@ -40,7 +40,7 @@
|
||||
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
uint32_t *pv;
|
||||
UPropsVectors *pv;
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
@ -155,7 +155,8 @@ binariesLineFn(void *context,
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set %s, code: %s\n",
|
||||
bin->binaries[i].propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
@ -290,7 +291,7 @@ main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
pv=upvec_open(2, 10000);
|
||||
pv=upvec_open(2, &errorCode);
|
||||
caseSensitive=uset_open(1, 0); /* empty set (start>end) */
|
||||
|
||||
/* process SpecialCasing.txt */
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "utrie.h"
|
||||
#include "propsvec.h"
|
||||
#include "ucase.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
@ -92,7 +93,7 @@ typedef struct {
|
||||
extern UBool beVerbose, haveCopyright;
|
||||
|
||||
/* properties vectors in gencase.c */
|
||||
extern uint32_t *pv;
|
||||
extern UPropsVectors *pv;
|
||||
|
||||
/* prototypes */
|
||||
U_CFUNC void
|
||||
|
@ -408,12 +408,13 @@ setProps(Props *p) {
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if( value!=oldValue &&
|
||||
!upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode)
|
||||
) {
|
||||
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
if(value!=oldValue) {
|
||||
upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
/* add the multi-character case folding to the "unfold" data */
|
||||
@ -428,7 +429,8 @@ setProps(Props *p) {
|
||||
extern void
|
||||
addCaseSensitive(UChar32 first, UChar32 last) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
if(!upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) {
|
||||
upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
@ -573,7 +575,8 @@ addClosureMapping(UChar32 src, UChar32 dest) {
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(!upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode)) {
|
||||
upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
|
@ -339,7 +339,8 @@ unicodeDataLineFn(void *context,
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
if(!upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode)) {
|
||||
upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
@ -544,7 +545,8 @@ repeatAreaProps() {
|
||||
|
||||
/* Hangul have canonical decompositions */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(!upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode)) {
|
||||
upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -19,6 +19,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "utrie.h"
|
||||
#include "propsvec.h"
|
||||
|
||||
/* file definitions */
|
||||
#define DATA_NAME "uprops"
|
||||
@ -39,7 +40,7 @@ extern const char *const
|
||||
genCategoryNames[];
|
||||
|
||||
/* properties vectors in props2.c */
|
||||
extern uint32_t *pv;
|
||||
extern UPropsVectors *pv;
|
||||
|
||||
/* prototypes */
|
||||
U_CFUNC void
|
||||
|
@ -35,8 +35,7 @@
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
static UNewTrie *newTrie;
|
||||
uint32_t *pv;
|
||||
static int32_t pvCount;
|
||||
UPropsVectors *pv;
|
||||
|
||||
/* miscellaneous ------------------------------------------------------------ */
|
||||
|
||||
@ -208,7 +207,8 @@ singleEnumLineFn(void *context,
|
||||
/* Also set bits for initialValue and errorValue. */
|
||||
end=UPVEC_MAX_CP;
|
||||
}
|
||||
if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set %s code: %s\n",
|
||||
sen->propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
@ -370,7 +370,8 @@ binariesLineFn(void *context,
|
||||
/* Also set bits for initialValue and errorValue. */
|
||||
end=UPVEC_MAX_CP;
|
||||
}
|
||||
if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set %s code: %s\n",
|
||||
bin->binaries[i].propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
@ -408,7 +409,12 @@ parseBinariesFile(char *filename, char *basename, const char *suffix,
|
||||
|
||||
U_CFUNC void
|
||||
initAdditionalProperties() {
|
||||
pv=upvec_open(UPROPS_VECTOR_WORDS, 20000);
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "error: upvec_open() failed - %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
@ -484,11 +490,11 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
||||
* W for plane 2
|
||||
*/
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
if( !upvec_setValue(pv, 0xe000, 0xf8ff, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0xf0000, 0xffffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0x100000, 0x10fffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0x20000, 0x2fffd, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode)
|
||||
) {
|
||||
upvec_setValue(pv, 0xe000, 0xf8ff, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
|
||||
upvec_setValue(pv, 0xf0000, 0xffffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
|
||||
upvec_setValue(pv, 0x100000, 0x10fffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
|
||||
upvec_setValue(pv, 0x20000, 0x2fffd, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
@ -498,7 +504,7 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
||||
|
||||
{
|
||||
UPVecToUTrieContext toUTrie={ NULL, 50000 /* capacity */, 0, TRUE /* latin1Linear */ };
|
||||
pvCount=upvec_compact(pv, upvec_compactToUTrieHandler, &toUTrie, pErrorCode);
|
||||
upvec_compact(pv, upvec_compactToUTrieHandler, &toUTrie, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
|
||||
u_errorName(*pErrorCode));
|
||||
@ -554,7 +560,8 @@ ageLineFn(void *context,
|
||||
/* Also set bits for initialValue and errorValue. */
|
||||
end=UPVEC_MAX_CP;
|
||||
}
|
||||
if(!upvec_setValue(pv, start, end, 0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode)) {
|
||||
upvec_setValue(pv, start, end, 0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
@ -693,9 +700,14 @@ numericLineFn(void *context,
|
||||
|
||||
U_CFUNC int32_t
|
||||
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
|
||||
uint32_t *pvArray;
|
||||
int32_t pvRows, pvCount;
|
||||
int32_t length;
|
||||
UErrorCode errorCode;
|
||||
|
||||
pvArray=upvec_getArray(pv, &pvRows, NULL);
|
||||
pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=utrie_serialize(newTrie, p, capacity, NULL, TRUE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
@ -783,15 +795,15 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP
|
||||
if(f!=NULL) {
|
||||
usrc_writeArray(f,
|
||||
"static const uint32_t propsVectors[%ld]={\n",
|
||||
pv, 32, pvCount,
|
||||
pvArray, 32, pvCount,
|
||||
"};\n\n");
|
||||
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
|
||||
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
|
||||
} else {
|
||||
uprv_memcpy(p, pv, pvCount*4);
|
||||
uprv_memcpy(p, pvArray, pvCount*4);
|
||||
}
|
||||
if(beVerbose) {
|
||||
printf("number of additional props vectors: %5u\n", (int)pvCount/UPROPS_VECTOR_WORDS);
|
||||
printf("number of additional props vectors: %5u\n", (int)pvRows);
|
||||
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
|
||||
}
|
||||
}
|
||||
|
@ -192,7 +192,7 @@ usrc_writeUTrie2Struct(FILE *f,
|
||||
" 0x%lx,\n" /* errorValue */
|
||||
" 0x%lx,\n" /* highStart */
|
||||
" 0x%lx,\n" /* highValueIndex */
|
||||
" NULL, 0, FALSE, FALSE, 0, NULL",
|
||||
" NULL, 0, FALSE, FALSE, 0, NULL\n",
|
||||
(long)pTrie->indexLength, (long)pTrie->dataLength,
|
||||
(short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
|
||||
(long)pTrie->initialValue, (long)pTrie->errorValue,
|
||||
|
Loading…
Reference in New Issue
Block a user