ICU-6453 upvec_ introduce UPropsVectors type, clean up API a bit

X-SVN-Rev: 24866
This commit is contained in:
Markus Scherer 2008-10-23 06:00:59 +00:00
parent 362d08e00b
commit 35f8ea1402
13 changed files with 313 additions and 213 deletions

View File

@ -13,7 +13,7 @@
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store additional Unicode character properties in bit set vectors.
* Store bits (Unicode character properties) in bit set vectors.
*/
#include <stdlib.h>
@ -24,22 +24,81 @@
#include "uarrsort.h"
#include "propsvec.h"
/*
 * State of a properties-vectors object.
 * upvec_open() allocates it, upvec_setValue() adds and modifies rows,
 * and upvec_compact() turns the row array into an array of unique vectors.
 */
struct UPropsVectors {
uint32_t *v; /* row array; each row is `columns` uint32_t words wide */
int32_t columns; /* number of columns, plus two for start & limit values */
int32_t maxRows; /* number of rows that v has room for */
int32_t rows; /* rows currently in use; after upvec_compact(), the number of compacted vectors */
int32_t prevRow; /* search optimization: remember last row seen */
UBool isCompacted; /* set by upvec_compact(); upvec_setValue() then sets U_NO_WRITE_PERMISSION */
};
/* Row capacity that upvec_open() starts with. */
#define UPVEC_INITIAL_ROWS (1<<14)
/* Row capacity that upvec_setValue() grows to first when the array is full. */
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<17)
/* Last growth step; needing more rows than this sets U_INTERNAL_PROGRAM_ERROR. */
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
/*
 * Allocate and initialize a properties-vectors object.
 *
 * @param columns    number of value columns per row, at least 1
 *                   (the range start and limit columns are added internally)
 * @param pErrorCode in/out error code; nothing is done if it already
 *                   indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                   columns<1 and to U_MEMORY_ALLOCATION_ERROR if an
 *                   allocation fails.
 * @return the new object (release it with upvec_close()), or NULL on error
 *
 * The new object contains one row covering all of Unicode plus one row for
 * each special-value code point, all with zero values.
 */
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
    UPropsVectors *pv;
    uint32_t *v, *row;
    uint32_t cp;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(columns<1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /*
     * Count the range start and limit columns *before* sizing the array:
     * every row occupies columns+2 words, so allocating with the caller's
     * column count would leave room for fewer than maxRows rows.
     */
    columns+=2;

    pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
    v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
    if(pv==NULL || v==NULL) {
        uprv_free(pv);
        uprv_free(v);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    uprv_memset(pv, 0, sizeof(UPropsVectors));
    pv->v=v;
    pv->columns=columns;
    pv->maxRows=UPVEC_INITIAL_ROWS;
    pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);

    /* set the all-Unicode row and the special-value rows */
    row=pv->v;
    uprv_memset(row, 0, pv->rows*columns*4);
    row[0]=0;
    row[1]=0x110000;
    row+=columns;
    for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
        row[0]=cp;
        row[1]=cp+1;
        row+=columns;
    }
    return pv;
}
/*
 * Release a properties-vectors object and its row array.
 * Passing NULL is allowed and does nothing.
 */
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv) {
    if(pv==NULL) {
        return;
    }
    uprv_free(pv->v);
    uprv_free(pv);
}
static uint32_t *
_findRow(uint32_t *pv, UChar32 rangeStart) {
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
uint32_t *row;
int32_t *hdr;
int32_t columns, i, start, limit, prevRow, rows;
hdr=(int32_t *)pv;
columns=hdr[UPVEC_COLUMNS];
limit=hdr[UPVEC_ROWS];
prevRow=hdr[UPVEC_PREV_ROW];
rows=hdr[UPVEC_ROWS];
pv+=UPVEC_HEADER_LENGTH;
columns=pv->columns;
rows=limit=pv->rows;
prevRow=pv->prevRow;
/* check the vicinity of the last-seen row */
if(prevRow<rows) {
row=pv+prevRow*columns;
row=pv->v+prevRow*columns;
if(rangeStart>=(UChar32)row[0]) {
if(rangeStart<(UChar32)row[1]) {
/* same row as last seen */
@ -49,7 +108,7 @@ _findRow(uint32_t *pv, UChar32 rangeStart) {
rangeStart>=(UChar32)(row+=columns)[0] && rangeStart<(UChar32)row[1]
) {
/* next row after the last one */
hdr[UPVEC_PREV_ROW]=prevRow;
pv->prevRow=prevRow;
return row;
}
}
@ -59,11 +118,11 @@ _findRow(uint32_t *pv, UChar32 rangeStart) {
start=0;
while(start<limit-1) {
i=(start+limit)/2;
row=pv+i*columns;
row=pv->v+i*columns;
if(rangeStart<(UChar32)row[0]) {
limit=i;
} else if(rangeStart<(UChar32)row[1]) {
hdr[UPVEC_PREV_ROW]=i;
pv->prevRow=i;
return row;
} else {
start=i;
@ -71,54 +130,12 @@ _findRow(uint32_t *pv, UChar32 rangeStart) {
}
/* must be found because all ranges together always cover all of Unicode */
hdr[UPVEC_PREV_ROW]=start;
return pv+start*columns;
}
U_CAPI uint32_t * U_EXPORT2
upvec_open(int32_t columns, int32_t maxRows) {
uint32_t *pv, *row;
uint32_t cp;
int32_t length;
if(columns<1 || maxRows<1) {
return NULL;
}
columns+=2; /* count range start and limit columns */
length=UPVEC_HEADER_LENGTH+maxRows*columns;
pv=(uint32_t *)uprv_malloc(length*4);
if(pv!=NULL) {
/* set header */
pv[UPVEC_COLUMNS]=(uint32_t)columns;
pv[UPVEC_MAXROWS]=(uint32_t)maxRows;
pv[UPVEC_ROWS]=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
pv[UPVEC_PREV_ROW]=0;
/* set the all-Unicode row and the special-value rows */
row=pv+UPVEC_HEADER_LENGTH;
uprv_memset(row, 0, pv[UPVEC_ROWS]*columns*4);
row[0]=0;
row[1]=0x110000;
row+=columns;
for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
row[0]=cp;
row[1]=cp+1;
row+=columns;
}
}
return pv;
pv->prevRow=start;
return pv->v+start*columns;
}
U_CAPI void U_EXPORT2
upvec_close(uint32_t *pv) {
if(pv!=NULL) {
uprv_free(pv);
}
}
U_CAPI UBool U_EXPORT2
upvec_setValue(uint32_t *pv,
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
@ -129,21 +146,24 @@ upvec_setValue(uint32_t *pv,
UBool splitFirstRow, splitLastRow;
/* argument checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return FALSE;
if(U_FAILURE(*pErrorCode)) {
return;
}
if( pv==NULL ||
start<0 || start>end || end>UPVEC_MAX_CP ||
column<0 || (uint32_t)(column+1)>=pv[UPVEC_COLUMNS]
column<0 || column>=(pv->columns-2)
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
return;
}
if(pv->isCompacted) {
*pErrorCode=U_NO_WRITE_PERMISSION;
return;
}
limit=end+1;
/* initialize */
columns=(int32_t)pv[UPVEC_COLUMNS];
columns=pv->columns;
column+=2; /* skip range start and limit columns */
value&=mask;
@ -187,21 +207,39 @@ upvec_setValue(uint32_t *pv,
if(splitFirstRow || splitLastRow) {
int32_t count, rows;
rows=(int32_t)pv[UPVEC_ROWS];
if((rows+splitFirstRow+splitLastRow)>(int32_t)pv[UPVEC_MAXROWS]) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
rows=pv->rows;
if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
    /* Not enough room for the split rows: move to a larger array. */
    uint32_t *newVectors;
    int32_t newMaxRows;

    if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
        newMaxRows=UPVEC_MEDIUM_ROWS;
    } else if(pv->maxRows<UPVEC_MAX_ROWS) {
        newMaxRows=UPVEC_MAX_ROWS;
    } else {
        /* Implementation bug, or UPVEC_MAX_ROWS too low. */
        *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
        return;
    }
    newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
    if(newVectors==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    /*
     * Carry the existing rows over, and re-point firstRow/lastRow
     * (which point into the old array) at the same rows in the new one
     * before the old array is freed. Without this, the rows would be lost
     * and the code below would read and write freed memory.
     */
    uprv_memcpy(newVectors, pv->v, rows*columns*4);
    firstRow=newVectors+(firstRow-pv->v);
    lastRow=newVectors+(lastRow-pv->v);
    uprv_free(pv->v);
    pv->v=newVectors;
    pv->maxRows=newMaxRows;
}
/* count the number of row cells to move after the last row, and move them */
count = (int32_t)((pv+UPVEC_HEADER_LENGTH+rows*columns)-(lastRow+columns));
count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
if(count>0) {
uprv_memmove(
lastRow+(1+splitFirstRow+splitLastRow)*columns,
lastRow+columns,
count*4);
}
pv[UPVEC_ROWS]=rows+splitFirstRow+splitLastRow;
pv->rows=rows+splitFirstRow+splitLastRow;
/* split the first row, and move the firstRow pointer to the second part */
if(splitFirstRow) {
@ -226,7 +264,7 @@ upvec_setValue(uint32_t *pv,
}
/* set the "row last seen" to the last row for the range */
pv[UPVEC_PREV_ROW]=(uint32_t)((lastRow-(pv+UPVEC_HEADER_LENGTH))/columns);
pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
/* set the input value in all remaining rows */
firstRow+=column;
@ -239,37 +277,36 @@ upvec_setValue(uint32_t *pv,
}
firstRow+=columns;
}
return TRUE;
}
U_CAPI uint32_t U_EXPORT2
upvec_getValue(uint32_t *pv, UChar32 c, int32_t column) {
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
uint32_t *row;
if(pv==NULL || c<0 || c>UPVEC_MAX_CP) {
if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) {
return 0;
}
row=_findRow(pv, c);
row=_findRow((UPropsVectors *)pv, c);
return row[2+column];
}
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(uint32_t *pv, int32_t rowIndex,
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
UChar32 *pRangeStart, UChar32 *pRangeEnd) {
uint32_t *row;
int32_t columns;
if(pv==NULL || rowIndex<0 || rowIndex>=(int32_t)pv[UPVEC_ROWS]) {
if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
return NULL;
}
columns=(int32_t)pv[UPVEC_COLUMNS];
row=pv+UPVEC_HEADER_LENGTH+rowIndex*columns;
columns=pv->columns;
row=pv->v+rowIndex*columns;
if(pRangeStart!=NULL) {
*pRangeStart=row[0];
*pRangeStart=(UChar32)row[0];
}
if(pRangeEnd!=NULL) {
*pRangeEnd=row[1]-1;
*pRangeEnd=(UChar32)row[1]-1;
}
return row+2;
}
@ -277,10 +314,10 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex,
static int32_t U_CALLCONV
upvec_compareRows(const void *context, const void *l, const void *r) {
const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
const uint32_t *pv=(const uint32_t *)context;
const UPropsVectors *pv=(const UPropsVectors *)context;
int32_t i, count, columns;
count=columns=(int32_t)pv[UPVEC_COLUMNS]; /* includes start/limit columns */
count=columns=pv->columns; /* includes start/limit columns */
/* start comparing after start/limit but wrap around to them */
i=2;
@ -296,38 +333,38 @@ upvec_compareRows(const void *context, const void *l, const void *r) {
return 0;
}
U_CAPI int32_t U_EXPORT2
upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
uint32_t *row;
int32_t i, columns, valueColumns, rows, count;
UChar32 start, limit;
/* argument checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
if(U_FAILURE(*pErrorCode)) {
return;
}
if(pv==NULL || handler==NULL) {
if(handler==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
return;
}
if(pv->isCompacted) {
return;
}
rows=(int32_t)pv[UPVEC_ROWS];
if(rows==0) {
return 0;
}
/* Set the flag now: Sorting and compacting destroys the builder data structure. */
pv->isCompacted=TRUE;
row=pv+UPVEC_HEADER_LENGTH;
columns=(int32_t)pv[UPVEC_COLUMNS];
rows=pv->rows;
columns=pv->columns;
valueColumns=columns-2; /* not counting start & limit */
/* sort the properties vectors to find unique vector values */
if(rows>1) {
uprv_sortArray(row, rows, columns*4,
uprv_sortArray(pv->v, rows, columns*4,
upvec_compareRows, pv, FALSE, pErrorCode);
}
if(U_FAILURE(*pErrorCode)) {
return 0;
return;
}
/*
@ -335,6 +372,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
* This has to do almost the same work as the compaction below,
* to find the indexes where the special-value rows will move.
*/
row=pv->v;
count=-valueColumns;
for(i=0; i<rows; ++i) {
start=(UChar32)row[0];
@ -347,7 +385,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
if(start>=UPVEC_FIRST_SPECIAL_CP) {
handler(context, start, start, count, row+2, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
return;
}
}
@ -361,7 +399,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
count, row-valueColumns, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
return;
}
/*
@ -371,7 +409,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
* This destroys the Properties Vector structure and replaces it
* with an array of just vector values.
*/
row=pv+UPVEC_HEADER_LENGTH;
row=pv->v;
count=-valueColumns;
for(i=0; i<rows; ++i) {
/* fetch these first before memmove() may overwrite them */
@ -379,30 +417,53 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC
limit=(UChar32)row[1];
/* add a new values vector if it is different from the current one */
if(count<0 || 0!=uprv_memcmp(row+2, pv+count, valueColumns*4)) {
if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
count+=valueColumns;
uprv_memmove(pv+count, row+2, valueColumns*4);
uprv_memmove(pv->v+count, row+2, valueColumns*4);
}
if(start<UPVEC_FIRST_SPECIAL_CP) {
handler(context, start, limit-1, count, pv+count, valueColumns, pErrorCode);
handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
return;
}
}
row+=columns;
}
/* count is at the beginning of the last vector, add valueColumns to include that last vector */
return count+valueColumns;
/* count is at the beginning of the last vector, add one to include that last vector */
pv->rows=count/valueColumns+1;
}
/*
 * Return the vectors array of a compacted object.
 *
 * @param pv       the properties-vectors object
 * @param pRows    if not NULL, receives pv->rows
 * @param pColumns if not NULL, receives the number of value columns
 *                 (not counting the start and limit columns)
 * @return the array, or NULL (with the outputs untouched) if pv has not
 *         been compacted
 */
U_CAPI uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
    if(pv->isCompacted) {
        if(pRows!=NULL) {
            *pRows=pv->rows;
        }
        if(pColumns!=NULL) {
            *pColumns=pv->columns-2;
        }
        return pv->v;
    }
    return NULL;
}
/*
 * Compact pv using upvec_compactToUTrie2Handler as the handler, then freeze
 * the trie collected in the handler context with 16-bit values.
 *
 * @return the trie, or NULL if *pErrorCode indicates a failure afterwards
 *         (in which case the trie has been closed)
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    UPVecToUTrie2Context toUTrie2={ NULL };
    UTrie2 *trie;

    upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode);
    utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode);

    trie=toUTrie2.trie;
    if(U_FAILURE(*pErrorCode)) {
        /* do not hand a partially built trie back to the caller */
        utrie2_close(trie);
        trie=NULL;
    }
    return trie;
}
/*
* TODO(markus): Add upvec_compactToUTrie2WithRowIndexes() function that returns
* a UTrie2 and does not require the caller to pass in a callback function.
*
* Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
* TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
* some 16-bit field and builds and returns a UTrie2.
*/

View File

@ -13,7 +13,7 @@
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store additional Unicode character properties in bit set vectors.
* Store bits (Unicode character properties) in bit set vectors.
*/
#ifndef __UPROPSVEC_H__
@ -25,11 +25,10 @@
U_CDECL_BEGIN
/*
/**
* Unicode Properties Vectors associated with code point ranges.
* Stored in an array of uint32_t.
*
* The array starts with a header, then rows of integers store
* Rows of uint32_t integers in a contiguous array store
* the range limits and the properties vectors.
*
* In each row, row[0] contains the start code point and
@ -41,15 +40,8 @@ U_CDECL_BEGIN
* It would be possible to store only one range boundary per row,
* but self-contained rows allow to later sort them by contents.
*/
enum {
/* stores number of columns, plus two for start & limit values */
UPVEC_COLUMNS,
UPVEC_MAXROWS,
UPVEC_ROWS,
/* search optimization: remember last row seen */
UPVEC_PREV_ROW,
UPVEC_HEADER_LENGTH
};
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
/*
* Special pseudo code points for storing the initialValue and the errorValue,
@ -67,28 +59,39 @@ enum {
*/
#define UPVEC_START_REAL_VALUES_CP 0x200000
U_CAPI uint32_t * U_EXPORT2
upvec_open(int32_t columns, int32_t maxRows);
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
upvec_close(uint32_t *pv);
upvec_close(UPropsVectors *pv);
U_CAPI UBool U_EXPORT2
upvec_setValue(uint32_t *pv,
/*
* In rows for code points [start..end], select the column,
* reset the mask bits and set the value bits (ANDed with the mask).
*
* Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
*/
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
UErrorCode *pErrorCode);
/*
* Logically const but must not be used on the same pv concurrently!
* Always returns 0 if called after upvec_compact().
*/
U_CAPI uint32_t U_EXPORT2
upvec_getValue(uint32_t *pv, UChar32 c, int32_t column);
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
/*
* pRangeStart and pRangeEnd can be NULL.
* @return NULL if rowIndex out of range and for illegal arguments
* @return NULL if rowIndex out of range and for illegal arguments,
* or if called after upvec_compact()
*/
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(uint32_t *pv, int32_t rowIndex,
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
UChar32 *pRangeStart, UChar32 *pRangeEnd);
/*
@ -98,7 +101,7 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex,
* - store them contiguously from the beginning of the memory
* - for each (non-unique) row, call the handler function
*
* The handler's rowIndex is the uint32_t index of the row in the compacted
* The handler's rowIndex is the index of the row in the compacted
* memory block.
* (Therefore, it starts at 0 and increases in increments of the columns value.)
*
@ -109,19 +112,28 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex,
* and the row is arbitrary (but not NULL).
* Then, in the second phase, the handler is called for each row of real values.
*/
U_CDECL_BEGIN
typedef void U_CALLCONV
UPVecCompactHandler(void *context,
UChar32 start, UChar32 end,
int32_t rowIndex, uint32_t *row, int32_t columns,
UErrorCode *pErrorCode);
U_CDECL_END
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
U_CAPI int32_t U_EXPORT2
upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
/*
* Get the vectors array after calling upvec_compact().
* Returns NULL if called before upvec_compact().
*/
U_CAPI uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
/*
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
* vectors array, and freeze the trie.
*/
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
struct UPVecToUTrieContext {
UNewTrie *newTrie;

View File

@ -63,6 +63,7 @@ struct UConverterSelector {
/* internal function */
static void generateSelectorData(UConverterSelector* result,
UPropsVectors *upvec,
const USet* excludedCodePoints,
const UConverterUnicodeSet whichSet,
UErrorCode* status);
@ -203,7 +204,9 @@ U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList,
}
newSelector->encodingsCount = converterListSize;
generateSelectorData(newSelector, excludedCodePoints, whichSet, status);
UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status);
generateSelectorData(newSelector, upvec, excludedCodePoints, whichSet, status);
upvec_close(upvec);
if (U_FAILURE(*status)) {
// at this point, we know pv and encodings have been allocated. No harm in
@ -223,7 +226,7 @@ U_CAPI void ucnvsel_close(UConverterSelector *sel) {
}
uprv_free(sel->encodings[0]);
uprv_free(sel->encodings);
upvec_close(sel->pv);
uprv_free(sel->pv);
utrie2_close(sel->trie);
uprv_free(sel);
}
@ -480,21 +483,19 @@ U_CAPI int32_t ucnvsel_serialize(const UConverterSelector* sel,
/* internal function! */
static void generateSelectorData(UConverterSelector* result,
UPropsVectors *upvec,
const USet* excludedCodePoints,
const UConverterUnicodeSet whichSet,
UErrorCode* status) {
if (U_FAILURE(*status)) {
return;
}
int32_t columns = (result->encodingsCount+31)/32;
// 66000 as suggested by Markus [I suggest something like 66000 which
// exceeds the number of BMP code points. There will be fewer ranges of
// combinations of encodings. (I believe there are no encodings that have
// interesting mappings for supplementary code points. All encodings either
// support all of them or none of them.)]
result->pv = upvec_open(columns, 66000); // create for all
// unicode codepoints, and have space for all those bits needed!
// set errorValue to all-ones
for (int32_t col = 0 ; col < columns; col++) {
upvec_setValue(result->pv, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
col, ~0, ~0, status);
}
@ -505,7 +506,6 @@ static void generateSelectorData(UConverterSelector* result,
int32_t j;
UConverter* test_converter = ucnv_open(result->encodings[i], status);
if (U_FAILURE(*status)) {
// status will propagate back to user
return;
}
USet* unicode_point_set;
@ -513,6 +513,10 @@ static void generateSelectorData(UConverterSelector* result,
ucnv_getUnicodeSet(test_converter, unicode_point_set,
whichSet, status);
if (U_FAILURE(*status)) {
  // Release both per-converter resources before bailing out; the normal
  // path closes the set as well as the converter at the end of the loop body.
  uset_close(unicode_point_set);
  ucnv_close(test_converter);
  return;
}
column = i / 32;
mask = 1 << (i%32);
@ -529,18 +533,17 @@ static void generateSelectorData(UConverterSelector* result,
// this will be reached for the converters that fill the set with
// strings. Those should be ignored by our system
} else {
upvec_setValue(result->pv, start_char, end_char, column, ~0, mask,
upvec_setValue(upvec, start_char, end_char, column, ~0, mask,
status);
if (U_FAILURE(*status)) {
return;
}
}
}
ucnv_close(test_converter);
uset_close(unicode_point_set);
if (U_FAILURE(*status)) {
return;
}
}
// handle excluded encodings! Simply set their values to all 1's in the upvec
if (excludedCodePoints) {
int32_t item_count = uset_getItemCount(excludedCodePoints);
@ -550,30 +553,29 @@ static void generateSelectorData(UConverterSelector* result,
uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
status);
if (U_FAILURE(*status)) {
return;
} else {
for (int32_t col = 0 ; col < columns; col++) {
upvec_setValue(result->pv, start_char, end_char, col, ~0, ~0,
status);
}
for (int32_t col = 0 ; col < columns; col++) {
upvec_setValue(upvec, start_char, end_char, col, ~0, ~0,
status);
}
}
}
// alright. Now, let's put things in the same exact form you'd get when you
// unserialize things.
UPVecToUTrie2Context toUTrie2={ NULL };
result->pvCount = upvec_compact(result->pv, upvec_compactToUTrie2Handler,
&toUTrie2, status);
result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status);
if (U_SUCCESS(*status)) {
result->trie = toUTrie2.trie;
utrie2_freeze(result->trie, UTRIE2_16_VALUE_BITS, status);
uint32_t *memory = upvec_getArray(upvec, &result->pvCount, NULL);
result->pvCount *= columns;
result->pv = (uint32_t *)uprv_malloc(result->pvCount * 4);
if (result->pv == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(result->pv, memory, result->pvCount * 4);
}
}
// a bunch of functions for the enumeration thingie! Nothing fancy here. Just
// iterate over the selected encodings
struct Enumerator {

View File

@ -39,7 +39,7 @@
/* data --------------------------------------------------------------------- */
uint32_t *pv;
UPropsVectors *pv;
UBool beVerbose=FALSE, haveCopyright=TRUE;
@ -169,7 +169,8 @@ singleEnumLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genbidi error: unable to set %s code: %s\n",
sen->propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@ -260,7 +261,8 @@ binariesLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
bin->binaries[i].propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@ -394,7 +396,7 @@ main(int argc, char* argv[]) {
}
/* initialize */
pv=upvec_open(2, 10000);
pv=upvec_open(2, &errorCode);
/* process BidiMirroring.txt */
writeUCDFilename(basename, "BidiMirroring", suffix);
@ -522,7 +524,8 @@ unicodeDataLineFn(void *context,
/* get Mirrored flag, field 9 */
if(*fields[9][0]=='Y') {
if(!upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) {
upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode);
/* the call reports into errorCode, so that is what must be tested (the message below uses it too) */
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
(long)c, u_errorName(errorCode));
exit(errorCode);
@ -576,7 +579,8 @@ parseDB(const char *filename, UErrorCode *pErrorCode) {
for(i=0; i<LENGTHOF(defaultBidi); ++i) {
start=defaultBidi[i][0];
end=defaultBidi[i][1];
if(!upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode)) {
upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
(long)start, (long)end, u_errorName(*pErrorCode));
exit(*pErrorCode);
@ -614,7 +618,8 @@ bidiClassLineFn(void *context,
exit(U_PARSE_ERROR);
}
if(!upvec_setValue(pv, start, end, 0, value, UBIDI_CLASS_MASK, pErrorCode)) {
upvec_setValue(pv, start, end, 0, value, UBIDI_CLASS_MASK, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n",
(int)start, (int)end, u_errorName(*pErrorCode));
exit(*pErrorCode);

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2005, International Business Machines
* Copyright (C) 2004-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -18,6 +18,7 @@
#define __GENBIDI_H__
#include "unicode/utypes.h"
#include "propsvec.h"
U_CDECL_BEGIN
@ -27,7 +28,7 @@ U_CDECL_BEGIN
extern UBool beVerbose, haveCopyright;
/* properties vectors in genbidi.c */
extern uint32_t *pv;
extern UPropsVectors *pv;
/* prototypes */
U_CFUNC void

View File

@ -183,12 +183,11 @@ addMirror(UChar32 src, UChar32 mirror) {
}
errorCode=U_ZERO_ERROR;
if(
!upvec_setValue(
upvec_setValue(
pv, src, src, 0,
(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT, (uint32_t)(-1)<<UBIDI_MIRROR_DELTA_SHIFT,
&errorCode)
) {
&errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genbidi error: unable to set mirroring delta, code: %s\n",
u_errorName(errorCode));
exit(errorCode);

View File

@ -40,7 +40,7 @@
/* data --------------------------------------------------------------------- */
uint32_t *pv;
UPropsVectors *pv;
UBool beVerbose=FALSE, haveCopyright=TRUE;
@ -155,7 +155,8 @@ binariesLineFn(void *context,
exit(U_INTERNAL_PROGRAM_ERROR);
}
if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "gencase error: unable to set %s, code: %s\n",
bin->binaries[i].propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@ -290,7 +291,7 @@ main(int argc, char* argv[]) {
}
/* initialize */
pv=upvec_open(2, 10000);
pv=upvec_open(2, &errorCode);
caseSensitive=uset_open(1, 0); /* empty set (start>end) */
/* process SpecialCasing.txt */

View File

@ -19,6 +19,7 @@
#include "unicode/utypes.h"
#include "utrie.h"
#include "propsvec.h"
#include "ucase.h"
U_CDECL_BEGIN
@ -92,7 +93,7 @@ typedef struct {
extern UBool beVerbose, haveCopyright;
/* properties vectors in gencase.c */
extern uint32_t *pv;
extern UPropsVectors *pv;
/* prototypes */
U_CFUNC void

View File

@ -408,12 +408,13 @@ setProps(Props *p) {
}
errorCode=U_ZERO_ERROR;
if( value!=oldValue &&
!upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode)
) {
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
u_errorName(errorCode));
exit(errorCode);
if(value!=oldValue) {
upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
u_errorName(errorCode));
exit(errorCode);
}
}
/* add the multi-character case folding to the "unfold" data */
@ -428,7 +429,8 @@ setProps(Props *p) {
extern void
addCaseSensitive(UChar32 first, UChar32 last) {
UErrorCode errorCode=U_ZERO_ERROR;
if(!upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) {
upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
u_errorName(errorCode));
exit(errorCode);
@ -573,7 +575,8 @@ addClosureMapping(UChar32 src, UChar32 dest) {
}
errorCode=U_ZERO_ERROR;
if(!upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode)) {
upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
u_errorName(errorCode));
exit(errorCode);

View File

@ -339,7 +339,8 @@ unicodeDataLineFn(void *context,
exit(U_PARSE_ERROR);
}
}
if(!upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode)) {
upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(*pErrorCode));
exit(*pErrorCode);
}
@ -544,7 +545,8 @@ repeatAreaProps() {
/* Hangul have canonical decompositions */
errorCode=U_ZERO_ERROR;
if(!upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode)) {
upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(errorCode));
exit(errorCode);
}

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2005, International Business Machines
* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -19,6 +19,7 @@
#include "unicode/utypes.h"
#include "utrie.h"
#include "propsvec.h"
/* file definitions */
#define DATA_NAME "uprops"
@ -39,7 +40,7 @@ extern const char *const
genCategoryNames[];
/* properties vectors in props2.c */
extern uint32_t *pv;
extern UPropsVectors *pv;
/* prototypes */
U_CFUNC void

View File

@ -35,8 +35,7 @@
/* data --------------------------------------------------------------------- */
static UNewTrie *newTrie;
uint32_t *pv;
static int32_t pvCount;
UPropsVectors *pv;
/* miscellaneous ------------------------------------------------------------ */
@ -208,7 +207,8 @@ singleEnumLineFn(void *context,
/* Also set bits for initialValue and errorValue. */
end=UPVEC_MAX_CP;
}
if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) {
upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops error: unable to set %s code: %s\n",
sen->propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@ -370,7 +370,8 @@ binariesLineFn(void *context,
/* Also set bits for initialValue and errorValue. */
end=UPVEC_MAX_CP;
}
if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode)) {
upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops error: unable to set %s code: %s\n",
bin->binaries[i].propName, u_errorName(*pErrorCode));
exit(*pErrorCode);
@ -408,7 +409,12 @@ parseBinariesFile(char *filename, char *basename, const char *suffix,
U_CFUNC void
initAdditionalProperties() {
pv=upvec_open(UPROPS_VECTOR_WORDS, 20000);
UErrorCode errorCode=U_ZERO_ERROR;
pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "error: upvec_open() failed - %s\n", u_errorName(errorCode));
exit(errorCode);
}
}
U_CFUNC void
@ -484,11 +490,11 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
* W for plane 2
*/
*pErrorCode=U_ZERO_ERROR;
if( !upvec_setValue(pv, 0xe000, 0xf8ff, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
!upvec_setValue(pv, 0xf0000, 0xffffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
!upvec_setValue(pv, 0x100000, 0x10fffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
!upvec_setValue(pv, 0x20000, 0x2fffd, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode)
) {
upvec_setValue(pv, 0xe000, 0xf8ff, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
upvec_setValue(pv, 0xf0000, 0xffffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
upvec_setValue(pv, 0x100000, 0x10fffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
upvec_setValue(pv, 0x20000, 0x2fffd, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode));
exit(*pErrorCode);
}
@ -498,7 +504,7 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
{
UPVecToUTrieContext toUTrie={ NULL, 50000 /* capacity */, 0, TRUE /* latin1Linear */ };
pvCount=upvec_compact(pv, upvec_compactToUTrieHandler, &toUTrie, pErrorCode);
upvec_compact(pv, upvec_compactToUTrieHandler, &toUTrie, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
u_errorName(*pErrorCode));
@ -554,7 +560,8 @@ ageLineFn(void *context,
/* Also set bits for initialValue and errorValue. */
end=UPVEC_MAX_CP;
}
if(!upvec_setValue(pv, start, end, 0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode)) {
upvec_setValue(pv, start, end, 0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
fprintf(stderr, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode));
exit(*pErrorCode);
}
@ -693,9 +700,14 @@ numericLineFn(void *context,
U_CFUNC int32_t
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
uint32_t *pvArray;
int32_t pvRows, pvCount;
int32_t length;
UErrorCode errorCode;
pvArray=upvec_getArray(pv, &pvRows, NULL);
pvCount=pvRows*UPROPS_VECTOR_WORDS;
errorCode=U_ZERO_ERROR;
length=utrie_serialize(newTrie, p, capacity, NULL, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
@ -783,15 +795,15 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP
if(f!=NULL) {
usrc_writeArray(f,
"static const uint32_t propsVectors[%ld]={\n",
pv, 32, pvCount,
pvArray, 32, pvCount,
"};\n\n");
fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
} else {
uprv_memcpy(p, pv, pvCount*4);
uprv_memcpy(p, pvArray, pvCount*4);
}
if(beVerbose) {
printf("number of additional props vectors: %5u\n", (int)pvCount/UPROPS_VECTOR_WORDS);
printf("number of additional props vectors: %5u\n", (int)pvRows);
printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
}
}

View File

@ -192,7 +192,7 @@ usrc_writeUTrie2Struct(FILE *f,
" 0x%lx,\n" /* errorValue */
" 0x%lx,\n" /* highStart */
" 0x%lx,\n" /* highValueIndex */
" NULL, 0, FALSE, FALSE, 0, NULL",
" NULL, 0, FALSE, FALSE, 0, NULL\n",
(long)pTrie->indexLength, (long)pTrie->dataLength,
(short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
(long)pTrie->initialValue, (long)pTrie->errorValue,