2002-02-25 18:44:07 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
2008-08-04 21:56:02 +00:00
|
|
|
* Copyright (C) 2002-2008, International Business Machines
|
2002-02-25 18:44:07 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: propsvec.c
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2002feb22
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*
|
|
|
|
* Store additional Unicode character properties in bit set vectors.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "cmemory.h"
|
|
|
|
#include "utrie.h"
|
2004-08-30 22:11:48 +00:00
|
|
|
#include "uarrsort.h"
|
2002-02-25 18:44:07 +00:00
|
|
|
#include "propsvec.h"
|
|
|
|
|
|
|
|
static uint32_t *
|
2004-08-30 22:11:48 +00:00
|
|
|
_findRow(uint32_t *pv, UChar32 rangeStart) {
|
2002-02-25 18:44:07 +00:00
|
|
|
uint32_t *row;
|
2004-08-30 22:11:48 +00:00
|
|
|
int32_t *hdr;
|
|
|
|
int32_t columns, i, start, limit, prevRow, rows;
|
|
|
|
|
|
|
|
hdr=(int32_t *)pv;
|
|
|
|
columns=hdr[UPVEC_COLUMNS];
|
|
|
|
limit=hdr[UPVEC_ROWS];
|
|
|
|
prevRow=hdr[UPVEC_PREV_ROW];
|
|
|
|
rows=hdr[UPVEC_ROWS];
|
2002-02-25 18:44:07 +00:00
|
|
|
pv+=UPVEC_HEADER_LENGTH;
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
/* check the vicinity of the last-seen row */
|
|
|
|
if(prevRow<rows) {
|
|
|
|
row=pv+prevRow*columns;
|
|
|
|
if(rangeStart>=(UChar32)row[0]) {
|
|
|
|
if(rangeStart<(UChar32)row[1]) {
|
|
|
|
/* same row as last seen */
|
|
|
|
return row;
|
|
|
|
} else if(
|
|
|
|
++prevRow<rows &&
|
|
|
|
rangeStart>=(UChar32)(row+=columns)[0] && rangeStart<(UChar32)row[1]
|
|
|
|
) {
|
|
|
|
/* next row after the last one */
|
|
|
|
hdr[UPVEC_PREV_ROW]=prevRow;
|
|
|
|
return row;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-02-25 18:44:07 +00:00
|
|
|
/* do a binary search for the start of the range */
|
|
|
|
start=0;
|
|
|
|
while(start<limit-1) {
|
|
|
|
i=(start+limit)/2;
|
|
|
|
row=pv+i*columns;
|
2004-08-30 22:11:48 +00:00
|
|
|
if(rangeStart<(UChar32)row[0]) {
|
2002-02-25 18:44:07 +00:00
|
|
|
limit=i;
|
2004-08-30 22:11:48 +00:00
|
|
|
} else if(rangeStart<(UChar32)row[1]) {
|
|
|
|
hdr[UPVEC_PREV_ROW]=i;
|
2002-02-25 18:44:07 +00:00
|
|
|
return row;
|
|
|
|
} else {
|
|
|
|
start=i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* must be found because all ranges together always cover all of Unicode */
|
2004-08-30 22:11:48 +00:00
|
|
|
hdr[UPVEC_PREV_ROW]=start;
|
2002-02-25 18:44:07 +00:00
|
|
|
return pv+start*columns;
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
U_CAPI uint32_t * U_EXPORT2
|
2002-02-25 18:44:07 +00:00
|
|
|
upvec_open(int32_t columns, int32_t maxRows) {
|
|
|
|
uint32_t *pv, *row;
|
|
|
|
int32_t length;
|
|
|
|
|
|
|
|
if(columns<1 || maxRows<1) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
columns+=2; /* count range start and limit columns */
|
|
|
|
length=UPVEC_HEADER_LENGTH+maxRows*columns;
|
|
|
|
pv=(uint32_t *)uprv_malloc(length*4);
|
|
|
|
if(pv!=NULL) {
|
|
|
|
/* set header */
|
|
|
|
pv[UPVEC_COLUMNS]=(uint32_t)columns;
|
|
|
|
pv[UPVEC_MAXROWS]=(uint32_t)maxRows;
|
|
|
|
pv[UPVEC_ROWS]=1;
|
2004-08-30 22:11:48 +00:00
|
|
|
pv[UPVEC_PREV_ROW]=0;
|
2002-02-25 18:44:07 +00:00
|
|
|
|
|
|
|
/* set initial row */
|
|
|
|
row=pv+UPVEC_HEADER_LENGTH;
|
|
|
|
*row++=0;
|
|
|
|
*row++=0x110000;
|
|
|
|
columns-=2;
|
|
|
|
do {
|
|
|
|
*row++=0;
|
|
|
|
} while(--columns>0);
|
|
|
|
}
|
|
|
|
return pv;
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
U_CAPI void U_EXPORT2
|
2002-02-25 18:44:07 +00:00
|
|
|
upvec_close(uint32_t *pv) {
|
2004-08-30 22:11:48 +00:00
|
|
|
if(pv!=NULL) {
|
2002-02-25 18:44:07 +00:00
|
|
|
uprv_free(pv);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
U_CAPI UBool U_EXPORT2
|
2002-02-25 18:44:07 +00:00
|
|
|
upvec_setValue(uint32_t *pv,
|
2004-08-30 22:11:48 +00:00
|
|
|
UChar32 start, UChar32 limit,
|
2002-02-25 18:44:07 +00:00
|
|
|
int32_t column,
|
|
|
|
uint32_t value, uint32_t mask,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
uint32_t *firstRow, *lastRow;
|
|
|
|
int32_t columns;
|
|
|
|
UBool splitFirstRow, splitLastRow;
|
|
|
|
|
|
|
|
/* argument checking */
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if( pv==NULL ||
|
2004-08-30 22:11:48 +00:00
|
|
|
start<0 || start>limit || limit>0x110000 ||
|
2002-02-25 18:44:07 +00:00
|
|
|
column<0 || (uint32_t)(column+1)>=pv[UPVEC_COLUMNS]
|
|
|
|
) {
|
|
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
if(start==limit) {
|
|
|
|
/* empty range, nothing to do */
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize */
|
2004-08-30 22:11:48 +00:00
|
|
|
columns=(int32_t)pv[UPVEC_COLUMNS];
|
2002-02-25 18:44:07 +00:00
|
|
|
column+=2; /* skip range start and limit columns */
|
|
|
|
value&=mask;
|
|
|
|
|
|
|
|
/* find the rows whose ranges overlap with the input range */
|
|
|
|
|
|
|
|
/* find the first row, always successful */
|
|
|
|
firstRow=_findRow(pv, start);
|
|
|
|
|
|
|
|
/* find the last row, always successful */
|
|
|
|
lastRow=firstRow;
|
2004-08-30 22:11:48 +00:00
|
|
|
while(limit>(UChar32)lastRow[1]) {
|
2002-02-25 18:44:07 +00:00
|
|
|
lastRow+=columns;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Rows need to be split if they partially overlap with the
|
|
|
|
* input range (only possible for the first and last rows)
|
|
|
|
* and if their value differs from the input value.
|
|
|
|
*/
|
2004-08-30 22:11:48 +00:00
|
|
|
splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
|
|
|
|
splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
|
2002-02-25 18:44:07 +00:00
|
|
|
|
|
|
|
/* split first/last rows if necessary */
|
|
|
|
if(splitFirstRow || splitLastRow) {
|
|
|
|
int32_t count, rows;
|
|
|
|
|
|
|
|
rows=(int32_t)pv[UPVEC_ROWS];
|
|
|
|
if((rows+splitFirstRow+splitLastRow)>(int32_t)pv[UPVEC_MAXROWS]) {
|
|
|
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* count the number of row cells to move after the last row, and move them */
|
2002-05-23 22:10:23 +00:00
|
|
|
count = (int32_t)((pv+UPVEC_HEADER_LENGTH+rows*columns)-(lastRow+columns));
|
2002-02-25 18:44:07 +00:00
|
|
|
if(count>0) {
|
|
|
|
uprv_memmove(
|
|
|
|
lastRow+(1+splitFirstRow+splitLastRow)*columns,
|
|
|
|
lastRow+columns,
|
|
|
|
count*4);
|
|
|
|
}
|
|
|
|
pv[UPVEC_ROWS]=rows+splitFirstRow+splitLastRow;
|
|
|
|
|
|
|
|
/* split the first row, and move the firstRow pointer to the second part */
|
|
|
|
if(splitFirstRow) {
|
|
|
|
/* copy all affected rows up one and move the lastRow pointer */
|
2002-05-23 22:10:23 +00:00
|
|
|
count = (int32_t)((lastRow-firstRow)+columns);
|
2002-02-25 18:44:07 +00:00
|
|
|
uprv_memmove(firstRow+columns, firstRow, count*4);
|
|
|
|
lastRow+=columns;
|
|
|
|
|
|
|
|
/* split the range and move the firstRow pointer */
|
2004-08-30 22:11:48 +00:00
|
|
|
firstRow[1]=firstRow[columns]=(uint32_t)start;
|
2002-02-25 18:44:07 +00:00
|
|
|
firstRow+=columns;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* split the last row */
|
|
|
|
if(splitLastRow) {
|
|
|
|
/* copy the last row data */
|
|
|
|
uprv_memcpy(lastRow+columns, lastRow, columns*4);
|
|
|
|
|
|
|
|
/* split the range and move the firstRow pointer */
|
2004-08-30 22:11:48 +00:00
|
|
|
lastRow[1]=lastRow[columns]=(uint32_t)limit;
|
2002-02-25 18:44:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
/* set the "row last seen" to the last row for the range */
|
|
|
|
pv[UPVEC_PREV_ROW]=(uint32_t)((lastRow-(pv+UPVEC_HEADER_LENGTH))/columns);
|
|
|
|
|
2002-02-25 18:44:07 +00:00
|
|
|
/* set the input value in all remaining rows */
|
|
|
|
firstRow+=column;
|
|
|
|
lastRow+=column;
|
|
|
|
mask=~mask;
|
|
|
|
for(;;) {
|
|
|
|
*firstRow=(*firstRow&mask)|value;
|
|
|
|
if(firstRow==lastRow) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
firstRow+=columns;
|
|
|
|
}
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
U_CAPI uint32_t U_EXPORT2
|
|
|
|
upvec_getValue(uint32_t *pv, UChar32 c, int32_t column) {
|
|
|
|
uint32_t *row;
|
|
|
|
|
|
|
|
if(pv==NULL || c<0 || c>=0x110000) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
row=_findRow(pv, c);
|
|
|
|
return row[2+column];
|
|
|
|
}
|
|
|
|
|
|
|
|
U_CAPI uint32_t * U_EXPORT2
|
2002-02-25 18:44:07 +00:00
|
|
|
upvec_getRow(uint32_t *pv, int32_t rowIndex,
|
2004-08-30 22:11:48 +00:00
|
|
|
UChar32 *pRangeStart, UChar32 *pRangeLimit) {
|
2002-02-25 18:44:07 +00:00
|
|
|
uint32_t *row;
|
|
|
|
int32_t columns;
|
|
|
|
|
|
|
|
if(pv==NULL || rowIndex<0 || rowIndex>=(int32_t)pv[UPVEC_ROWS]) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
columns=(int32_t)pv[UPVEC_COLUMNS];
|
2002-02-25 18:44:07 +00:00
|
|
|
row=pv+UPVEC_HEADER_LENGTH+rowIndex*columns;
|
|
|
|
if(pRangeStart!=NULL) {
|
|
|
|
*pRangeStart=row[0];
|
|
|
|
}
|
|
|
|
if(pRangeLimit!=NULL) {
|
|
|
|
*pRangeLimit=row[1];
|
|
|
|
}
|
|
|
|
return row+2;
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
static int32_t U_CALLCONV
|
|
|
|
upvec_compareRows(const void *context, const void *l, const void *r) {
|
2002-02-25 18:44:07 +00:00
|
|
|
const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
|
2004-08-30 22:11:48 +00:00
|
|
|
const uint32_t *pv=(const uint32_t *)context;
|
2002-02-25 18:44:07 +00:00
|
|
|
int32_t i, count, columns;
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
count=columns=(int32_t)pv[UPVEC_COLUMNS]; /* includes start/limit columns */
|
2002-02-25 18:44:07 +00:00
|
|
|
|
|
|
|
/* start comparing after start/limit but wrap around to them */
|
|
|
|
i=2;
|
|
|
|
do {
|
|
|
|
if(left[i]!=right[i]) {
|
|
|
|
return left[i]<right[i] ? -1 : 1;
|
|
|
|
}
|
|
|
|
if(++i==columns) {
|
|
|
|
i=0;
|
|
|
|
}
|
|
|
|
} while(--count>0);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2004-08-30 22:11:48 +00:00
|
|
|
U_CAPI int32_t U_EXPORT2
|
2005-01-02 00:21:25 +00:00
|
|
|
upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
|
2002-02-25 18:44:07 +00:00
|
|
|
uint32_t *row;
|
|
|
|
int32_t columns, valueColumns, rows, count;
|
2005-01-02 00:21:25 +00:00
|
|
|
UChar32 start, limit;
|
2002-02-25 18:44:07 +00:00
|
|
|
|
|
|
|
/* argument checking */
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-01-02 00:21:25 +00:00
|
|
|
if(pv==NULL || handler==NULL) {
|
2002-02-25 18:44:07 +00:00
|
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
row=pv+UPVEC_HEADER_LENGTH;
|
|
|
|
columns=(int32_t)pv[UPVEC_COLUMNS];
|
|
|
|
rows=(int32_t)pv[UPVEC_ROWS];
|
|
|
|
|
2005-01-02 00:21:25 +00:00
|
|
|
if(rows==0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-02-25 18:44:07 +00:00
|
|
|
/* sort the properties vectors to find unique vector values */
|
|
|
|
if(rows>1) {
|
2004-08-30 22:11:48 +00:00
|
|
|
uprv_sortArray(pv+UPVEC_HEADER_LENGTH, rows, columns*4,
|
|
|
|
upvec_compareRows, pv, FALSE, pErrorCode);
|
|
|
|
}
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
return 0;
|
2002-02-25 18:44:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Move vector contents up to a contiguous array with only unique
|
2005-01-02 00:21:25 +00:00
|
|
|
* vector values, and call the handler function for each vector.
|
2002-02-25 18:44:07 +00:00
|
|
|
*
|
|
|
|
* This destroys the Properties Vector structure and replaces it
|
|
|
|
* with an array of just vector values.
|
|
|
|
*/
|
|
|
|
valueColumns=columns-2; /* not counting start & limit */
|
|
|
|
count=-valueColumns;
|
|
|
|
|
|
|
|
do {
|
2005-01-02 00:21:25 +00:00
|
|
|
/* fetch these first before memmove() may overwrite them */
|
|
|
|
start=(UChar32)row[0];
|
|
|
|
limit=(UChar32)row[1];
|
|
|
|
|
2002-02-25 18:44:07 +00:00
|
|
|
/* add a new values vector if it is different from the current one */
|
|
|
|
if(count<0 || 0!=uprv_memcmp(row+2, pv+count, valueColumns*4)) {
|
|
|
|
count+=valueColumns;
|
|
|
|
uprv_memmove(pv+count, row+2, valueColumns*4);
|
|
|
|
}
|
|
|
|
|
2005-01-02 00:21:25 +00:00
|
|
|
handler(context, start, limit, count, pv+count, valueColumns, pErrorCode);
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2002-02-25 18:44:07 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
row+=columns;
|
|
|
|
} while(--rows>0);
|
|
|
|
|
2005-01-02 00:21:25 +00:00
|
|
|
/* count is at the beginning of the last vector, add valueColumns to include that last vector */
|
2002-02-25 18:44:07 +00:00
|
|
|
return count+valueColumns;
|
|
|
|
}
|
2005-01-02 00:21:25 +00:00
|
|
|
|
|
|
|
U_CAPI void U_CALLCONV
|
|
|
|
upvec_compactToTrieHandler(void *context,
|
|
|
|
UChar32 start, UChar32 limit,
|
|
|
|
int32_t rowIndex, uint32_t *row, int32_t columns,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
if(!utrie_setRange32((UNewTrie *)context, start, limit, (uint32_t)rowIndex, FALSE)) {
|
|
|
|
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
|
|
|
}
|
|
|
|
}
|