2001-03-17 00:46:46 +00:00
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* Copyright ( C ) 2001 , International Business Machines
* Corporation and others . All Rights Reserved .
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* file name : ucol_cnt . cpp
* encoding : US - ASCII
* tab size : 8 ( not used )
* indentation : 4
*
* created 02 / 22 / 2001
* created by : Vladimir Weinstein
*
* This module maintains a contraction table structure in expanded form
* and provides means to flatten this structure
*
*/
2001-03-08 17:40:42 +00:00
# include "ucol_cnt.h"
2001-02-26 10:28:56 +00:00
# include "cmemory.h"
2001-05-15 23:00:04 +00:00
# include "unicode/uchar.h"
2001-02-26 10:28:56 +00:00
2001-10-20 01:09:31 +00:00
U_NAMESPACE_BEGIN
2001-02-26 10:28:56 +00:00
/*
 * Doubles the capacity of one contraction table when it is full.
 *
 * Nothing is done unless tbl->position == tbl->size. When the table is full,
 * both parallel arrays (CEs and codePoints) are reallocated to twice the
 * current size.
 *
 * @param tbl    table to grow; a NULL table is ignored
 * @param status set to U_MEMORY_ALLOCATION_ERROR if either array cannot be
 *               grown. In that case tbl->size is left unchanged and both
 *               arrays remain valid for at least tbl->size entries.
 *
 * NOTE(review): the arrays are obtained with uprv_malloc elsewhere in this
 * file but are resized with plain realloc here - confirm the two allocators
 * are interchangeable in this build.
 */
void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
    if(tbl == NULL) {
        return;
    }
    if(tbl->position == tbl->size) {
        uint32_t *newData = (uint32_t *)realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
        if(newData != NULL) {
            /* realloc may have moved (and released) the old block, so the new
               pointer must be stored even if the second realloc fails below */
            tbl->CEs = newData;
        }
        UChar *newCPs = (UChar *)realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
        if(newCPs != NULL) {
            tbl->codePoints = newCPs;
        }
        if(newData == NULL || newCPs == NULL) {
#ifdef UCOL_DEBUG
            fprintf(stderr, " out of memory for contractions \n ");
#endif
            *status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        tbl->size *= 2;
    }
}
2001-11-21 01:08:55 +00:00
/*
 * Creates an empty contraction table container.
 *
 * The container starts with room for INIT_EXP_TABLE_SIZE element slots, all
 * set to NULL, and with no flattened data (CEs, codePoints and offsets are
 * NULL).
 *
 * @param mapping stored in the new table as-is; it is not copied here
 * @param status  if already a failure, nothing is done and 0 is returned;
 *                set to U_MEMORY_ALLOCATION_ERROR if allocation fails
 * @return the new table, or NULL/0 on failure
 *
 * NOTE(review): currentTag is not initialized here although the insert/add/
 * set/changeLastCE functions read it - confirm callers set it before use.
 */
U_CAPI CntTable * U_EXPORT2
uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return 0;
    }
    CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
    if(tbl == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
    if(tbl->elements == NULL) {
        free(tbl);
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
    tbl->mapping = mapping;
    tbl->capacity = INIT_EXP_TABLE_SIZE;
    tbl->size = 0;
    tbl->position = 0;
    tbl->CEs = NULL;
    tbl->codePoints = NULL;
    tbl->offsets = NULL;
    return tbl;
}
/*
 * Appends a new, empty contraction table to the container.
 *
 * The new element gets zero-filled CE and code point arrays of
 * INIT_EXP_TABLE_SIZE entries and is stored in the next free slot. If that
 * fills the slot array, the slot array is doubled and the new half is set
 * to NULL.
 *
 * @param table  container to extend
 * @param key    receives the index of the new element; not written when the
 *               element could not be created
 * @param status set to U_MEMORY_ALLOCATION_ERROR on any allocation failure
 * @return the new element. NULL if the element itself could not be allocated
 *         or if there is no free slot left (a previous growth failed). If
 *         only the growth of the slot array fails, the element is still
 *         stored and returned, with status set.
 */
ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
    /* a previously failed growth leaves the slot array full; do not write past it */
    if(table->size >= table->capacity) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
    if(el == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
    el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
    if(el->CEs == NULL || el->codePoints == NULL) {
        free(el->CEs);
        free(el->codePoints);
        free(el);
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    el->position = 0;
    el->size = INIT_EXP_TABLE_SIZE;
    uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
    uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));

    table->elements[table->size] = el;
    //uhash_put(table->elements, (void *)table->size, el, status);
    *key = table->size++;

    if(table->size == table->capacity) {
        /* grow into a temporary so the existing slots survive a failed realloc */
        ContractionTable **grown = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));
        if(grown == NULL) {
#ifdef UCOL_DEBUG
            fprintf(stderr, " out of memory for contraction parts \n ");
#endif
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            /* only the newly added upper half needs clearing */
            uprv_memset(grown + table->capacity, 0, table->capacity*sizeof(ContractionTable *));
            table->elements = grown;
            table->capacity *= 2;
        }
    }
    return el;
}
2001-11-21 01:08:55 +00:00
/*
 * Flattens all contraction tables of the container into the contiguous
 * table->CEs / table->codePoints arrays and rewrites contraction CEs to
 * point at the flattened data.
 *
 * Steps:
 *  1. table->offsets[i] is set to (start of element i in the flat arrays)
 *     + mainOffset; table->position becomes the total number of entries.
 *  2. The flat arrays are reallocated to table->position entries.
 *  3. Every element is copied in. Slot 0 of each element's code point run
 *     does not hold a code point: it holds a header built from the combining
 *     classes of the element's code points 1..n-1 - the maximum class in the
 *     low byte and, in bit 8, a flag that is 1 when the minimum and maximum
 *     class are equal.
 *  4. Every CE for which isCntTableElement() is true - inside the flat CE
 *     array and in table->mapping for all code points 0..0x10FFFF - is
 *     rebuilt with constructContractCE() using the offset of the element it
 *     referred to.
 *
 * @param table      container to flatten
 * @param mainOffset value added to every element offset
 * @param status     if already a failure, nothing is done; set to
 *                   U_MEMORY_ALLOCATION_ERROR if a buffer cannot be allocated
 * @return the total number of flattened entries; 0 if status is a failure,
 *         the container has no elements, or allocation fails
 */
U_CAPI int32_t U_EXPORT2
uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
    int32_t i = 0, j = 0;
    if(U_FAILURE(*status) || table->size == 0) {
        return 0;
    }

    table->position = 0;

    free(table->offsets);
    table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
    if(table->offsets == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    /* See how much memory we need */
    for(i = 0; i<table->size; i++) {
        table->offsets[i] = table->position+mainOffset;
        table->position += table->elements[i]->position;
    }

    /* Allocate it */
    free(table->CEs);
    free(table->codePoints);
    table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
    table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
    if(table->position > 0) {
        /* a zero-sized request may legitimately yield NULL, so only a
           non-empty request counts as an allocation failure */
        if(table->CEs == NULL || table->codePoints == NULL) {
            free(table->CEs);
            free(table->codePoints);
            table->CEs = NULL;
            table->codePoints = NULL;
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
        uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
    }

    /* Now stuff the things in*/
    UChar *cpPointer = table->codePoints;
    uint32_t *CEPointer = table->CEs;
    for(i = 0; i<table->size; i++) {
        ContractionTable *el = table->elements[i];
        int32_t size = el->position;
        if(size <= 0) {
            /* an empty element owns no slot, so there is no header to write */
            continue;
        }
        uint8_t ccMax = 0, ccMin = 255, cc = 0;
        for(j = 1; j<size; j++) {
            cc = u_getCombiningClass(el->codePoints[j]);
            if(cc>ccMax) {
                ccMax = cc;
            }
            if(cc<ccMin) {
                ccMin = cc;
            }
            *(cpPointer+j) = el->codePoints[j];
        }
        /* The conditional must be parenthesised before shifting: without the
           parentheses "0 << 8" is evaluated first and the flag lands in
           bit 0, corrupting the maximum combining class. */
        *cpPointer = (UChar)((((ccMin==ccMax) ? 1 : 0) << 8) | ccMax);

        uprv_memcpy(CEPointer, el->CEs, size*sizeof(uint32_t));
        for(j = 0; j<size; j++) {
            if(isCntTableElement(*(CEPointer+j))) {
                *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
            }
        }
        cpPointer += size;
        CEPointer += size;
    }

    uint32_t CE;
    for(i = 0; i<=0x10FFFF; i++) {
        CE = ucmpe32_get(table->mapping, i);
        if(isCntTableElement(CE)) {
            CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
            ucmpe32_set(table->mapping, i, CE);
        }
    }

    return table->position;
}
2001-05-14 06:12:28 +00:00
/*
 * Makes a deep copy of one contraction table.
 *
 * The copy has the same position and size as the original, and its own
 * codePoints and CEs arrays holding copies of all t->size entries.
 *
 * @param t table to copy
 * @return the copy, or NULL if t is NULL or memory could not be allocated
 *         (nothing is leaked in that case)
 */
ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t) {
    if(t == NULL) {
        return NULL;
    }
    ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
    if(r == NULL) {
        return NULL;
    }

    r->position = t->position;
    r->size = t->size;

    r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
    r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
    if(r->codePoints == NULL || r->CEs == NULL) {
        free(r->codePoints);
        free(r->CEs);
        free(r);
        return NULL;
    }

    uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
    uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);

    return r;
}
2001-11-21 01:08:55 +00:00
U_CAPI CntTable * U_EXPORT2
uprv_cnttab_clone ( CntTable * t ) {
2001-05-14 06:12:28 +00:00
int32_t i = 0 ;
CntTable * r = ( CntTable * ) uprv_malloc ( sizeof ( CntTable ) ) ;
r - > position = t - > position ;
r - > size = t - > size ;
r - > capacity = t - > capacity ;
r - > mapping = t - > mapping ;
r - > elements = ( ContractionTable * * ) uprv_malloc ( t - > capacity * sizeof ( ContractionTable * ) ) ;
2001-05-15 17:39:41 +00:00
//uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
2001-05-14 06:12:28 +00:00
for ( i = 0 ; i < t - > size ; i + + ) {
r - > elements [ i ] = uprv_cnttab_cloneContraction ( t - > elements [ i ] ) ;
}
if ( t - > CEs ! = NULL ) {
r - > CEs = ( uint32_t * ) uprv_malloc ( t - > position * sizeof ( uint32_t ) ) ;
uprv_memcpy ( r - > CEs , t - > CEs , t - > position * sizeof ( uint32_t ) ) ;
} else {
r - > CEs = NULL ;
}
if ( t - > codePoints ! = NULL ) {
r - > codePoints = ( UChar * ) uprv_malloc ( t - > position * sizeof ( UChar ) ) ;
uprv_memcpy ( r - > codePoints , t - > codePoints , t - > position * sizeof ( UChar ) ) ;
} else {
r - > codePoints = NULL ;
}
if ( t - > offsets ! = NULL ) {
r - > offsets = ( int32_t * ) uprv_malloc ( t - > size * sizeof ( int32_t ) ) ;
uprv_memcpy ( r - > offsets , t - > offsets , t - > size * sizeof ( int32_t ) ) ;
} else {
r - > offsets = NULL ;
}
return r ;
}
2001-11-21 01:08:55 +00:00
/*
 * Releases a contraction table container and everything it owns: each
 * element with its CEs and codePoints arrays, the element slot array, and
 * the flattened CEs, offsets and codePoints arrays. table->mapping is not
 * released here.
 *
 * @param table container to release; NULL is accepted and ignored, as are
 *              NULL element slots
 */
U_CAPI void U_EXPORT2
uprv_cnttab_close(CntTable *table) {
    int32_t i = 0;
    if(table == NULL) {
        return;
    }
    if(table->elements != NULL) {
        for(i = 0; i<table->size; i++) {
            ContractionTable *el = table->elements[i];
            if(el != NULL) {
                free(el->CEs);
                free(el->codePoints);
                free(el);
            }
        }
        free(table->elements);
    }
    free(table->CEs);
    free(table->offsets);
    free(table->codePoints);
    free(table);
}
/* this is for adding non contractions */
2001-11-21 01:08:55 +00:00
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_changeLastCE ( CntTable * table , uint32_t element , uint32_t value , UErrorCode * status ) {
2001-02-26 10:28:56 +00:00
element & = 0xFFFFFF ;
ContractionTable * tbl = NULL ;
if ( U_FAILURE ( * status ) ) {
return 0 ;
}
if ( ( element = = 0xFFFFFF ) | | ( tbl = table - > elements [ element ] ) = = NULL ) {
2001-11-13 19:45:11 +00:00
return 0 ;
2001-02-26 10:28:56 +00:00
}
2001-05-16 18:11:32 +00:00
tbl - > CEs [ tbl - > position - 1 ] = value ;
2001-02-26 10:28:56 +00:00
2001-09-27 23:18:14 +00:00
return ( constructContractCE ( table - > currentTag , element ) ) ;
2001-02-26 10:28:56 +00:00
}
/* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
2001-11-21 01:08:55 +00:00
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_insertContraction ( CntTable * table , uint32_t element , UChar codePoint , uint32_t value , UErrorCode * status ) {
2001-02-26 10:28:56 +00:00
element & = 0xFFFFFF ;
ContractionTable * tbl = NULL ;
if ( U_FAILURE ( * status ) ) {
return 0 ;
}
if ( ( element = = 0xFFFFFF ) | | ( tbl = table - > elements [ element ] ) = = NULL ) {
tbl = addATableElement ( table , & element , status ) ;
}
uprv_growTable ( tbl , status ) ;
2001-03-22 21:16:20 +00:00
uint32_t offset = 0 ;
2001-02-26 10:28:56 +00:00
while ( tbl - > codePoints [ offset ] < codePoint & & offset < tbl - > position ) {
offset + + ;
}
2001-03-22 21:16:20 +00:00
uint32_t i = tbl - > position ;
2001-02-26 10:28:56 +00:00
for ( i = tbl - > position ; i > offset ; i - - ) {
tbl - > CEs [ i ] = tbl - > CEs [ i - 1 ] ;
tbl - > codePoints [ i ] = tbl - > codePoints [ i - 1 ] ;
}
tbl - > CEs [ offset ] = value ;
tbl - > codePoints [ offset ] = codePoint ;
tbl - > position + + ;
2001-09-27 23:18:14 +00:00
return ( constructContractCE ( table - > currentTag , element ) ) ;
2001-02-26 10:28:56 +00:00
}
/* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
2001-11-21 01:08:55 +00:00
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_addContraction ( CntTable * table , uint32_t element , UChar codePoint , uint32_t value , UErrorCode * status ) {
2001-02-26 10:28:56 +00:00
element & = 0xFFFFFF ;
ContractionTable * tbl = NULL ;
if ( U_FAILURE ( * status ) ) {
return 0 ;
}
if ( ( element = = 0xFFFFFF ) | | ( tbl = table - > elements [ element ] ) = = NULL ) {
tbl = addATableElement ( table , & element , status ) ;
}
uprv_growTable ( tbl , status ) ;
tbl - > CEs [ tbl - > position ] = value ;
tbl - > codePoints [ tbl - > position ] = codePoint ;
tbl - > position + + ;
2001-09-27 23:18:14 +00:00
return ( constructContractCE ( table - > currentTag , element ) ) ;
2001-02-26 10:28:56 +00:00
}
/* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
2001-11-21 01:08:55 +00:00
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_setContraction ( CntTable * table , uint32_t element , uint32_t offset , UChar codePoint , uint32_t value , UErrorCode * status ) {
2001-02-26 10:28:56 +00:00
element & = 0xFFFFFF ;
ContractionTable * tbl = NULL ;
if ( U_FAILURE ( * status ) ) {
return 0 ;
}
if ( ( element = = 0xFFFFFF ) | | ( tbl = table - > elements [ element ] ) = = NULL ) {
tbl = addATableElement ( table , & element , status ) ;
}
if ( offset > = tbl - > size ) {
* status = U_INDEX_OUTOFBOUNDS_ERROR ;
return 0 ;
}
tbl - > CEs [ offset ] = value ;
tbl - > codePoints [ offset ] = codePoint ;
//return(offset);
2001-09-27 23:18:14 +00:00
return ( constructContractCE ( table - > currentTag , element ) ) ;
2001-02-26 10:28:56 +00:00
}
2001-06-06 20:19:26 +00:00
/*
 * Looks up the contraction table an element handle refers to.
 *
 * @param table   container to look in
 * @param element element handle; only the low 24 bits are used as the index
 * @return the table stored at that index, or NULL if the index is 0xFFFFFF,
 *         is not below table->size, or the slot is empty
 */
ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
    element &= 0xFFFFFF;

    /* reject out-of-range indices before touching the slot array */
    if((element == 0xFFFFFF) || (element >= (uint32_t)table->size)) {
        return NULL;
    }
    return table->elements[element];
}
2001-02-26 10:28:56 +00:00
2001-06-06 20:19:26 +00:00
/*
 * Finds the index of a code point in one contraction table.
 *
 * The scan moves forward while the searched code point is greater than the
 * stored one, so it relies on the entries being in ascending code point
 * order. Only the tbl->position entries in use are examined.
 *
 * @param tbl       table to search; may be NULL
 * @param codePoint code point to look for
 * @return the index of the matching entry, or -1 if tbl is NULL or no entry
 *         matches
 */
int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
    uint32_t position = 0;
    if(tbl == NULL) {
        return -1;
    }

    /* the bound is tested before each read, so the slot one past the last
       entry in use is never compared */
    while(position < tbl->position && codePoint > tbl->codePoints[position]) {
        position++;
    }

    if(position < tbl->position && codePoint == tbl->codePoints[position]) {
        return (int32_t)position;
    }
    return -1;
}
2001-06-06 20:19:26 +00:00
/*
 * Returns the CE stored at a given index of one contraction table.
 *
 * @param tbl      table to read; may be NULL
 * @param position index of the entry; -1 (the "not found" result of
 *                 _cnttab_findCP) is accepted
 * @return the CE at that index, or UCOL_NOT_FOUND if tbl is NULL or the
 *         index is negative or not below the number of entries in use
 */
uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
    if(tbl == NULL) {
        return UCOL_NOT_FOUND;
    }
    /* valid indices are 0 .. tbl->position-1; a negative index becomes a
       huge unsigned value and is rejected by the same comparison */
    if(position < 0 || (uint32_t)position >= tbl->position) {
        return UCOL_NOT_FOUND;
    }
    return tbl->CEs[position];
}
2001-02-26 10:28:56 +00:00
2001-11-21 01:08:55 +00:00
/*
 * Finds the index of a code point in the contraction table an element
 * handle refers to.
 *
 * @param table     container to look in
 * @param element   element handle
 * @param codePoint code point to look for
 * @param status    if already a failure, nothing is looked up
 * @return the index of the matching entry, or -1 if status is a failure,
 *         the element does not exist, or no entry matches. (A failure is
 *         reported as -1 rather than 0 because 0 is a valid index.)
 */
U_CAPI int32_t U_EXPORT2
uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return -1;
    }

    return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
}
2001-11-21 01:08:55 +00:00
/*
 * Returns the CE stored at a given index of the contraction table an
 * element handle refers to.
 *
 * @param table    container to look in
 * @param element  element handle
 * @param position index of the entry
 * @param status   if already a failure, nothing is looked up
 * @return whatever _cnttab_getCE() yields for the resolved table and index,
 *         or UCOL_NOT_FOUND if status is a failure
 */
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
    if(!U_FAILURE(*status)) {
        ContractionTable *resolved = _cnttab_getContractionTable(table, element);
        return _cnttab_getCE(resolved, position);
    }
    return UCOL_NOT_FOUND;
}
2001-11-21 01:08:55 +00:00
/*
 * Looks up the CE stored for a code point in the contraction table an
 * element handle refers to: the table is resolved, the code point is
 * searched with _cnttab_findCP(), and the CE at the resulting index is
 * fetched with _cnttab_getCE().
 *
 * @param table     container to look in
 * @param element   element handle
 * @param codePoint code point to look for
 * @param status    if already a failure, nothing is looked up
 * @return the CE yielded by _cnttab_getCE(), or UCOL_NOT_FOUND if status is
 *         a failure
 */
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
    if(!U_FAILURE(*status)) {
        ContractionTable *resolved = _cnttab_getContractionTable(table, element);
        int32_t index = _cnttab_findCP(resolved, codePoint);
        return _cnttab_getCE(resolved, index);
    }
    return UCOL_NOT_FOUND;
}
2001-02-26 10:28:56 +00:00
2001-11-21 01:08:55 +00:00
/*
 * Walks a zero-terminated string through the contraction tables, starting
 * at the given element, and reports whether a CE is reached.
 *
 * For each code unit the CE is looked up with uprv_cnttab_findCE() and
 * becomes the element for the next step. The walk ends with FALSE as soon
 * as a lookup yields UCOL_NOT_FOUND, and with TRUE as soon as a lookup
 * yields a CE for which isCntTableElement() is false. If the string ends
 * first, the result is TRUE exactly when entry 0 of the current element is
 * not UCOL_NOT_FOUND.
 *
 * @param table    container to look in
 * @param element  element handle to start from
 * @param ztString zero-terminated string to follow
 * @param status   if already a failure, FALSE is returned
 * @return TRUE or FALSE as described above
 */
U_CAPI UBool U_EXPORT2
uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return FALSE;
    }

    for(; *ztString != 0; ztString++) {
        element = uprv_cnttab_findCE(table, element, *ztString, status);
        if(element == UCOL_NOT_FOUND) {
            return FALSE;
        }
        if(!isCntTableElement(element)) {
            return TRUE;
        }
    }

    /* string exhausted while still inside a contraction table */
    uint32_t firstCE = uprv_cnttab_getCE(table, element, 0, status);
    if(firstCE == UCOL_NOT_FOUND) {
        return FALSE;
    }
    return TRUE;
}
2001-03-14 18:55:38 +00:00
2001-11-21 01:08:55 +00:00
/*
 * Replaces the CE of an existing (codePoint, CE) entry in a contraction
 * table.
 *
 * The entry is located with a forward scan that relies on the entries being
 * in ascending code point order; only the entries in use are examined.
 *
 * @param table     container holding the element
 * @param element   element handle; only the low 24 bits are used as the index
 * @param codePoint code point of the entry to change
 * @param newCE     CE to store
 * @param status    if already a failure, nothing is done
 * @return the masked element index on success (note: not a constructed
 *         contraction CE, unlike the add/insert/set functions); 0 if status
 *         is a failure or the element does not exist; UCOL_NOT_FOUND if the
 *         element has no entry for codePoint
 */
U_CAPI uint32_t U_EXPORT2
uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
    element &= 0xFFFFFF;
    ContractionTable *tbl = NULL;
    if(U_FAILURE(*status)) {
        return 0;
    }

    /* the range check comes first so the slot read stays inside the slot array */
    if((element == 0xFFFFFF) || (element >= (uint32_t)table->size) || (tbl = table->elements[element]) == NULL) {
        return 0;
    }

    /* the bound is tested before each read, so the slot one past the last
       entry in use is never compared or overwritten */
    uint32_t position = 0;
    while(position < tbl->position && codePoint > tbl->codePoints[position]) {
        position++;
    }

    if(position < tbl->position && codePoint == tbl->codePoints[position]) {
        tbl->CEs[position] = newCE;
        return element;
    }
    return UCOL_NOT_FOUND;
}
2001-06-06 20:19:26 +00:00
2001-10-20 01:09:31 +00:00
U_NAMESPACE_END