2001-02-26 10:28:56 +00:00
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* Copyright ( C ) 2001 , International Business Machines
* Corporation and others . All Rights Reserved .
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* file name : ucaelems . cpp
* encoding : US - ASCII
* tab size : 8 ( not used )
* indentation : 4
*
* created 02 / 22 / 2001
* created by : Vladimir Weinstein
*
* This program reads the Fractional UCA table and generates
* internal format for UCA table as well as inverse UCA table .
* It then writes binary files containing the data : ucadata . dat
* & invuca . dat
2001-03-03 04:06:43 +00:00
*
* date name comments
* 03 / 02 / 2001 synwee added setMaxExpansion
2001-03-07 20:45:08 +00:00
* 03 / 07 / 2001 synwee merged UCA ' s maxexpansion and tailoring ' s
2001-02-26 10:28:56 +00:00
*/
2002-09-20 01:54:48 +00:00
# include "unicode/utypes.h"
# if !UCONFIG_NO_COLLATION
2001-03-03 03:35:17 +00:00
# include "unicode/uchar.h"
2002-03-12 20:20:08 +00:00
# include "unicode/unistr.h"
2002-08-21 19:12:24 +00:00
# include "unicode/ucoleitr.h"
2002-08-21 21:33:16 +00:00
# include "unicode/normlzr.h"
2002-07-12 21:42:24 +00:00
# include "ucol_elm.h"
# include "unormimp.h"
2002-09-17 05:06:04 +00:00
# include "unicode/caniter.h"
2002-07-12 21:42:24 +00:00
# include "cmemory.h"
2001-02-26 10:28:56 +00:00
2001-10-20 01:09:31 +00:00
U_NAMESPACE_BEGIN
2001-10-02 01:26:13 +00:00
/* Forward declaration: uprv_uca_addPrefix calls this helper before its definition appears. */
static uint32_t uprv_uca_processContraction ( CntTable * contractions , UCAElements * element , uint32_t existingCE , UErrorCode * status ) ;
2001-10-19 23:14:38 +00:00
U_CDECL_BEGIN
2001-11-21 01:08:55 +00:00
/**
 * Hash callback for the prefix lookup table.
 * The key is a UCAElements pointer. Its code points are copied into a
 * zero-terminated scratch buffer so the UChar string hasher can be applied.
 * NOTE(review): the scratch buffer holds 256 UChars, so this assumes
 * cSize < 256 - confirm against the UCAElements definition.
 * @param e hash token whose pointer member is a UCAElements
 * @return hash code of the element's code point string
 */
static int32_t U_EXPORT2 U_CALLCONV
prefixLookupHash(const UHashTok e) {
    UCAElements *source = (UCAElements *)e.pointer;
    UChar scratch[256];
    UHashTok terminated;

    /* build a zero-terminated copy; the element's own buffer is left untouched */
    uprv_memcpy(scratch, source->cPoints, source->cSize * sizeof(UChar));
    scratch[source->cSize] = 0;

    terminated.pointer = scratch;
    return uhash_hashUChars(terminated);
}
2001-11-21 01:08:55 +00:00
/**
 * Key comparison callback for the prefix lookup table.
 * Both keys are UCAElements pointers. Each element's code points are copied
 * into a zero-terminated scratch buffer and the two copies are compared with
 * the UChar string comparator.
 * NOTE(review): each scratch buffer holds 256 UChars, so this assumes
 * cSize < 256 - confirm against the UCAElements definition.
 * @param e1 first key
 * @param e2 second key
 * @return result of uhash_compareUChars on the two code point strings
 */
static int8_t U_EXPORT2 U_CALLCONV
prefixLookupComp(const UHashTok e1, const UHashTok e2) {
    UCAElements *left = (UCAElements *)e1.pointer;
    UCAElements *right = (UCAElements *)e2.pointer;
    UChar leftChars[256];
    UChar rightChars[256];
    UHashTok leftKey;
    UHashTok rightKey;

    uprv_memcpy(leftChars, left->cPoints, left->cSize * sizeof(UChar));
    leftChars[left->cSize] = 0;
    leftKey.pointer = leftChars;

    uprv_memcpy(rightChars, right->cPoints, right->cSize * sizeof(UChar));
    rightChars[right->cSize] = 0;
    rightKey.pointer = rightChars;

    return uhash_compareUChars(leftKey, rightKey);
}
2001-10-19 23:14:38 +00:00
U_CDECL_END
2001-10-02 01:26:13 +00:00
/**
 * Appends one collation element to the expansion table, creating the CE
 * array on first use and doubling it when it is full.
 * @param expansions table to append to
 * @param value      collation element to store
 * @param status     error status; nothing is done if it already indicates failure
 * @return index at which the value was stored; 0 if status was already a
 *         failure or the initial allocation failed; -1 if growing failed
 */
static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    /* first use: allocate the initial CE array */
    if (expansions->CEs == NULL) {
        uint32_t *initial = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t));
        if (initial == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        expansions->CEs = initial;
        expansions->size = INIT_EXP_TABLE_SIZE;
        expansions->position = 0;
    }

    /* array is full: double its capacity */
    if (expansions->position == expansions->size) {
        uint32_t *grown = (uint32_t *)uprv_realloc(expansions->CEs, 2 * expansions->size * sizeof(uint32_t));
        if (grown == NULL) {
#ifdef UCOL_DEBUG
            fprintf(stderr, " out of memory for expansions \n ");
#endif
            *status = U_MEMORY_ALLOCATION_ERROR;
            return -1;
        }
        expansions->CEs = grown;
        expansions->size *= 2;
    }

    expansions->CEs[expansions->position] = value;
    expansions->position++;
    return expansions->position - 1;
}
2001-11-21 01:08:55 +00:00
/**
 * Allocates and initializes a temporary UCA table.
 *
 * The table stores the passed image, options and UCA collator pointers as-is
 * (they are not copied) and allocates: an empty expansion table, a trie for
 * the mapping, the prefix lookup hash table, the contraction table, the
 * max-expansion table (seeded from the UCA collator's end-expansion data when
 * UCA is not NULL, with an extra leading zero entry), the Jamo max-expansion
 * table, and the zero-filled unsafeCP and contrEndCP bit tables.
 *
 * On any failure everything allocated so far is released, *status is set and
 * NULL is returned.
 *
 * @param image   table header stored in the new table
 * @param opts    option set stored in the new table
 * @param UCA     UCA collator whose max-expansion data is copied; may be NULL
 * @param initTag tag used to build the initial value of the mapping trie
 * @param status  error status; nothing is done if it already indicates failure
 * @return the new table, or NULL on failure
 */
U_CAPI tempUCATable * U_EXPORT2
uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UErrorCode *status) {
    /* all locals are declared up front so that "goto cleanup" never jumps over an initialization */
    tempUCATable *t = NULL;
    MaxExpansionTable *maxet = NULL;
    MaxJamoExpansionTable *maxjet = NULL;

    if (U_FAILURE(*status)) {
        return NULL;
    }

    t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
    if (t == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* zero everything so the cleanup code can tell what has been allocated */
    uprv_memset(t, 0, sizeof(tempUCATable));

    maxet = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
    if (maxet == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    /* size 0, position 0 and NULL arrays: the state used when UCA == NULL */
    uprv_memset(maxet, 0, sizeof(MaxExpansionTable));
    t->maxExpansions = maxet;

    maxjet = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
    if (maxjet == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    uprv_memset(maxjet, 0, sizeof(MaxJamoExpansionTable));
    maxjet->endExpansionCE = NULL;
    maxjet->isV = NULL;
    maxjet->size = 0;
    maxjet->position = 0;
    maxjet->maxLSize = 1;
    maxjet->maxVSize = 1;
    maxjet->maxTSize = 1;
    t->maxJamoExpansions = maxjet;

    t->image = image;
    t->options = opts;
    t->UCA = UCA;

    t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
    if (t->expansions == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    uprv_memset(t->expansions, 0, sizeof(ExpansionTable));

    /* Do your own mallocs for the structure, array and have linear Latin 1 */
    t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag << 24), TRUE);
    if (t->mapping == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }

    t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, status);
    if (U_FAILURE(*status) || t->prefixLookup == NULL) {
        if (U_SUCCESS(*status)) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        }
        goto cleanup;
    }
    uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);

    t->contractions = uprv_cnttab_open(t->mapping, status);
    if (U_FAILURE(*status) || t->contractions == NULL) {
        if (U_SUCCESS(*status)) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        }
        goto cleanup;
    }

    /* copy UCA's maxexpansion and merge as we go along */
    if (UCA != NULL) {
        /* adding an extra initial value for easier manipulation */
        maxet->size = (UCA->lastEndExpansionCE - UCA->endExpansionCE) + 2;
        maxet->position = maxet->size - 1;

        maxet->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size);
        if (maxet->endExpansionCE == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        maxet->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size);
        if (maxet->expansionCESize == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }

        /* initialized value */
        *(maxet->endExpansionCE) = 0;
        *(maxet->expansionCESize) = 0;
        uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE,
                    sizeof(uint32_t) * (maxet->size - 1));
        uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize,
                    sizeof(uint8_t) * (maxet->size - 1));
    }

    t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
    if (t->unsafeCP == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
    if (t->contrEndCP == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
    uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE);

    return t;

cleanup:
    /* release whatever was set up before the failure; *status is already set */
    if (t->contractions != NULL) {
        uprv_cnttab_close(t->contractions);
    }
    if (t->prefixLookup != NULL) {
        uhash_close(t->prefixLookup);
    }
    if (t->mapping != NULL) {
        utrie_close(t->mapping);
    }
    if (t->expansions != NULL) {
        if (t->expansions->CEs != NULL) {
            uprv_free(t->expansions->CEs);
        }
        uprv_free(t->expansions);
    }
    if (maxet != NULL) {
        if (maxet->endExpansionCE != NULL) {
            uprv_free(maxet->endExpansionCE);
        }
        if (maxet->expansionCESize != NULL) {
            uprv_free(maxet->expansionCESize);
        }
        uprv_free(maxet);
    }
    if (maxjet != NULL) {
        uprv_free(maxjet);
    }
    if (t->unsafeCP != NULL) {
        uprv_free(t->unsafeCP);
    }
    if (t->contrEndCP != NULL) {
        uprv_free(t->contrEndCP);
    }
    uprv_free(t);
    return NULL;
}
2001-11-21 01:08:55 +00:00
/**
 * Creates a deep copy of a temporary UCA table.
 *
 * The mapping trie, expansion table, contraction table, max-expansion table,
 * Jamo max-expansion table and the unsafeCP / contrEndCP bit tables are
 * duplicated when present in the source. The UCA, image and options pointers
 * are shared with the source. The prefix lookup table is NOT cloned; the
 * copy's prefixLookup is NULL.
 *
 * On any failure everything allocated for the copy is released, *status is
 * set and NULL is returned.
 *
 * @param t      table to copy
 * @param status error status; nothing is done if it already indicates failure
 * @return the copy, or NULL on failure
 */
U_CAPI tempUCATable * U_EXPORT2
uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
    /* declared before any "goto cleanup" so no jump crosses an initialization */
    tempUCATable *r = NULL;

    if (U_FAILURE(*status)) {
        return NULL;
    }

    r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
    if (r == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* zeroed so the cleanup code can tell which parts have been cloned */
    uprv_memset(r, 0, sizeof(tempUCATable));

    /* mapping */
    if (t->mapping != NULL) {
        r->mapping = utrie_clone(NULL, t->mapping, NULL, 0);
        if (r->mapping == NULL) {
            /* NOTE(review): a NULL clone is reported as an allocation failure;
               confirm utrie_clone has no other NULL-returning case that matters here */
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
    }

    // a hashing clone function would be very nice. We have none currently...
    // However, we should be good, as closing should not produce any prefixed elements.
    r->prefixLookup = NULL; // prefixes are not used in closing

    /* expansions */
    if (t->expansions != NULL) {
        r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
        if (r->expansions == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memset(r->expansions, 0, sizeof(ExpansionTable));
        r->expansions->position = t->expansions->position;
        r->expansions->size = t->expansions->size;
        if (t->expansions->CEs != NULL) {
            r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t) * t->expansions->size);
            if (r->expansions->CEs == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto cleanup;
            }
            uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t) * t->expansions->size);
        } else {
            r->expansions->CEs = NULL;
        }
    }

    /* contractions; the clone must refer to the cloned trie, not the source's */
    if (t->contractions != NULL) {
        r->contractions = uprv_cnttab_clone(t->contractions, status);
        if (U_FAILURE(*status) || r->contractions == NULL) {
            if (U_SUCCESS(*status)) {
                *status = U_MEMORY_ALLOCATION_ERROR;
            }
            goto cleanup;
        }
        r->contractions->mapping = r->mapping;
    }

    /* max expansions */
    if (t->maxExpansions != NULL) {
        r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
        if (r->maxExpansions == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memset(r->maxExpansions, 0, sizeof(MaxExpansionTable));
        r->maxExpansions->size = t->maxExpansions->size;
        r->maxExpansions->position = t->maxExpansions->position;
        if (t->maxExpansions->endExpansionCE != NULL) {
            r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t) * t->maxExpansions->size);
            if (r->maxExpansions->endExpansionCE == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto cleanup;
            }
            uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->size * sizeof(uint32_t));
        } else {
            r->maxExpansions->endExpansionCE = NULL;
        }
        if (t->maxExpansions->expansionCESize != NULL) {
            r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t) * t->maxExpansions->size);
            if (r->maxExpansions->expansionCESize == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto cleanup;
            }
            uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->size * sizeof(uint8_t));
        } else {
            r->maxExpansions->expansionCESize = NULL;
        }
    }

    /* max Jamo expansions */
    if (t->maxJamoExpansions != NULL) {
        r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
        if (r->maxJamoExpansions == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memset(r->maxJamoExpansions, 0, sizeof(MaxJamoExpansionTable));
        r->maxJamoExpansions->size = t->maxJamoExpansions->size;
        r->maxJamoExpansions->position = t->maxJamoExpansions->position;
        r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize;
        r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize;
        r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize;
        if (t->maxJamoExpansions->size != 0) {
            r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t) * t->maxJamoExpansions->size);
            if (r->maxJamoExpansions->endExpansionCE == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto cleanup;
            }
            uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->size * sizeof(uint32_t));
            r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool) * t->maxJamoExpansions->size);
            if (r->maxJamoExpansions->isV == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto cleanup;
            }
            uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->size * sizeof(UBool));
        } else {
            r->maxJamoExpansions->endExpansionCE = NULL;
            r->maxJamoExpansions->isV = NULL;
        }
    }

    /* bit tables */
    if (t->unsafeCP != NULL) {
        r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
        if (r->unsafeCP == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
    }
    if (t->contrEndCP != NULL) {
        r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
        if (r->contrEndCP == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
    }

    /* shared, not owned */
    r->UCA = t->UCA;
    r->image = t->image;
    r->options = t->options;

    return r;

cleanup:
    /* release whatever was cloned before the failure; *status is already set */
    if (r->contractions != NULL) {
        uprv_cnttab_close(r->contractions);
    }
    if (r->mapping != NULL) {
        utrie_close(r->mapping);
    }
    if (r->expansions != NULL) {
        if (r->expansions->CEs != NULL) {
            uprv_free(r->expansions->CEs);
        }
        uprv_free(r->expansions);
    }
    if (r->maxExpansions != NULL) {
        if (r->maxExpansions->endExpansionCE != NULL) {
            uprv_free(r->maxExpansions->endExpansionCE);
        }
        if (r->maxExpansions->expansionCESize != NULL) {
            uprv_free(r->maxExpansions->expansionCESize);
        }
        uprv_free(r->maxExpansions);
    }
    if (r->maxJamoExpansions != NULL) {
        if (r->maxJamoExpansions->endExpansionCE != NULL) {
            uprv_free(r->maxJamoExpansions->endExpansionCE);
        }
        if (r->maxJamoExpansions->isV != NULL) {
            uprv_free(r->maxJamoExpansions->isV);
        }
        uprv_free(r->maxJamoExpansions);
    }
    if (r->unsafeCP != NULL) {
        uprv_free(r->unsafeCP);
    }
    if (r->contrEndCP != NULL) {
        uprv_free(r->contrEndCP);
    }
    uprv_free(r);
    return NULL;
}
2001-11-21 01:08:55 +00:00
/**
 * Releases a temporary UCA table and everything it owns: the expansion
 * table, contraction table, mapping trie, prefix lookup hash table,
 * max-expansion tables and the unsafeCP / contrEndCP bit tables.
 * The image, options and UCA pointers are not released here.
 * Sub-tables that are NULL are skipped, so a partially populated table
 * can be closed safely.
 * @param t table to close; NULL is a no-op
 */
U_CAPI void U_EXPORT2
uprv_uca_closeTempTable(tempUCATable *t) {
    if (t == NULL) {
        return;
    }

    if (t->expansions != NULL) {
        uprv_free(t->expansions->CEs);
        uprv_free(t->expansions);
    }
    if (t->contractions != NULL) {
        uprv_cnttab_close(t->contractions);
    }
    /*ucmpe32_close(t->mapping);*/
    utrie_close(t->mapping);

    if (t->prefixLookup != NULL) {
        uhash_close(t->prefixLookup);
    }

    if (t->maxExpansions != NULL) {
        uprv_free(t->maxExpansions->endExpansionCE);
        uprv_free(t->maxExpansions->expansionCESize);
        uprv_free(t->maxExpansions);
    }

    if (t->maxJamoExpansions != NULL) {
        /* the Jamo arrays are only released when the table reports a non-zero size */
        if (t->maxJamoExpansions->size > 0) {
            uprv_free(t->maxJamoExpansions->endExpansionCE);
            uprv_free(t->maxJamoExpansions->isV);
        }
        uprv_free(t->maxJamoExpansions);
    }

    uprv_free(t->unsafeCP);
    uprv_free(t->contrEndCP);

    uprv_free(t);
}
2001-03-02 01:14:03 +00:00
/**
* Looks for the maximum length of all expansion sequences ending with the same
* collation element . The size required for maxexpansion and maxsize is
* returned if the arrays are too small .
* @ param endexpansion the last expansion collation element to be added
* @ param expansionsize size of the expansion
* @ param maxexpansion data structure to store the maximum expansion data .
* @ param status error status
* @ returns size of the maxexpansion and maxsize used .
*/
int uprv_uca_setMaxExpansion ( uint32_t endexpansion ,
uint8_t expansionsize ,
MaxExpansionTable * maxexpansion ,
UErrorCode * status )
{
if ( maxexpansion - > size = = 0 ) {
2001-03-08 00:33:38 +00:00
/* we'll always make the first element 0, for easier manipulation */
2001-03-02 01:14:03 +00:00
maxexpansion - > endExpansionCE =
( uint32_t * ) uprv_malloc ( INIT_EXP_TABLE_SIZE * sizeof ( int32_t ) ) ;
2002-07-02 15:10:30 +00:00
/* test for NULL */
2002-06-29 09:31:05 +00:00
if ( maxexpansion - > endExpansionCE = = NULL ) {
* status = U_MEMORY_ALLOCATION_ERROR ;
return 0 ;
}
2001-03-02 01:14:03 +00:00
* ( maxexpansion - > endExpansionCE ) = 0 ;
maxexpansion - > expansionCESize =
( uint8_t * ) uprv_malloc ( INIT_EXP_TABLE_SIZE * sizeof ( uint8_t ) ) ;
2002-07-02 15:10:30 +00:00
/* test for NULL */ ;
2002-06-29 09:31:05 +00:00
if ( maxexpansion - > expansionCESize = = NULL ) {
* status = U_MEMORY_ALLOCATION_ERROR ;
return 0 ;
}
2001-03-02 01:14:03 +00:00
* ( maxexpansion - > expansionCESize ) = 0 ;
maxexpansion - > size = INIT_EXP_TABLE_SIZE ;
maxexpansion - > position = 0 ;
}
2001-03-08 00:33:38 +00:00
if ( maxexpansion - > position + 1 = = maxexpansion - > size ) {
2001-03-02 01:14:03 +00:00
uint32_t * neweece = ( uint32_t * ) uprv_realloc ( maxexpansion - > endExpansionCE ,
2001-03-07 20:45:08 +00:00
2 * maxexpansion - > size * sizeof ( uint32_t ) ) ;
2001-03-02 01:14:03 +00:00
uint8_t * neweces = ( uint8_t * ) uprv_realloc ( maxexpansion - > expansionCESize ,
2 * maxexpansion - > size * sizeof ( uint8_t ) ) ;
if ( neweece = = NULL | | neweces = = NULL ) {
2001-05-16 17:09:31 +00:00
# ifdef UCOL_DEBUG
2001-03-02 01:14:03 +00:00
fprintf ( stderr , " out of memory for maxExpansions \n " ) ;
2001-05-16 17:09:31 +00:00
# endif
2001-03-02 01:14:03 +00:00
* status = U_MEMORY_ALLOCATION_ERROR ;
return - 1 ;
}
maxexpansion - > endExpansionCE = neweece ;
maxexpansion - > expansionCESize = neweces ;
maxexpansion - > size * = 2 ;
}
uint32_t * pendexpansionce = maxexpansion - > endExpansionCE ;
uint8_t * pexpansionsize = maxexpansion - > expansionCESize ;
int pos = maxexpansion - > position ;
uint32_t * start = pendexpansionce ;
uint32_t * limit = pendexpansionce + pos ;
/* using binary search to determine if last expansion element is
already in the array */
uint32_t * mid ;
int result = - 1 ;
while ( start < limit - 1 ) {
mid = start + ( ( limit - start ) > > 1 ) ;
if ( endexpansion < = * mid ) {
limit = mid ;
}
else {
start = mid ;
}
}
if ( * start = = endexpansion ) {
result = start - pendexpansionce ;
}
else
if ( * limit = = endexpansion ) {
result = limit - pendexpansionce ;
}
if ( result > - 1 ) {
/* found the ce in expansion, we'll just modify the size if it is
smaller */
uint8_t * currentsize = pexpansionsize + result ;
if ( * currentsize < expansionsize ) {
* currentsize = expansionsize ;
}
}
else {
/* we'll need to squeeze the value into the array.
initial implementation . */
/* shifting the subarray down by 1 */
int shiftsize = ( pendexpansionce + pos ) - start ;
uint32_t * shiftpos = start + 1 ;
uint8_t * sizeshiftpos = pexpansionsize + ( shiftpos - pendexpansionce ) ;
/* okay need to rearrange the array into sorted order */
if ( shiftsize = = 0 | | * ( pendexpansionce + pos ) < endexpansion ) {
* ( pendexpansionce + pos + 1 ) = endexpansion ;
* ( pexpansionsize + pos + 1 ) = expansionsize ;
}
else {
uprv_memmove ( shiftpos + 1 , shiftpos , shiftsize * sizeof ( int32_t ) ) ;
uprv_memmove ( sizeshiftpos + 1 , sizeshiftpos ,
shiftsize * sizeof ( uint8_t ) ) ;
* shiftpos = endexpansion ;
* sizeshiftpos = expansionsize ;
}
maxexpansion - > position + + ;
2001-05-16 18:22:58 +00:00
# ifdef UCOL_DEBUG
2001-03-02 01:14:03 +00:00
int temp ;
UBool found = FALSE ;
for ( temp = 0 ; temp < maxexpansion - > position ; temp + + ) {
if ( pendexpansionce [ temp ] > = pendexpansionce [ temp + 1 ] ) {
fprintf ( stderr , " expansions %d \n " , temp ) ;
}
if ( pendexpansionce [ temp ] = = endexpansion ) {
found = TRUE ;
if ( pexpansionsize [ temp ] < expansionsize ) {
fprintf ( stderr , " expansions size %d \n " , temp ) ;
}
}
}
if ( pendexpansionce [ temp ] = = endexpansion ) {
found = TRUE ;
if ( pexpansionsize [ temp ] < expansionsize ) {
fprintf ( stderr , " expansions size %d \n " , temp ) ;
}
}
if ( ! found )
fprintf ( stderr , " expansion not found %d \n " , temp ) ;
2001-03-07 20:45:08 +00:00
# endif
2001-03-02 01:14:03 +00:00
}
return maxexpansion - > position ;
}
2001-06-12 15:53:34 +00:00
/**
 * Records maximum expansion data for a Jamo code point.
 *
 * - ch in U+1100..U+1112 (L): only maxLSize is raised if needed and the
 *   function returns without storing the collation element.
 * - ch in U+1161..U+1175 (V): maxVSize is raised if needed.
 * - ch in U+11A8..U+11C2 (T): maxTSize is raised if needed and the stored
 *   isV flag is FALSE.
 * For every ch outside the L range, endexpansion is appended to the table
 * together with its isV flag (TRUE unless ch is in the T range), unless the
 * same collation element is already stored. The arrays are allocated on first
 * use and doubled when full.
 *
 * @param ch            the jamo codepoint
 * @param endexpansion  the last expansion collation element to be added
 * @param expansionsize size of the expansion
 * @param maxexpansion  data structure to store the maximum expansion data
 * @param status        error status; set to U_MEMORY_ALLOCATION_ERROR on
 *                      allocation failure
 * @return maxexpansion->position after the update; 0 if the initial
 *         allocation failed; -1 if growing the arrays failed
 */
int uprv_uca_setMaxJamoExpansion(UChar ch,
                                 uint32_t endexpansion,
                                 uint8_t expansionsize,
                                 MaxJamoExpansionTable *maxexpansion,
                                 UErrorCode *status)
{
    UBool isV = TRUE;

    if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) {
        /* determines L for Jamo, doesn't need to store this since it is never
           at the end of a expansion */
        if (maxexpansion->maxLSize < expansionsize) {
            maxexpansion->maxLSize = expansionsize;
        }
        return maxexpansion->position;
    }

    if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) {
        /* determines V for Jamo */
        if (maxexpansion->maxVSize < expansionsize) {
            maxexpansion->maxVSize = expansionsize;
        }
    }

    if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) {
        isV = FALSE;
        /* determines T for Jamo */
        if (maxexpansion->maxTSize < expansionsize) {
            maxexpansion->maxTSize = expansionsize;
        }
    }

    if (maxexpansion->size == 0) {
        /* we'll always make the first element 0, for easier manipulation */
        uint32_t *firstCEs =
            (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t));
        UBool *firstFlags =
            (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool));
        if (firstCEs == NULL || firstFlags == NULL) {
            /* size stays 0, so nothing may remain attached to the table:
               release the survivor here instead of orphaning it */
            if (firstCEs != NULL) {
                uprv_free(firstCEs);
            }
            if (firstFlags != NULL) {
                uprv_free(firstFlags);
            }
            maxexpansion->endExpansionCE = NULL;
            maxexpansion->isV = NULL;
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        firstCEs[0] = 0;
        firstFlags[0] = 0;
        maxexpansion->endExpansionCE = firstCEs;
        maxexpansion->isV = firstFlags;
        maxexpansion->size = INIT_EXP_TABLE_SIZE;
        maxexpansion->position = 0;
    }

    if (maxexpansion->position + 1 == maxexpansion->size) {
        /* Each successful realloc is committed immediately: the old block may
           have been moved, so the table must never keep the old pointer. */
        uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
                                        2 * maxexpansion->size * sizeof(uint32_t));
        if (neweece == NULL) {
#ifdef UCOL_DEBUG
            fprintf(stderr, " out of memory for maxExpansions \n ");
#endif
            *status = U_MEMORY_ALLOCATION_ERROR;
            return -1;
        }
        maxexpansion->endExpansionCE = neweece;

        UBool *newisV = (UBool *)uprv_realloc(maxexpansion->isV,
                                        2 * maxexpansion->size * sizeof(UBool));
        if (newisV == NULL) {
#ifdef UCOL_DEBUG
            fprintf(stderr, " out of memory for maxExpansions \n ");
#endif
            *status = U_MEMORY_ALLOCATION_ERROR;
            return -1;
        }
        maxexpansion->isV = newisV;
        maxexpansion->size *= 2;
    }

    uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
    int      pos              = maxexpansion->position;

    /* linear scan from the end: skip the insert if the CE is already stored */
    while (pos > 0) {
        pos--;
        if (*(pendexpansionce + pos) == endexpansion) {
            return maxexpansion->position;
        }
    }

    *(pendexpansionce + maxexpansion->position) = endexpansion;
    *(maxexpansion->isV + maxexpansion->position) = isV;
    maxexpansion->position++;

    return maxexpansion->position;
}
2001-03-03 03:35:17 +00:00
2001-05-10 22:12:53 +00:00
/**
 * Sets the bit for code unit c in a contraction-end bit table.
 * Values below UCOL_UNSAFECP_TABLE_SIZE * 8 map directly to their own bit;
 * larger values are folded with UCOL_UNSAFECP_TABLE_MASK and offset by 256.
 * @param table bit table to update
 * @param c     code unit to mark
 */
static void ContrEndCPSet(uint8_t *table, UChar c) {
    uint32_t bitIndex = c;

    if (bitIndex >= UCOL_UNSAFECP_TABLE_SIZE * 8) {
        bitIndex = (bitIndex & UCOL_UNSAFECP_TABLE_MASK) + 256;
    }
    /* byte = bitIndex / 8, bit within byte = bitIndex % 8 */
    table[bitIndex >> 3] |= (1 << (bitIndex & 7));
}
2001-03-03 03:35:17 +00:00
static void unsafeCPSet ( uint8_t * table , UChar c ) {
uint32_t hash ;
uint8_t * htByte ;
hash = c ;
if ( hash > = UCOL_UNSAFECP_TABLE_SIZE * 8 ) {
if ( hash > = 0xd800 & & hash < = 0xf8ff ) {
/* Part of a surrogate, or in private use area. */
/* These don't go in the table */
return ;
}
hash = ( hash & UCOL_UNSAFECP_TABLE_MASK ) + 256 ;
}
htByte = & table [ hash > > 3 ] ;
* htByte | = ( 1 < < ( hash & 7 ) ) ;
}
/* to the UnsafeCP hash table, add all chars with combining class != 0 */
2001-10-29 17:56:00 +00:00
static void uprv_uca_unsafeCPAddCCNZ ( tempUCATable * t , UErrorCode * status ) {
UChar c ;
uint16_t fcd ; // Hi byte is lead combining class.
// lo byte is trailing combing class.
const uint16_t * fcdTrieData ;
fcdTrieData = unorm_getFCDTrie ( status ) ;
if ( U_FAILURE ( * status ) ) {
return ;
}
2001-03-03 03:35:17 +00:00
for ( c = 0 ; c < 0xffff ; c + + ) {
2001-10-29 17:56:00 +00:00
fcd = unorm_getFCD16 ( fcdTrieData , c ) ;
if ( fcd > = 0x100 | | // if the leading combining class(c) > 0 ||
( UTF_IS_LEAD ( c ) & & fcd ! = 0 ) ) // c is a leading surrogate with some FCD data
unsafeCPSet ( t - > unsafeCP , c ) ;
2001-03-03 03:35:17 +00:00
}
2001-10-29 17:56:00 +00:00
2001-10-12 19:46:10 +00:00
if ( t - > prefixLookup ! = NULL ) {
int32_t i = - 1 ;
const UHashElement * e = NULL ;
UCAElements * element = NULL ;
UChar NFCbuf [ 256 ] ;
uint32_t NFCbufLen = 0 ;
while ( ( e = uhash_nextElement ( t - > prefixLookup , & i ) ) ! = NULL ) {
2001-10-16 18:31:13 +00:00
element = ( UCAElements * ) e - > value . pointer ;
2001-10-12 19:46:10 +00:00
// codepoints here are in the NFD form. We need to add the
// first code point of the NFC form to unsafe, because
// strcoll needs to backup over them.
NFCbufLen = unorm_normalize ( element - > cPoints , element - > cSize , UNORM_NFC , 0 ,
2001-10-29 17:56:00 +00:00
NFCbuf , 256 , status ) ;
2001-10-12 19:46:10 +00:00
unsafeCPSet ( t - > unsafeCP , NFCbuf [ 0 ] ) ;
}
}
2001-03-03 03:35:17 +00:00
}
2001-09-27 23:18:14 +00:00
uint32_t uprv_uca_addPrefix ( tempUCATable * t , uint32_t CE ,
UCAElements * element , UErrorCode * status ) {
// currently the longest prefix we're supporting in Japanese is two characters
// long. Although this table could quite easily mimic complete contraction stuff
// there is no good reason to make a general solution, as it would require some
// error prone messing.
CntTable * contractions = t - > contractions ;
UChar32 cp ;
uint32_t cpsize = 0 ;
UChar * oldCP = element - > cPoints ;
uint32_t oldCPSize = element - > cSize ;
2001-10-02 01:26:13 +00:00
2001-09-27 23:18:14 +00:00
contractions - > currentTag = SPEC_PROC_TAG ;
2001-10-12 19:46:10 +00:00
// here, we will normalize & add prefix to the table.
2001-09-27 23:18:14 +00:00
uint32_t j = 0 ;
2001-10-16 22:36:02 +00:00
# ifdef UCOL_DEBUG
2001-10-13 16:20:01 +00:00
for ( j = 0 ; j < element - > cSize ; j + + ) {
fprintf ( stdout , " CP: %04X " , element - > cPoints [ j ] ) ;
}
fprintf ( stdout , " El: %08X Pref: " , CE ) ;
for ( j = 0 ; j < element - > prefixSize ; j + + ) {
fprintf ( stdout , " %04X " , element - > prefix [ j ] ) ;
}
fprintf ( stdout , " %08X " , element - > mapCE ) ;
2001-10-16 22:36:02 +00:00
# endif
2001-10-13 16:20:01 +00:00
2001-10-12 19:46:10 +00:00
for ( j = 1 ; j < element - > prefixSize ; j + + ) { /* First add NFD prefix chars to unsafe CP hash table */
2001-09-27 23:18:14 +00:00
// Unless it is a trail surrogate, which is handled algoritmically and
// shouldn't take up space in the table.
if ( ! ( UTF_IS_TRAIL ( element - > prefix [ j ] ) ) ) {
unsafeCPSet ( t - > unsafeCP , element - > prefix [ j ] ) ;
}
}
2001-10-02 01:26:13 +00:00
2001-10-13 16:20:01 +00:00
UChar tempPrefix = 0 ;
for ( j = 0 ; j < /*nfcSize*/ element - > prefixSize / 2 ; j + + ) { // prefixes are going to be looked up backwards
2001-10-12 19:46:10 +00:00
// therefore, we will promptly reverse the prefix buffer...
2001-10-13 16:20:01 +00:00
tempPrefix = * ( /*nfcBuffer*/ element - > prefix + element - > prefixSize - j - 1 ) ;
* ( /*nfcBuffer*/ element - > prefix + element - > prefixSize - j - 1 ) = element - > prefix [ j ] ;
element - > prefix [ j ] = tempPrefix ;
2001-10-12 19:46:10 +00:00
}
2001-10-16 22:36:02 +00:00
# ifdef UCOL_DEBUG
2001-10-13 16:20:01 +00:00
fprintf ( stdout , " Reversed: " ) ;
for ( j = 0 ; j < element - > prefixSize ; j + + ) {
fprintf ( stdout , " %04X " , element - > prefix [ j ] ) ;
}
fprintf ( stdout , " %08X \n " , element - > mapCE ) ;
2001-10-16 22:36:02 +00:00
# endif
2001-10-12 19:46:10 +00:00
// the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
if ( ! ( UTF_IS_TRAIL ( element - > cPoints [ 0 ] ) ) ) {
unsafeCPSet ( t - > unsafeCP , element - > cPoints [ 0 ] ) ;
}
// Maybe we need this... To handle prefixes completely in the forward direction...
//if(element->cSize == 1) {
// if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
// ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
// }
//}
2001-10-02 01:26:13 +00:00
element - > cPoints = element - > prefix ;
element - > cSize = element - > prefixSize ;
2001-09-27 23:18:14 +00:00
// Add the last char of the contraction to the contraction-end hash table.
// unless it is a trail surrogate, which is handled algorithmically and
// shouldn't be in the table
2001-10-02 01:26:13 +00:00
if ( ! ( UTF_IS_TRAIL ( element - > cPoints [ element - > cSize - 1 ] ) ) ) {
ContrEndCPSet ( t - > contrEndCP , element - > cPoints [ element - > cSize - 1 ] ) ;
2001-09-27 23:18:14 +00:00
}
2001-10-02 01:26:13 +00:00
// First we need to check if contractions starts with a surrogate
UTF_NEXT_CHAR ( element - > cPoints , cpsize , element - > cSize , cp ) ;
2001-09-27 23:18:14 +00:00
// If there are any Jamos in the contraction, we should turn on special
// processing for Jamos
if ( UCOL_ISJAMO ( element - > prefix [ 0 ] ) ) {
t - > image - > jamoSpecial = TRUE ;
}
/* then we need to deal with it */
/* we could already have something in the table - or we might not */
if ( ! isPrefix ( CE ) ) {
/* if it wasn't contraction, we wouldn't end up here*/
int32_t firstContractionOffset = 0 ;
int32_t contractionOffset = 0 ;
firstContractionOffset = uprv_cnttab_addContraction ( contractions , UPRV_CNTTAB_NEWELEMENT , 0 , CE , status ) ;
uint32_t newCE = uprv_uca_processContraction ( contractions , element , UCOL_NOT_FOUND , status ) ;
2001-10-02 01:26:13 +00:00
contractionOffset = uprv_cnttab_addContraction ( contractions , firstContractionOffset , * element - > prefix , newCE , status ) ;
2001-09-27 23:18:14 +00:00
contractionOffset = uprv_cnttab_addContraction ( contractions , firstContractionOffset , 0xFFFF , CE , status ) ;
CE = constructContractCE ( SPEC_PROC_TAG , firstContractionOffset ) ;
} else { /* we are adding to existing contraction */
/* there were already some elements in the table, so we need to add a new contraction */
/* Two things can happen here: either the codepoint is already in the table, or it is not */
int32_t position = uprv_cnttab_findCP ( contractions , CE , * element - > prefix , status ) ;
if ( position > 0 ) { /* if it is we just continue down the chain */
uint32_t eCE = uprv_cnttab_getCE ( contractions , CE , position , status ) ;
uint32_t newCE = uprv_uca_processContraction ( contractions , element , eCE , status ) ;
uprv_cnttab_setContraction ( contractions , CE , position , * ( element - > prefix ) , newCE , status ) ;
} else { /* if it isn't, we will have to create a new sequence */
2001-11-20 17:35:46 +00:00
uprv_uca_processContraction ( contractions , element , UCOL_NOT_FOUND , status ) ;
2001-09-27 23:18:14 +00:00
uprv_cnttab_insertContraction ( contractions , CE , * ( element - > prefix ) , element - > mapCE , status ) ;
}
}
element - > cPoints = oldCP ;
element - > cSize = oldCPSize ;
return CE ;
}
2001-03-03 03:35:17 +00:00
2001-08-10 20:30:44 +00:00
// Note regarding surrogate handling: We are interested only in the single
// or leading surrogates in a contraction. If a surrogate is somewhere else
// in the contraction, it is going to be handled as a pair of code units,
// as it doesn't affect the performance AND handling surrogates specially
// would complicate code way too much.
/**
 * Registers the code point sequence of `element` under the contraction tag.
 *
 * @param t        temporary table being built; its contraction table, unsafe
 *                 and contraction-end sets, jamo flag and trie may be updated
 * @param CE       value currently stored in the trie for the first code point
 * @param element  element to add; cPoints/cSize are advanced while the tail
 *                 is processed and restored before returning
 * @param status   ICU error code, passed through to the table helpers
 * @return `CE` unchanged, or a newly constructed contraction CE when a new
 *         contraction table had to be started for the first code point
 */
uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
                                 UCAElements *element, UErrorCode *status) {
    CntTable *cntTable = t->contractions;
    UChar32 firstCP;
    uint32_t firstLen = 0;

    cntTable->currentTag = CONTRACTION_TAG;

    // Decode the first code point; it may occupy two code units.
    UTF_NEXT_CHAR(element->cPoints, firstLen, element->cSize, firstCP);

    if (firstLen >= element->cSize) {
        // Nothing follows the first code point, so this is not a real contraction.
        if (!isContraction(CE)) {
            // Just a (possibly supplementary) code point with no contraction behind it.
            utrie_set32(t->mapping, firstCP, element->mapCE);
        } else {
            // Fill out the first stage of the existing contraction with this CE.
            uprv_cnttab_changeContraction(cntTable, CE, 0, element->mapCE, status);
            uprv_cnttab_changeContraction(cntTable, CE, 0xFFFF, element->mapCE, status);
        }
        return CE;
    }

    // Every unit after the first goes into the unsafe set, except trail
    // surrogates, which are handled algorithmically and need no table space.
    for (uint32_t idx = 1; idx < element->cSize; ++idx) {
        if (!(UTF_IS_TRAIL(element->cPoints[idx]))) {
            unsafeCPSet(t->unsafeCP, element->cPoints[idx]);
        }
    }

    // The last unit of the contraction goes into the contraction-end set,
    // again unless it is a trail surrogate.
    if (!(UTF_IS_TRAIL(element->cPoints[element->cSize - 1]))) {
        ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize - 1]);
    }

    // Any Jamo in first position switches on special Jamo processing.
    if (UCOL_ISJAMO(element->cPoints[0])) {
        t->image->jamoSpecial = TRUE;
    }

    // Step over the first code point; the remainder is what gets chained.
    element->cPoints += firstLen;
    element->cSize -= firstLen;
    const UChar nextUnit = *element->cPoints;

    if (isContraction(CE)) {
        // A contraction table already exists for the first code point:
        // either the next unit is already in it, or it has to be inserted.
        const int32_t slot = uprv_cnttab_findCP(cntTable, CE, nextUnit, status);
        if (slot > 0) {
            // Already present - continue down the existing chain.
            const uint32_t chainedCE = uprv_cnttab_getCE(cntTable, CE, slot, status);
            const uint32_t tailCE = uprv_uca_processContraction(cntTable, element, chainedCE, status);
            uprv_cnttab_setContraction(cntTable, CE, slot, nextUnit, tailCE, status);
        } else {
            // Not present - build a new sequence and insert it.
            const uint32_t tailCE = uprv_uca_processContraction(cntTable, element, UCOL_NOT_FOUND, status);
            uprv_cnttab_insertContraction(cntTable, CE, nextUnit, tailCE, status);
        }
    } else {
        // No contraction yet: open a new table whose first and last entries
        // carry the previous CE, with the new sequence in between.
        const int32_t tableStart =
            uprv_cnttab_addContraction(cntTable, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
        const uint32_t tailCE = uprv_uca_processContraction(cntTable, element, UCOL_NOT_FOUND, status);
        uprv_cnttab_addContraction(cntTable, tableStart, nextUnit, tailCE, status);
        uprv_cnttab_addContraction(cntTable, tableStart, 0xFFFF, CE, status);
        CE = constructContractCE(CONTRACTION_TAG, tableStart);
    }

    // Restore the element to its original extent.
    element->cPoints -= firstLen;
    element->cSize += firstLen;

    utrie_set32(t->mapping, firstCP, CE);
    return CE;
}
2001-05-10 22:12:53 +00:00
2001-08-10 20:30:44 +00:00
2001-10-02 01:26:13 +00:00
/**
 * Recursive worker that threads the remaining units of `element` through the
 * contraction table, one unit per recursion level.
 *
 * @param contractions  contraction table being built; `currentTag` selects
 *                      the tag used for tables created here
 * @param element       element whose cPoints/cSize describe the units still
 *                      to be processed; temporarily advanced by one unit and
 *                      restored before returning
 * @param existingCE    CE already stored at this position, or UCOL_NOT_FOUND
 * @param status        ICU error code
 * @return the CE the caller should store at this position; UCOL_NOT_FOUND if
 *         `*status` already indicates failure
 */
static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return UCOL_NOT_FOUND;
    }

    // End of recursion: a single unit is left.
    if (element->cSize == 1) {
        if (!isCntTableElement(existingCE) || ((UColCETags)getCETag(existingCE) != contractions->currentTag)) {
            // NOTE: not fully correct - existingCE might be a contraction of a
            // different kind, which would need another step.
            return element->mapCE;
        }
        // A table of the current kind already hangs here: overwrite its
        // first and terminating entries with the new CE.
        uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status);
        uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status);
        return existingCE;
    }

    // The recursion consumes the one element we have by advancing it in
    // place; it is put back before returning.
    element->cPoints++;
    element->cSize--;
    const UChar unit = *element->cPoints;

    if (isCntTableElement(existingCE)) {
        // Adding to an existing table: the unit is either already there or not.
        const int32_t slot = uprv_cnttab_findCP(contractions, existingCE, unit, status);
        if (slot > 0) {
            // Present - continue down the chain.
            const uint32_t chainedCE = uprv_cnttab_getCE(contractions, existingCE, slot, status);
            const uint32_t tailCE = uprv_uca_processContraction(contractions, element, chainedCE, status);
            uprv_cnttab_setContraction(contractions, existingCE, slot, unit, tailCE, status);
        } else {
            // Absent - create a new sequence and insert it.
            const uint32_t tailCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
            uprv_cnttab_insertContraction(contractions, existingCE, unit, tailCE, status);
        }
    } else {
        // Empty slot or plain CE: start a new table bracketed by existingCE.
        const int32_t tableStart =
            uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status);
        const uint32_t tailCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
        uprv_cnttab_addContraction(contractions, tableStart, unit, tailCE, status);
        uprv_cnttab_addContraction(contractions, tableStart, 0xFFFF, existingCE, status);
        existingCE = constructContractCE(contractions->currentTag, tableStart);
    }

    element->cPoints--;
    element->cSize++;
    return existingCE;
}
2001-03-03 03:35:17 +00:00
2001-10-12 23:15:25 +00:00
/**
 * Stores an element whose `mapCE` has already been computed, either as a
 * contraction or as a single code unit in the trie.
 *
 * @param t        temporary table being built
 * @param element  element to store (cPoints/cSize and mapCE are read)
 * @param status   ICU error code
 * @return for contractions, the value returned by uprv_uca_addContraction();
 *         otherwise the value that was in the trie for the code unit before
 *         this call (UCOL_NOT_FOUND if there was none)
 */
static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) {
    // A completely ignorable element is added to the unsafe table so that
    // backward iteration skips over it when treating contractions.
    if (element->mapCE == 0) {
        for (uint32_t idx = 0; idx < element->cSize; ++idx) {
            if (!UTF_IS_TRAIL(element->cPoints[idx])) {
                unsafeCPSet(t->unsafeCP, element->cPoints[idx]);
            }
        }
    }

    if (element->cSize > 1) {
        // Contraction: look up what is stored for the first code point and
        // let the contraction builder extend it.
        uint32_t consumed = 0;
        UChar32 firstCP;
        UTF_NEXT_CHAR(element->cPoints, consumed, element->cSize, firstCP);
        const uint32_t current = utrie_get32(t->mapping, firstCP, NULL);
        return uprv_uca_addContraction(t, current, element, status);
    }

    // Easy case: a single code unit.
    const uint32_t previous = utrie_get32(t->mapping, element->cPoints[0], NULL);

    if (previous == UCOL_NOT_FOUND) {
        utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
    } else if (!isCntTableElement(previous)) {
        // Plain data already present: it is overwritten.
        utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
#ifdef UCOL_DEBUG
        fprintf(stderr, " Warning - trying to overwrite existing data %08X for cp %04X with %08X \n ", previous, element->cPoints[0], element->CEs[0]);
        //*status = U_ILLEGAL_ARGUMENT_ERROR;
#endif
    } else if (!isPrefix(element->mapCE)) {
        // Adding a non-contraction element (thai, expansion, single) to an
        // already existing contraction table. Prefix elements must not be
        // re-entered here, as that would create a dead loop.
        uprv_cnttab_setContraction(t->contractions, previous, 0, 0, element->mapCE, status);
        /* This has to change the CE at the end of the contraction - REDO! */
        uprv_cnttab_changeLastCE(t->contractions, previous, element->mapCE, status);
    }

    return previous;
}
2001-02-26 10:28:56 +00:00
/* This adds a read element, while testing for existence */
2001-11-21 01:08:55 +00:00
U_CAPI uint32_t U_EXPORT2
uprv_uca_addAnElement ( tempUCATable * t , UCAElements * element , UErrorCode * status ) {
2001-02-26 10:28:56 +00:00
ExpansionTable * expansions = t - > expansions ;
2001-03-22 21:16:20 +00:00
uint32_t i = 1 ;
2001-02-26 10:28:56 +00:00
uint32_t expansion = 0 ;
uint32_t CE ;
if ( U_FAILURE ( * status ) ) {
return 0xFFFF ;
}
if ( element - > noOfCEs = = 1 ) {
if ( element - > isThai = = FALSE ) {
element - > mapCE = element - > CEs [ 0 ] ;
} else { /* add thai - totally bad here */
2001-08-28 18:53:23 +00:00
expansion = ( uint32_t ) ( UCOL_SPECIAL_FLAG | ( THAI_TAG < < UCOL_TAG_SHIFT )
2001-03-30 00:23:46 +00:00
| ( ( uprv_uca_addExpansion ( expansions , element - > CEs [ 0 ] , status ) + ( headersize > > 2 ) ) < < 4 )
2001-08-28 18:53:23 +00:00
| 0x1 ) ;
2001-02-26 10:28:56 +00:00
element - > mapCE = expansion ;
}
} else {
2002-01-08 22:16:56 +00:00
/* ICU 2.1 long primaries */
/* unfortunately, it looks like we have to look for a long primary here */
/* since in canonical closure we are going to hit some long primaries from */
/* the first phase, and they will come back as continuations/expansions */
/* destroying the effect of the previous opitimization */
/* A long primary is a three byte primary with starting secondaries and tertiaries */
/* It can appear in long runs of only primary differences (like east Asian tailorings) */
/* also, it should not be an expansion, as expansions would break with this */
// This part came in from ucol_bld.cpp
//if(tok->expansion == 0
//&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1
//&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) {
/* we will construct a special CE that will go unchanged to the table */
if ( element - > noOfCEs = = 2 // a two CE expansion
& & isContinuation ( element - > CEs [ 1 ] ) // which is a continuation
& & ( element - > CEs [ 1 ] & ( ~ ( 0xFF < < 24 | UCOL_CONTINUATION_MARKER ) ) ) = = 0 // that has only primaries in continuation,
& & ( ( ( element - > CEs [ 0 ] > > 8 ) & 0xFF ) = = UCOL_BYTE_COMMON ) // a common secondary
& & ( ( element - > CEs [ 0 ] & 0xFF ) = = UCOL_BYTE_COMMON ) // and a common tertiary
) {
# ifdef UCOL_DEBUG
fprintf ( stdout , " Long primary %04X \n " , element - > cPoints [ 0 ] ) ;
# endif
element - > mapCE = UCOL_SPECIAL_FLAG | ( LONG_PRIMARY_TAG < < 24 ) // a long primary special
| ( ( element - > CEs [ 0 ] > > 8 ) & 0xFFFF00 ) // first and second byte of primary
| ( ( element - > CEs [ 1 ] > > 24 ) & 0xFF ) ; // third byte of primary
2001-02-26 10:28:56 +00:00
} else {
2002-01-08 22:16:56 +00:00
expansion = ( uint32_t ) ( UCOL_SPECIAL_FLAG | ( EXPANSION_TAG < < UCOL_TAG_SHIFT )
| ( ( uprv_uca_addExpansion ( expansions , element - > CEs [ 0 ] , status ) + ( headersize > > 2 ) ) < < 4 )
& 0xFFFFF0 ) ;
for ( i = 1 ; i < element - > noOfCEs ; i + + ) {
uprv_uca_addExpansion ( expansions , element - > CEs [ i ] , status ) ;
}
if ( element - > noOfCEs < = 0xF ) {
expansion | = element - > noOfCEs ;
} else {
uprv_uca_addExpansion ( expansions , 0 , status ) ;
}
element - > mapCE = expansion ;
uprv_uca_setMaxExpansion ( element - > CEs [ element - > noOfCEs - 1 ] ,
2001-06-12 15:53:34 +00:00
( uint8_t ) element - > noOfCEs ,
2002-01-08 22:16:56 +00:00
t - > maxExpansions ,
2001-06-12 15:53:34 +00:00
status ) ;
2002-01-08 22:16:56 +00:00
if ( UCOL_ISJAMO ( element - > cPoints [ 0 ] ) ) {
t - > image - > jamoSpecial = TRUE ;
uprv_uca_setMaxJamoExpansion ( element - > cPoints [ 0 ] ,
element - > CEs [ element - > noOfCEs - 1 ] ,
( uint8_t ) element - > noOfCEs ,
t - > maxJamoExpansions ,
status ) ;
}
2001-03-08 00:58:36 +00:00
}
2001-02-26 10:28:56 +00:00
}
2001-09-27 23:18:14 +00:00
// here we want to add the prefix structure.
// I will try to process it as a reverse contraction, if possible.
// prefix buffer is already reversed.
if ( element - > prefixSize ! = 0 ) {
2001-10-12 19:46:10 +00:00
// We keep the seen prefix starter elements in a hashtable
// we need it to be able to distinguish between the simple
// codepoints and prefix starters. Also, we need to use it
// for canonical closure.
2001-10-12 21:03:36 +00:00
UCAElements * composed = ( UCAElements * ) uprv_malloc ( sizeof ( UCAElements ) ) ;
2002-07-02 15:10:30 +00:00
/* test for NULL */
2002-06-29 09:31:05 +00:00
if ( composed = = NULL ) {
* status = U_MEMORY_ALLOCATION_ERROR ;
return 0 ;
}
2001-10-12 21:03:36 +00:00
uprv_memcpy ( composed , element , sizeof ( UCAElements ) ) ;
composed - > cPoints = composed - > uchars ;
composed - > prefix = composed - > prefixChars ;
composed - > prefixSize = unorm_normalize ( element - > prefix , element - > prefixSize , UNORM_NFC , 0 , composed - > prefix , 128 , status ) ;
2001-10-02 01:26:13 +00:00
if ( t - > prefixLookup ! = NULL ) {
UCAElements * uCE = ( UCAElements * ) uhash_get ( t - > prefixLookup , element ) ;
if ( uCE ! = NULL ) { // there is already a set of code points here
element - > mapCE = uprv_uca_addPrefix ( t , uCE - > mapCE , element , status ) ;
} else { // no code points, so this spot is clean
element - > mapCE = uprv_uca_addPrefix ( t , UCOL_NOT_FOUND , element , status ) ;
uCE = ( UCAElements * ) uprv_malloc ( sizeof ( UCAElements ) ) ;
2002-07-02 15:10:30 +00:00
/* test for NULL */
2002-06-29 09:31:05 +00:00
if ( uCE = = NULL ) {
* status = U_MEMORY_ALLOCATION_ERROR ;
return 0 ;
}
2001-10-02 01:26:13 +00:00
uprv_memcpy ( uCE , element , sizeof ( UCAElements ) ) ;
2001-10-05 02:08:25 +00:00
uCE - > cPoints = uCE - > uchars ;
2001-10-02 01:26:13 +00:00
uhash_put ( t - > prefixLookup , uCE , uCE , status ) ;
}
2001-10-12 21:03:36 +00:00
if ( composed - > prefixSize ! = element - > prefixSize | | uprv_memcmp ( composed - > prefix , element - > prefix , element - > prefixSize ) ) {
// do it!
composed - > mapCE = uprv_uca_addPrefix ( t , element - > mapCE , composed , status ) ;
}
2001-03-08 00:58:36 +00:00
}
2001-04-06 22:53:06 +00:00
uprv_free ( composed ) ;
2001-02-26 10:28:56 +00:00
}
2002-03-12 20:20:08 +00:00
// We need to use the canonical iterator here
// the way we do it is to generate the canonically equivalent strings
// for the contraction and then add the sequences that pass FCD check
2002-01-08 22:16:56 +00:00
if ( element - > cSize > 1 & & ! ( element - > cSize = = 2 & & UTF16_IS_LEAD ( element - > cPoints [ 0 ] ) & & UTF16_IS_TRAIL ( element - > cPoints [ 1 ] ) ) ) { // this is a contraction, we should check whether a composed form should also be included
2002-03-12 20:20:08 +00:00
UnicodeString source ( element - > cPoints , element - > cSize ) ;
CanonicalIterator it ( source , * status ) ;
source = it . next ( ) ;
2002-09-17 05:06:04 +00:00
while ( ! source . isBogus ( ) ) {
2002-03-12 20:20:08 +00:00
if ( Normalizer : : quickCheck ( source , UNORM_FCD , * status ) ! = UNORM_NO ) {
element - > cSize = source . extract ( element - > cPoints , 128 , * status ) ;
uprv_uca_finalizeAddition ( t , element , status ) ;
}
source = it . next ( ) ;
}
2002-04-02 02:55:31 +00:00
CE = element - > mapCE ;
2002-03-12 20:20:08 +00:00
} else {
CE = uprv_uca_finalizeAddition ( t , element , status ) ;
2001-10-25 20:37:02 +00:00
}
2001-02-26 10:28:56 +00:00
return CE ;
}
2001-12-19 07:00:45 +00:00
/*void uprv_uca_getMaxExpansionJamo(CompactEIntArray *mapping, */
void uprv_uca_getMaxExpansionJamo ( UNewTrie * mapping ,
2001-06-12 15:53:34 +00:00
MaxExpansionTable * maxexpansion ,
MaxJamoExpansionTable * maxjamoexpansion ,
UBool jamospecial ,
UErrorCode * status )
2001-03-03 04:06:43 +00:00
{
const uint32_t VBASE = 0x1161 ;
2001-06-12 15:53:34 +00:00
const uint32_t TBASE = 0x11A8 ;
2001-03-03 04:06:43 +00:00
const uint32_t VCOUNT = 21 ;
const uint32_t TCOUNT = 28 ;
2001-06-12 01:39:24 +00:00
2001-03-03 04:06:43 +00:00
uint32_t v = VBASE + VCOUNT - 1 ;
uint32_t t = TBASE + TCOUNT - 1 ;
uint32_t ce ;
2001-06-12 15:53:34 +00:00
while ( v > = VBASE ) {
2001-12-19 07:00:45 +00:00
/*ce = ucmpe32_get(mapping, v);*/
ce = utrie_get32 ( mapping , v , NULL ) ;
2001-06-12 15:53:34 +00:00
if ( ce < UCOL_SPECIAL_FLAG ) {
uprv_uca_setMaxExpansion ( ce , 2 , maxexpansion , status ) ;
2001-06-12 00:00:14 +00:00
}
2001-06-12 15:53:34 +00:00
v - - ;
}
2001-06-12 00:00:14 +00:00
2001-06-12 15:53:34 +00:00
while ( t > = TBASE )
{
2001-12-19 07:00:45 +00:00
/*ce = ucmpe32_get(mapping, t);*/
ce = utrie_get32 ( mapping , t , NULL ) ;
2001-06-12 15:53:34 +00:00
if ( ce < UCOL_SPECIAL_FLAG ) {
uprv_uca_setMaxExpansion ( ce , 3 , maxexpansion , status ) ;
2001-06-12 00:00:14 +00:00
}
2001-06-12 15:53:34 +00:00
t - - ;
2001-03-03 04:06:43 +00:00
}
2001-06-12 15:53:34 +00:00
/* According to the docs, 99% of the time, the Jamo will not be special */
if ( jamospecial ) {
/* gets the max expansion in all unicode characters */
int count = maxjamoexpansion - > position ;
2001-08-28 18:53:23 +00:00
uint8_t maxTSize = ( uint8_t ) ( maxjamoexpansion - > maxLSize +
maxjamoexpansion - > maxVSize +
maxjamoexpansion - > maxTSize ) ;
uint8_t maxVSize = ( uint8_t ) ( maxjamoexpansion - > maxLSize +
maxjamoexpansion - > maxVSize ) ;
2001-06-12 15:53:34 +00:00
while ( count > 0 ) {
count - - ;
if ( * ( maxjamoexpansion - > isV + count ) = = TRUE ) {
uprv_uca_setMaxExpansion (
* ( maxjamoexpansion - > endExpansionCE + count ) ,
maxVSize , maxexpansion , status ) ;
}
else {
uprv_uca_setMaxExpansion (
* ( maxjamoexpansion - > endExpansionCE + count ) ,
maxTSize , maxexpansion , status ) ;
}
2001-06-12 00:00:14 +00:00
}
2001-03-03 04:06:43 +00:00
}
}
2001-03-03 03:35:17 +00:00
2002-02-19 23:17:20 +00:00
U_CDECL_BEGIN
static inline uint32_t U_CALLCONV
getFoldedValue ( UNewTrie * trie , UChar32 start , int32_t offset )
{
2001-12-19 07:00:45 +00:00
uint32_t value ;
uint32_t tag ;
UChar32 limit ;
UBool inBlockZero ;
limit = start + 0x400 ;
while ( start < limit ) {
value = utrie_get32 ( trie , start , & inBlockZero ) ;
tag = getCETag ( value ) ;
if ( inBlockZero = = TRUE ) {
start + = UTRIE_DATA_BLOCK_LENGTH ;
2002-06-13 18:31:34 +00:00
} else if ( ! ( isSpecial ( value ) & & ( tag = = IMPLICIT_TAG | | tag = = NOT_FOUND_TAG ) ) ) {
/* These are values that are starting in either UCA (IMPLICIT_TAG) or in the
* tailorings ( NOT_FOUND_TAG ) . Presence of these tags means that there is
* nothing in this position and that it should be skipped .
*/
2002-01-08 22:16:56 +00:00
# ifdef UCOL_DEBUG
2002-02-20 18:13:29 +00:00
static int32_t count = 1 ;
2002-01-08 22:16:56 +00:00
fprintf ( stdout , " %i, Folded %08X, value %08X \n " , count + + , start , value ) ;
# endif
2001-12-19 07:00:45 +00:00
return ( uint32_t ) ( UCOL_SPECIAL_FLAG | ( SURROGATE_TAG < < 24 ) | offset ) ;
} else {
+ + start ;
}
}
return 0 ;
}
2002-02-19 23:17:20 +00:00
U_CDECL_END
2002-01-08 22:16:56 +00:00
#ifdef UCOL_DEBUG
// Debug helper: prints one range of a trie. Used together with the code
// around the utrie_unserialize call in uprv_uca_assembleTable.
// Ranges starting at or above 0x10000 are additionally shown as UTF-16
// lead/trail pairs.
void enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    if (start >= 0x10000) {
        fprintf(stdout, " %08X=%04X %04X, %08X=%04X %04X, %08X \n ",
                start, UTF16_LEAD(start), UTF16_TRAIL(start),
                limit, UTF16_LEAD(limit), UTF16_TRAIL(limit),
                value);
        return;
    }
    fprintf(stdout, " %08X, %08X, %08X \n ", start, limit, value);
}

// Debug helper: extracts the folding offset (low 24 bits) from a trie value
// tagged SURROGATE_TAG; any other value yields 0.
int32_t
myGetFoldingOffset(uint32_t data) {
    if (!(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG)) {
        return 0;
    }
    return (data & 0xFFFFFF);
}
#endif
2001-11-21 01:08:55 +00:00
/**
 * Flattens the temporary table into one contiguous memory image.
 *
 * Layout, in order, each section padded with paddedsize():
 * header, option set, expansions, contraction index and contraction CEs
 * (only when there are contractions), serialized trie, max-expansion end CEs,
 * max-expansion sizes, unsafe code point set, contraction-end set.
 * The header fields hold the byte offset of each section.
 *
 * Side effects on `t`: the contraction table is constructed, Jamo max
 * expansions are recorded, the trie is compacted by serialization, and the
 * unsafe / contraction-end sets are completed (and OR-ed with those of
 * t->UCA when present).
 *
 * @param t       temporary table to flatten
 * @param status  ICU error code; set to U_MEMORY_ALLOCATION_ERROR when the
 *                image cannot be allocated and to U_INTERNAL_PROGRAM_ERROR
 *                when the written size does not match the computed size
 * @return newly allocated image (allocated with uprv_malloc), or NULL on any
 *         failure
 */
U_CAPI UCATableHeader* U_EXPORT2
uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
    UNewTrie *mapping = t->mapping;
    ExpansionTable *expansions = t->expansions;
    CntTable *contractions = t->contractions;
    MaxExpansionTable *maxexpansion = t->maxExpansions;

    if (U_FAILURE(*status)) {
        return NULL;
    }

    // Offset (in UChars) at which the contraction section will start.
    uint32_t beforeContractions = (uint32_t)((headersize + paddedsize(expansions->position * sizeof(uint32_t))) / sizeof(UChar));

    int32_t contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status);

    /* The following operation depends on the trie data, */
    /* so it has to happen before the trie is compacted. */
    uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions,
                                 t->image->jamoSpecial, status);

    // After setting the jamo expansions, compact the trie and get the needed size.
    int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue, FALSE, status);
    if (U_FAILURE(*status)) {
        // Without a valid trie size the image cannot be laid out.
        return NULL;
    }

    uint32_t tableOffset = 0;
    uint8_t *dataStart;

    uint32_t toAllocate = (uint32_t)(headersize +
        paddedsize(expansions->position * sizeof(uint32_t)) +
        paddedsize(mappingSize) +
        paddedsize(contractionsSize * (sizeof(UChar) + sizeof(uint32_t))) +
        /* maxexpansion array */
        paddedsize(maxexpansion->position * sizeof(uint32_t)) +
        /* maxexpansion size array */
        paddedsize(maxexpansion->position * sizeof(uint8_t)) +
        paddedsize(UCOL_UNSAFECP_TABLE_SIZE) +  /* Unsafe chars */
        paddedsize(UCOL_UNSAFECP_TABLE_SIZE));  /* Contraction Ending chars */

    dataStart = (uint8_t *)uprv_malloc(toAllocate);
    if (dataStart == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    UCATableHeader *myData = (UCATableHeader *)dataStart;
    uprv_memcpy(myData, t->image, sizeof(UCATableHeader));

    myData->contractionSize = contractionsSize;

    tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader)));

    /* copy options */
    myData->options = tableOffset;
    uprv_memcpy(dataStart + tableOffset, t->options, sizeof(UColOptionSet));
    tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet)));

    /* copy expansions */
    myData->expansion = tableOffset;
    memcpy(dataStart + tableOffset, expansions->CEs, expansions->position * sizeof(uint32_t));
    tableOffset += (uint32_t)(paddedsize(expansions->position * sizeof(uint32_t)));

    /* contractions block */
    if (contractionsSize != 0) {
        /* copy contraction index */
        myData->contractionIndex = tableOffset;
        memcpy(dataStart + tableOffset, contractions->codePoints, contractionsSize * sizeof(UChar));
        tableOffset += (uint32_t)(paddedsize(contractionsSize * sizeof(UChar)));

        /* copy contraction collation elements */
        myData->contractionCEs = tableOffset;
        memcpy(dataStart + tableOffset, contractions->CEs, contractionsSize * sizeof(uint32_t));
        tableOffset += (uint32_t)(paddedsize(contractionsSize * sizeof(uint32_t)));
    } else {
        // No contractions: clear both offsets. contractionCEs used to be left
        // at whatever value had been copied in from t->image.
        myData->contractionIndex = 0;
        myData->contractionCEs = 0;
    }

    /* copy mapping table */
    myData->mappingPosition = tableOffset;
    utrie_serialize(mapping, dataStart + tableOffset, toAllocate - tableOffset, getFoldedValue, FALSE, status);
    if (U_FAILURE(*status)) {
        // The trie section was not written; do not hand out a broken image.
        uprv_free(dataStart);
        return NULL;
    }
#ifdef UCOL_DEBUG
    // This is debug code to dump the contents of the trie. It needs the two
    // debug functions defined earlier in this file.
    {
        UTrie UCAt = { 0 };
        utrie_unserialize(&UCAt, dataStart + tableOffset, 9999999, status);
        UCAt.getFoldingOffset = myGetFoldingOffset;
        if (U_SUCCESS(*status)) {
            utrie_enum(&UCAt, NULL, enumRange, NULL);
        }
    }
#endif
    tableOffset += paddedsize(mappingSize);

    int32_t i = 0;

    /* copy max expansion table */
    myData->endExpansionCE = tableOffset;
    myData->endExpansionCECount = maxexpansion->position;
    /* not copying the first element which is a dummy */
    uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
                maxexpansion->position * sizeof(uint32_t));
    tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint32_t)));
    myData->expansionCESize = tableOffset;
    uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
                maxexpansion->position * sizeof(uint8_t));
    tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint8_t)));

    /* Unsafe chars table. Finish it off, then copy it. */
    uprv_uca_unsafeCPAddCCNZ(t, status);
    if (t->UCA != 0) { /* Or in unsafe bits from UCA, making a combined table. */
        for (i = 0; i < UCOL_UNSAFECP_TABLE_SIZE; i++) {
            t->unsafeCP[i] |= t->UCA->unsafeCP[i];
        }
    }
    myData->unsafeCP = tableOffset;
    uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
    tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);

    /* Finish building the Contraction Ending chars table and then copy it out. */
    if (t->UCA != 0) { /* Or in contraction-end bits from UCA, making a combined table. */
        for (i = 0; i < UCOL_UNSAFECP_TABLE_SIZE; i++) {
            t->contrEndCP[i] |= t->UCA->contrEndCP[i];
        }
    }
    myData->contrEndCP = tableOffset;
    uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
    tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);

    // Sanity check: everything written must add up to the computed size.
    if (tableOffset != toAllocate) {
#ifdef UCOL_DEBUG
        fprintf(stderr, " calculation screwup!!! Expected to write %i but wrote %i instead!!! \n ", toAllocate, tableOffset);
#endif
        *status = U_INTERNAL_PROGRAM_ERROR;
        uprv_free(dataStart);
        return 0;
    }

    myData->size = tableOffset;
    return myData;
}
2002-06-13 18:31:34 +00:00
struct enumStruct {
tempUCATable * t ;
UCollator * tempColl ;
UCollationElements * colEl ;
2002-07-13 05:24:35 +00:00
int32_t noOfClosures ;
2002-06-13 18:31:34 +00:00
UErrorCode * status ;
} ;
U_CDECL_BEGIN
/**
 * u_enumCharTypes() callback: processes one range [start, limit) of code
 * points that share the general category `type`.
 *
 * For every code point in an assigned, non-private-use range that has a
 * decomposition which does not compare UCOL_EQUAL to the code point itself
 * under context->tempColl, a new element mapping the code point to the
 * collation elements of its decomposition is added to context->t, and
 * context->noOfClosures is incremented.
 *
 * @param context pointer to an enumStruct
 * @param start   first code point of the range
 * @param limit   one past the last code point of the range
 * @param type    general category of the range
 * @return TRUE to continue the enumeration; FALSE once *context->status
 *         indicates a failure, so that the enumeration stops instead of
 *         walking the remaining ranges with an unusable state
 */
static UBool U_CALLCONV
_enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
    enumStruct *ctx = (enumStruct *)context;
    UErrorCode *status = ctx->status;
    tempUCATable *t = ctx->t;
    UCollator *tempColl = ctx->tempColl;
    UCollationElements *colEl = ctx->colEl;
    UCAElements el;
    UChar decomp[256] = { 0 };
    int32_t noOfDec = 0;
    UChar32 u32 = 0;
    UChar comp[2];
    uint32_t len = 0;

    /* A failure was already recorded: stop the enumeration. */
    if(U_FAILURE(*status)) {
        return FALSE;
    }

    /* Only assigned ranges are processed - we might omit more categories later. */
    if(type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR) {
        return TRUE;
    }

    for(u32 = start; u32 < limit; u32++) {
        noOfDec = unorm_getDecomposition(u32, FALSE, decomp, 256);
        /* A positive result is the length of the decomposition now stored in
         * decomp; anything else means there is nothing to close over. */
        if(noOfDec <= 0) {
            continue;
        }

        len = 0;
        UTF_APPEND_CHAR_UNSAFE(comp, len, u32);
        if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) == UCOL_EQUAL) {
            /* The code point already sorts like its decomposition. */
            continue;
        }

#ifdef UCOL_DEBUG
        fprintf(stderr, " Closure: %08X -> ", (unsigned int)u32);
        int32_t i = 0;
        for(i = 0; i < noOfDec; i++) {
            fprintf(stderr, " %04X ", decomp[i]);
        }
        fprintf(stderr, " \n ");
#endif
        ctx->noOfClosures++;

        /* Look the decomposed form up among the elements that use a prefix. */
        el.cPoints = decomp;
        el.cSize = noOfDec;
        el.noOfCEs = 0;
        el.prefix = el.prefixChars;
        el.prefixSize = 0;
        UCAElements *prefix = (UCAElements *)uhash_get(t->prefixLookup, &el);

        /* From here on el describes the composed code point itself. */
        el.cPoints = comp;
        el.cSize = len;
        el.prefix = el.prefixChars;
        el.prefixSize = 0;

        if(prefix == NULL) {
            /* Give the code point the CEs that the collator produces for its
             * decomposition. Assumes el.CEs is an array member of UCAElements
             * (it is written here without any allocation). */
            const uint32_t ceCapacity = (uint32_t)(sizeof(el.CEs) / sizeof(el.CEs[0]));
            el.noOfCEs = 0;
            ucol_setText(colEl, decomp, noOfDec, status);
            for(;;) {
                if(el.noOfCEs >= ceCapacity) {
                    /* More CEs than el.CEs can hold: report it rather than
                     * write past the end of the array. */
                    *status = U_INTERNAL_PROGRAM_ERROR;
                    break;
                }
                if((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) == UCOL_NULLORDER) {
                    break;
                }
                el.noOfCEs++;
            }
            if(U_FAILURE(*status)) {
                return FALSE;
            }
        } else {
            el.noOfCEs = 1;
            el.CEs[0] = prefix->mapCE;
            // This character uses a prefix. We have to add it
            // to the unsafe table, as its decomposed form is already
            // in. In Japanese, this happens for \u309e & \u30fe
            // Since unsafeCPSet is static in ucol_elm, we are going
            // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
        }

        if(UCOL_ISTHAIPREVOWEL(el.cPoints[0])) {
            el.isThai = TRUE;
        } else {
            el.isThai = FALSE;
        }

        uprv_uca_addAnElement(t, &el, status);
        if(U_FAILURE(*status)) {
            return FALSE;
        }
    }
    return TRUE;
}
U_CDECL_END
2002-07-13 05:24:35 +00:00
/**
 * Adds canonical closure elements to the table t.
 *
 * A temporary collator is built from a clone of t. u_enumCharTypes() then
 * runs _enumCategoryRangeClosureCategory() over all character ranges with
 * that collator, and the callback adds the closure elements to t.
 *
 * If the temporary collator or its element iterator cannot be created, no
 * enumeration is performed and *status carries the failure.
 *
 * @param t      table that receives the closure elements
 * @param status in/out error code; nothing is done if it already indicates
 *               a failure on entry
 * @return the number of closures counted by the callback, 0 if it never ran
 */
U_CAPI int32_t U_EXPORT2
uprv_uca_canonicalClosure(tempUCATable *t, UErrorCode *status)
{
    enumStruct context;
    context.noOfClosures = 0;

    if(U_SUCCESS(*status)) {
        UCollator *tempColl = NULL;
        tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);

        UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
        tempColl = ucol_initCollator(tempData, 0, status);
        uprv_uca_closeTempTable(tempTable);

        if(U_SUCCESS(*status) && tempColl != NULL) {
            tempColl->rb = NULL;
            tempColl->binary = NULL;
            tempColl->requestedLocale = NULL;
            /* NOTE(review): presumably this makes ucol_close() responsible
             * for releasing tempData - confirm against ucol_close(). */
            tempColl->hasRealData = TRUE;

            /* produce canonical closure */
            UCollationElements *colEl = ucol_openElements(tempColl, NULL, 0, status);
            if(U_SUCCESS(*status) && colEl != NULL) {
                context.t = t;
                context.tempColl = tempColl;
                context.colEl = colEl;
                context.status = status;
                u_enumCharTypes(_enumCategoryRangeClosureCategory, &context);
            }
            if(colEl != NULL) {
                ucol_closeElements(colEl);
            }
            ucol_close(tempColl);
        } else {
            /* The collator could not be built: release what was allocated
             * and skip the enumeration, which would otherwise use an
             * invalid collator. */
            if(tempData != 0) {
                uprv_free(tempData);
            }
            if(tempColl != NULL) {
                ucol_close(tempColl);
            }
        }
    }
    return context.noOfClosures;
}
2001-10-20 01:09:31 +00:00
U_NAMESPACE_END
2002-09-20 01:54:48 +00:00
# endif /* #if !UCONFIG_NO_COLLATION */