1999-11-23 02:17:43 +00:00
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
1999-12-13 22:28:37 +00:00
*
2008-02-13 09:35:50 +00:00
* Copyright ( C ) 1999 - 2008 , International Business Machines
1999-12-13 22:28:37 +00:00
* Corporation and others . All Rights Reserved .
*
1999-11-23 02:17:43 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* file name : gencnval . c
* encoding : US - ASCII
* tab size : 8 ( not used )
* indentation : 4
*
* created on : 1999 nov05
* created by : Markus W . Scherer
*
* This program reads convrtrs . txt and writes a memory - mappable
* converter name alias table to cnvalias . dat .
2000-08-10 01:31:28 +00:00
*
* This program currently writes version 2.1 of the data format . See
* ucnv_io . c for more details on the format . Note that version 2.1
* is written in such a way that a 2.0 reader will be able to use it ,
* and a 2.1 reader will be able to read 2.0 .
1999-11-23 02:17:43 +00:00
*/
1999-12-28 23:57:50 +00:00
# include "unicode/utypes.h"
2001-01-03 00:18:57 +00:00
# include "unicode/putil.h"
2000-11-17 06:07:31 +00:00
# include "unicode/ucnv.h" /* ucnv_compareNames() */
2002-06-28 23:13:30 +00:00
# include "ucnv_io.h"
1999-11-23 02:17:43 +00:00
# include "cmemory.h"
# include "cstring.h"
2004-09-14 16:58:51 +00:00
# include "uinvchar.h"
1999-11-23 02:17:43 +00:00
# include "filestrm.h"
2003-08-14 21:34:54 +00:00
# include "unicode/uclean.h"
1999-11-23 02:17:43 +00:00
# include "unewdata.h"
2000-04-18 21:16:39 +00:00
# include "uoptions.h"
1999-11-23 02:17:43 +00:00
2002-06-12 17:29:40 +00:00
# include <stdio.h>
# include <stdlib.h>
# include <ctype.h>
2002-06-28 23:13:30 +00:00
/* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
2002-02-08 01:08:50 +00:00
2002-06-28 23:13:30 +00:00
/* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
That is the maximum size for the string stores combined
because the strings are indexed at 16-bit boundaries by a
16-bit index, and there is only one section for the
strings.
*/
# define STRING_STORE_SIZE 0x1FBFE /* 130046 */
# define TAG_STORE_SIZE 0x400 /* 1024 */
/* The combined tag and converter count can affect the number of lists
created. The size of all lists must be less than (2^17 - 1)
because the lists are indexed as a 16-bit array with a 16-bit index.
*/
# define MAX_TAG_COUNT 0x3F /* 63 */
# define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
# define MAX_ALIAS_COUNT 0xFFFF /* 65535 */
1999-11-23 02:17:43 +00:00
2002-06-28 23:13:30 +00:00
/* The maximum number of aliases that a standard tag/converter combination can have.
At this moment (6/18/2002), IANA has 12 names for ASCII. Don't go below 15 for
this value. I don't recommend more than 31 for this value.
*/
# define MAX_TC_ALIAS_COUNT 0x1F /* 31 */
2000-08-10 01:31:28 +00:00
2002-06-28 23:13:30 +00:00
/* Size of each line buffer used while reading the input file. */
# define MAX_LINE_SIZE 0x7FFF /* 32767 */
/* Number of 16-bit entries in the aliasLists array. */
# define MAX_LIST_SIZE 0xFFFF /* 65535 */
2002-06-12 17:29:40 +00:00
1999-11-23 02:17:43 +00:00
/* Name and type passed to udata_create() for the output file. */
# define DATA_NAME "cnvalias"
2002-06-28 23:13:30 +00:00
# define DATA_TYPE "icu" /* ICU alias table */
/* Tag 0 is the empty tag (untagged aliases); tag 1 is the "ALL" tag that
receives every alias. parseFile() registers them in that order. */
# define ALL_TAG_STR "ALL"
# define ALL_TAG_NUM 1
# define EMPTY_TAG_NUM 0
1999-11-23 02:17:43 +00:00
/* UDataInfo cf. udata.h */
/* Header descriptor handed to udata_create() in main() for the output file.
   NOTE(review): the file header comment talks about data format version 2.1,
   while formatVersion below is 3.0.1 -- confirm which one is current. */
static const UDataInfo dataInfo = {
sizeof ( UDataInfo ) ,
0 ,
U_IS_BIG_ENDIAN ,
U_CHARSET_FAMILY ,
sizeof ( UChar ) ,
0 ,
2001-03-26 20:43:03 +00:00
{ 0x43 , 0x76 , 0x41 , 0x6c } , /* dataFormat="CvAl" */
2006-06-11 16:57:02 +00:00
{ 3 , 0 , 1 , 0 } , /* formatVersion */
2001-03-26 20:43:03 +00:00
{ 1 , 4 , 2 , 0 } /* dataVersion */
1999-11-23 02:17:43 +00:00
} ;
2000-08-10 01:31:28 +00:00
/* A fixed-capacity string arena: `store` is the backing buffer and `max`
   its capacity in bytes. `top` starts at 0; presumably it is the next free
   offset maintained by allocString() (definition not shown here). */
typedef struct {
char * store ;
uint32_t top ;
uint32_t max ;
} StringBlock ;
1999-11-23 02:17:43 +00:00
2000-08-10 01:31:28 +00:00
/* Arena holding converter names and aliases; addressed through
   GET_ALIAS_STR()/GET_ALIAS_NUM(). */
static char stringStore [ STRING_STORE_SIZE ] ;
static StringBlock stringBlock = { stringStore , 0 , STRING_STORE_SIZE } ;
2001-03-21 23:22:16 +00:00
1999-11-23 02:17:43 +00:00
/* The aliases of one (tag, converter) cell. addAlias() reserves slot 0 of
   `aliases` for the default alias (0 when none has been set) and grows the
   array with uprv_realloc(). */
typedef struct {
2002-06-28 23:13:30 +00:00
uint16_t aliasCount ;
uint16_t * aliases ; /* Index into stringStore */
} AliasList ;
1999-11-23 02:17:43 +00:00
/* One converter (a column of the tag/converter table). */
typedef struct {
2002-06-28 23:13:30 +00:00
uint16_t converter ; /* Index into stringStore */
uint16_t totalAliasCount ; /* Total aliases in this column */
1999-11-23 02:17:43 +00:00
} Converter ;
2002-06-28 23:13:30 +00:00
/* All converters seen so far, appended by addConverter(). */
static Converter converters [ MAX_CONV_COUNT ] ;
1999-11-23 02:17:43 +00:00
static uint16_t converterCount = 0 ;
2000-08-10 01:31:28 +00:00
/* Arena holding the standard tag names; addressed through
   GET_TAG_STR()/GET_TAG_NUM(). */
static char tagStore [ TAG_STORE_SIZE ] ;
static StringBlock tagBlock = { tagStore , 0 , TAG_STORE_SIZE } ;
/* One standard tag (a row of the tag/converter table) together with its
   per-converter alias lists. */
typedef struct {
2002-06-28 23:13:30 +00:00
uint16_t tag ; /* Index into tagStore */
uint16_t totalAliasCount ; /* Total aliases in this row */
AliasList aliasList [ MAX_CONV_COUNT ] ;
2000-08-10 01:31:28 +00:00
} Tag ;
2002-06-28 23:13:30 +00:00
/* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
2000-08-10 01:31:28 +00:00
static Tag tags [ MAX_TAG_COUNT ] ;
static uint16_t tagCount = 0 ;
2002-06-28 23:13:30 +00:00
/* Used for storing all aliases */
/* Each entry is a stringStore index appended by addToKnownAliases(). */
static uint16_t knownAliases [ MAX_ALIAS_COUNT ] ;
static uint16_t knownAliasesCount = 0 ;
/*static uint16_t duplicateKnownAliasesCount = 0;*/
/* Used for storing the lists section that point to aliases */
static uint16_t aliasLists [ MAX_LIST_SIZE ] ;
static uint16_t aliasListsSize = 0 ;
2002-06-12 17:29:40 +00:00
/* Were the standard tags declared before the aliases. */
2002-06-28 23:13:30 +00:00
static UBool standardTagsUsed = FALSE ;
/* Set from the -v/--verbose command line option in main(). */
static UBool verbose = FALSE ;
2004-05-17 06:33:22 +00:00
/* Input line counter used in diagnostics; parseFile() resets it to 0. */
static int lineNum = 1 ;
2002-06-12 17:29:40 +00:00
2006-06-11 16:57:02 +00:00
/* Options describing the generated alias table.
   NOTE(review): presumably written into the table by writeAliasTable();
   the writer is not visible here -- confirm. */
static UConverterAliasOptions tableOptions = {
2006-06-14 23:09:52 +00:00
UCNV_IO_STD_NORMALIZED ,
2006-06-12 10:05:08 +00:00
1 /* containsCnvOptionInfo */
2006-06-11 16:57:02 +00:00
} ;
1999-11-23 02:17:43 +00:00
/* prototypes --------------------------------------------------------------- */
/* Parses one logical line: a converter name followed by its aliases and tags. */
static void
parseLine ( const char * line ) ;
2002-06-12 17:29:40 +00:00
/* Reads the whole input stream, joining continuation lines, and dispatches
   each logical line. */
static void
parseFile ( FileStream * in ) ;
/* Strips newline, comment and trailing whitespace in place; returns the new length. */
static int32_t
chomp ( char * line ) ;
/* Registers the tags listed in a "{ ... }" declaration line. */
static void
addOfficialTaggedStandards ( char * line , int32_t lineLen ) ;
2000-08-10 01:31:28 +00:00
/* Adds an alias to the (standard, converter) cell; returns the cell's alias count. */
static uint16_t
2002-06-28 23:13:30 +00:00
addAlias ( const char * alias , uint16_t standard , uint16_t converter , UBool defaultName ) ;
1999-11-23 02:17:43 +00:00
1999-11-23 22:49:12 +00:00
/* Appends a converter to the converter table; returns its index. */
static uint16_t
1999-11-23 02:17:43 +00:00
addConverter ( const char * converter ) ;
/* Copies a string into the given block and returns the stored copy
   (definition not shown here; callers pass -1 as length for NUL-terminated input). */
static char *
2003-07-02 23:14:10 +00:00
allocString ( StringBlock * block , const char * s , int32_t length ) ;
1999-11-23 02:17:43 +00:00
2002-06-28 23:13:30 +00:00
/* Appends an alias to knownAliases; returns its stringStore index. */
static uint16_t
addToKnownAliases ( const char * alias ) ;
1999-11-23 02:17:43 +00:00
/* Comparison callback with a qsort()-style signature (definition not shown here). */
static int
2000-08-10 01:31:28 +00:00
compareAliases ( const void * alias1 , const void * alias2 ) ;
1999-11-23 02:17:43 +00:00
2000-08-10 01:31:28 +00:00
/* Looks up a tag by name, adding it if it is not yet known; returns its index. */
static uint16_t
getTagNumber ( const char * tag , uint16_t tagLen ) ;
2002-06-28 23:13:30 +00:00
/*static void
addTaggedAlias ( uint16_t tag , const char * alias , uint16_t converter ) ; */
2000-08-10 01:31:28 +00:00
/* Writes the collected alias table to the output (definition not shown here). */
static void
2002-06-28 23:13:30 +00:00
writeAliasTable ( UNewDataMemory * out ) ;
2000-08-10 01:31:28 +00:00
1999-11-23 02:17:43 +00:00
/* -------------------------------------------------------------------------- */
2002-06-28 23:13:30 +00:00
/*
 * Conversions between a stored string and its 16-bit index.
 * An index counts 2-byte units from the start of the store, so a pointer is
 * recovered with (index << 1) and an index with (offset >> 1).
 * All four macros presume that the string was stored with allocString().
 * Every macro argument is parenthesized so that compound expressions
 * (for example a conditional) can be passed safely.
 */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
/* Indexes into options[] below; the enumerator order must match the order
   of the initializers in that array. */
enum
{
HELP1 ,
HELP2 ,
VERBOSE ,
COPYRIGHT ,
DESTDIR ,
2006-06-14 23:09:52 +00:00
SOURCEDIR
2002-06-28 23:13:30 +00:00
} ;
2000-04-18 21:16:39 +00:00
/* Command line options, filled in by u_parseArgs() in main(). */
static UOption options [ ] = {
UOPTION_HELP_H ,
UOPTION_HELP_QUESTION_MARK ,
2002-06-28 23:13:30 +00:00
UOPTION_VERBOSE ,
2000-04-18 21:16:39 +00:00
UOPTION_COPYRIGHT ,
UOPTION_DESTDIR ,
2006-06-14 23:09:52 +00:00
UOPTION_SOURCEDIR
2000-04-18 21:16:39 +00:00
} ;
1999-11-23 02:17:43 +00:00
extern int
2000-09-21 21:49:32 +00:00
main ( int argc , char * argv [ ] ) {
2002-06-12 17:29:40 +00:00
char pathBuf [ 512 ] ;
2000-04-18 21:16:39 +00:00
const char * path ;
1999-11-23 02:17:43 +00:00
FileStream * in ;
UNewDataMemory * out ;
UErrorCode errorCode = U_ZERO_ERROR ;
2002-03-15 23:41:40 +00:00
U_MAIN_INIT_ARGS ( argc , argv ) ;
2000-04-18 21:16:39 +00:00
/* preset then read command line options */
2003-07-02 23:14:10 +00:00
options [ DESTDIR ] . value = options [ SOURCEDIR ] . value = u_getDataDirectory ( ) ;
2000-04-18 21:16:39 +00:00
argc = u_parseArgs ( argc , argv , sizeof ( options ) / sizeof ( options [ 0 ] ) , options ) ;
/* error handling, printing usage message */
if ( argc < 0 ) {
fprintf ( stderr ,
" error in command line argument \" %s \" \n " ,
argv [ - argc ] ) ;
}
2002-06-28 23:13:30 +00:00
if ( argc < 0 | | options [ HELP1 ] . doesOccur | | options [ HELP2 ] . doesOccur ) {
2000-04-18 21:16:39 +00:00
fprintf ( stderr ,
" usage: %s [-options] [convrtrs.txt] \n "
2002-07-17 03:56:50 +00:00
" \t read convrtrs.txt and create " U_ICUDATA_NAME " _ " DATA_NAME " . " DATA_TYPE " \n "
2002-03-15 23:41:40 +00:00
" options: \n "
" \t -h or -? or --help this usage text \n "
2002-06-28 23:13:30 +00:00
" \t -v or --verbose prints out extra information about the alias table \n "
2002-03-15 23:41:40 +00:00
" \t -c or --copyright include a copyright notice \n "
" \t -d or --destdir destination directory, followed by the path \n "
" \t -s or --sourcedir source directory, followed by the path \n " ,
2000-04-18 21:16:39 +00:00
argv [ 0 ] ) ;
return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR ;
2000-02-29 18:42:28 +00:00
}
2002-06-28 23:13:30 +00:00
if ( options [ VERBOSE ] . doesOccur ) {
verbose = TRUE ;
}
2000-04-18 21:16:39 +00:00
if ( argc > = 2 ) {
path = argv [ 1 ] ;
1999-11-23 02:17:43 +00:00
} else {
2003-07-02 23:14:10 +00:00
path = options [ SOURCEDIR ] . value ;
2000-04-18 21:16:39 +00:00
if ( path ! = NULL & & * path ! = 0 ) {
2003-07-02 23:14:10 +00:00
char * end ;
2002-06-12 17:29:40 +00:00
uprv_strcpy ( pathBuf , path ) ;
2008-02-13 10:04:38 +00:00
end = uprv_strchr ( pathBuf , 0 ) ;
2002-06-12 17:29:40 +00:00
if ( * ( end - 1 ) ! = U_FILE_SEP_CHAR ) {
* ( end + + ) = U_FILE_SEP_CHAR ;
2000-04-18 21:16:39 +00:00
}
2002-06-12 17:29:40 +00:00
uprv_strcpy ( end , " convrtrs.txt " ) ;
path = pathBuf ;
2000-02-07 20:55:54 +00:00
} else {
2002-02-08 01:08:50 +00:00
path = " convrtrs.txt " ;
2000-02-07 20:55:54 +00:00
}
1999-11-23 02:17:43 +00:00
}
2002-06-28 23:13:30 +00:00
uprv_memset ( stringStore , 0 , sizeof ( stringStore ) ) ;
uprv_memset ( tagStore , 0 , sizeof ( tagStore ) ) ;
uprv_memset ( converters , 0 , sizeof ( converters ) ) ;
uprv_memset ( tags , 0 , sizeof ( tags ) ) ;
uprv_memset ( aliasLists , 0 , sizeof ( aliasLists ) ) ;
uprv_memset ( knownAliases , 0 , sizeof ( aliasLists ) ) ;
1999-11-23 02:17:43 +00:00
in = T_FileStream_open ( path , " r " ) ;
if ( in = = NULL ) {
fprintf ( stderr , " gencnval: unable to open input file convrtrs.txt \n " ) ;
exit ( U_FILE_ACCESS_ERROR ) ;
}
2002-06-12 17:29:40 +00:00
parseFile ( in ) ;
1999-11-23 02:17:43 +00:00
T_FileStream_close ( in ) ;
/* create the output file */
2004-04-14 20:08:16 +00:00
out = udata_create ( options [ DESTDIR ] . value , DATA_TYPE , DATA_NAME , & dataInfo ,
2003-07-02 23:14:10 +00:00
options [ COPYRIGHT ] . doesOccur ? U_COPYRIGHT_STRING : NULL , & errorCode ) ;
1999-11-23 02:17:43 +00:00
if ( U_FAILURE ( errorCode ) ) {
1999-12-08 23:31:17 +00:00
fprintf ( stderr , " gencnval: unable to open output file - error %s \n " , u_errorName ( errorCode ) ) ;
1999-11-23 02:17:43 +00:00
exit ( errorCode ) ;
}
2002-06-28 23:13:30 +00:00
/* write the table of aliases based on a tag/converter name combination */
writeAliasTable ( out ) ;
1999-11-23 02:17:43 +00:00
/* finish */
udata_finish ( out , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
1999-12-08 23:31:17 +00:00
fprintf ( stderr , " gencnval: error finishing output file - %s \n " , u_errorName ( errorCode ) ) ;
1999-11-23 02:17:43 +00:00
exit ( errorCode ) ;
}
return 0 ;
}
2002-06-12 17:29:40 +00:00
/**
 * Reads the alias table source and processes it one logical line at a time.
 *
 * A logical line is a physical line plus any following non-empty lines that
 * start with whitespace (continuation lines); chomp() strips comments and
 * line ends before the pieces are joined. A logical line starting with '{'
 * declares the standard tags; every other line is handed to parseLine() and
 * is only accepted after the tags have been declared.
 *
 * Registers the empty tag and the "ALL" tag and reserves the empty string in
 * stringStore before reading. Any syntax error, or a logical line that does
 * not fit into MAX_LINE_SIZE, prints a message and terminates the process.
 *
 * @param in open input stream
 */
static void
parseFile(FileStream *in) {
    char line[MAX_LINE_SIZE];
    char lastLine[MAX_LINE_SIZE];
    int32_t lineSize = 0;
    int32_t lastLineSize = 0;
    UBool validParse = TRUE;

    lineNum = 0;

    /* Add the empty tag, which is for untagged aliases */
    getTagNumber("", 0);
    getTagNumber(ALL_TAG_STR, 3);
    allocString(&stringBlock, "", 0);

    /* read the list of aliases */
    while (validParse) {
        validParse = FALSE;

        /* Read non-empty lines that don't start with a space character. */
        while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
            lastLineSize = chomp(lastLine);
            if (lineSize == 0 || (lastLineSize > 0 && isspace((unsigned char)*lastLine))) {
                /* the joined text plus its NUL must still fit into line[] */
                if (lineSize + lastLineSize >= MAX_LINE_SIZE) {
                    fprintf(stderr, "error(line %d): line with its continuation lines is too long\n", lineNum);
                    exit(U_BUFFER_OVERFLOW_ERROR);
                }
                uprv_strcpy(line + lineSize, lastLine);
                lineSize += lastLineSize;
            } else if (lineSize > 0) {
                /* lastLine starts the next logical line; process the current one */
                validParse = TRUE;
                break;
            }
            lineNum++;
        }

        if (validParse || lineSize > 0) {
            if (isspace((unsigned char)*line)) {
                fprintf(stderr, "error(line %d): cannot start an alias with a space\n", lineNum - 1);
                exit(U_PARSE_ERROR);
            } else if (line[0] == '{') {
                if (!standardTagsUsed && line[lineSize - 1] != '}') {
                    fprintf(stderr, "error(line %d): alias needs to start with a converter name\n", lineNum);
                    exit(U_PARSE_ERROR);
                }
                addOfficialTaggedStandards(line, lineSize);
                standardTagsUsed = TRUE;
            } else {
                if (standardTagsUsed) {
                    parseLine(line);
                } else {
                    fprintf(stderr, "error(line %d): alias table needs to start a list of standard tags\n", lineNum);
                    exit(U_PARSE_ERROR);
                }
            }

            /* Was the last line consumed */
            if (lastLineSize > 0) {
                uprv_strcpy(line, lastLine);
                lineSize = lastLineSize;
            } else {
                lineSize = 0;
            }
        }
        lineNum++;
    }
}
/**
 * Works almost like the Perl chomp: truncates the line in place at the first
 * '\r', '\n' or '#' (comment start) and then removes trailing whitespace.
 * Leading whitespace is kept. A line that contains only whitespace is left
 * untrimmed (apart from the truncation), so its length is still reported.
 *
 * @param line NUL-terminated, writable line buffer
 * @return length of the line after trimming
 */
static int32_t
chomp(char *line) {
    char *s = line;
    char *lastNonSpace = NULL; /* NULL until a non-space character is seen */

    while (*s != 0) {
        /* truncate at a newline or a comment */
        if (*s == '\r' || *s == '\n' || *s == '#') {
            *s = 0;
            break;
        }
        /* <ctype.h> functions need an unsigned char value */
        if (!isspace((unsigned char)*s)) {
            lastNonSpace = s;
        }
        ++s;
    }

    /* cut right after the last non-space character, even if it is line[0] */
    if (lastNonSpace != NULL) {
        s = lastNonSpace + 1;
        *s = 0;
    }
    return (int32_t)(s - line);
}
1999-11-23 02:17:43 +00:00
static void
parseLine ( const char * line ) {
1999-11-23 22:49:12 +00:00
uint16_t pos = 0 , start , limit , length , cnv ;
char * converter , * alias ;
1999-11-23 02:17:43 +00:00
/* skip leading white space */
2002-06-28 23:13:30 +00:00
/* There is no whitespace at the beginning anymore */
/* while(line[pos]!=0 && isspace(line[pos])) {
1999-11-23 02:17:43 +00:00
+ + pos ;
}
2002-06-28 23:13:30 +00:00
*/
1999-11-23 02:17:43 +00:00
2002-06-12 17:29:40 +00:00
/* is there nothing on this line? */
if ( line [ pos ] = = 0 ) {
1999-11-23 02:17:43 +00:00
return ;
}
/* get the converter name */
start = pos ;
2002-06-28 23:13:30 +00:00
while ( line [ pos ] ! = 0 & & ! isspace ( line [ pos ] ) ) {
1999-11-23 02:17:43 +00:00
+ + pos ;
}
limit = pos ;
/* store the converter name */
2000-08-23 16:46:39 +00:00
length = ( uint16_t ) ( limit - start ) ;
2003-07-02 23:14:10 +00:00
converter = allocString ( & stringBlock , line + start , length ) ;
1999-11-23 02:17:43 +00:00
/* add the converter to the converter table */
cnv = addConverter ( converter ) ;
2002-06-28 23:13:30 +00:00
/* The name itself may be tagged, so let's added it to the aliases list properly */
pos = start ;
1999-11-23 02:17:43 +00:00
/* get all the real aliases */
for ( ; ; ) {
2002-06-28 23:13:30 +00:00
1999-11-23 02:17:43 +00:00
/* skip white space */
2002-06-28 23:13:30 +00:00
while ( line [ pos ] ! = 0 & & isspace ( line [ pos ] ) ) {
1999-11-23 02:17:43 +00:00
+ + pos ;
}
/* is there no more alias name on this line? */
2002-06-28 23:13:30 +00:00
if ( line [ pos ] = = 0 ) {
1999-11-23 02:17:43 +00:00
break ;
}
/* get an alias name */
start = pos ;
2002-06-28 23:13:30 +00:00
while ( line [ pos ] ! = 0 & & line [ pos ] ! = ' { ' & & ! isspace ( line [ pos ] ) ) {
1999-11-23 02:17:43 +00:00
+ + pos ;
}
limit = pos ;
1999-11-23 22:49:12 +00:00
/* store the alias name */
2000-08-23 16:46:39 +00:00
length = ( uint16_t ) ( limit - start ) ;
2002-06-28 23:13:30 +00:00
if ( start = = 0 ) {
/* add the converter as its own alias to the alias table */
alias = converter ;
addAlias ( alias , ALL_TAG_NUM , cnv , TRUE ) ;
}
else {
2003-07-02 23:14:10 +00:00
alias = allocString ( & stringBlock , line + start , length ) ;
2002-06-28 23:13:30 +00:00
addAlias ( alias , ALL_TAG_NUM , cnv , FALSE ) ;
}
addToKnownAliases ( alias ) ;
1999-11-23 02:17:43 +00:00
/* add the alias/converter pair to the alias table */
2002-06-28 23:13:30 +00:00
/* addAlias(alias, 0, cnv, FALSE);*/
1999-11-23 02:17:43 +00:00
2000-08-04 00:37:27 +00:00
/* skip whitespace */
2002-06-28 23:13:30 +00:00
while ( line [ pos ] & & isspace ( line [ pos ] ) ) {
2000-08-04 00:37:27 +00:00
+ + pos ;
}
2000-08-10 01:31:28 +00:00
/* handle tags if they are present */
if ( line [ pos ] = = ' { ' ) {
+ + pos ;
do {
start = pos ;
2002-06-28 23:13:30 +00:00
while ( line [ pos ] & & line [ pos ] ! = ' } ' & & ! isspace ( line [ pos ] ) ) {
2000-08-10 01:31:28 +00:00
+ + pos ;
}
limit = pos ;
if ( start ! = limit ) {
/* add the tag to the tag table */
2002-06-28 23:13:30 +00:00
uint16_t tag = getTagNumber ( line + start , ( uint16_t ) ( limit - start ) ) ;
addAlias ( alias , tag , cnv , ( UBool ) ( line [ limit - 1 ] = = ' * ' ) ) ;
2000-08-10 01:31:28 +00:00
}
2002-06-28 23:13:30 +00:00
while ( line [ pos ] & & isspace ( line [ pos ] ) ) {
2000-08-10 01:31:28 +00:00
+ + pos ;
}
2002-06-28 23:13:30 +00:00
} while ( line [ pos ] & & line [ pos ] ! = ' } ' ) ;
2000-08-10 01:31:28 +00:00
2000-08-04 00:37:27 +00:00
if ( line [ pos ] = = ' } ' ) {
+ + pos ;
} else {
2002-06-28 23:13:30 +00:00
fprintf ( stderr , " error(line %d): Unterminated tag list \n " , lineNum ) ;
exit ( U_UNMATCHED_BRACES ) ;
2000-08-04 00:37:27 +00:00
}
2002-06-28 23:13:30 +00:00
} else {
addAlias ( alias , EMPTY_TAG_NUM , cnv , ( UBool ) ( tags [ 0 ] . aliasList [ cnv ] . aliasCount = = 0 ) ) ;
2000-08-04 00:37:27 +00:00
}
1999-11-23 02:17:43 +00:00
}
}
2000-08-10 01:31:28 +00:00
static uint16_t
getTagNumber ( const char * tag , uint16_t tagLen ) {
char * atag ;
uint16_t t ;
2002-06-28 23:13:30 +00:00
UBool preferredName = ( ( tagLen > 0 ) ? ( tag [ tagLen - 1 ] = = ' * ' ) : ( FALSE ) ) ;
2000-08-10 01:31:28 +00:00
2002-02-08 01:08:50 +00:00
if ( tagCount > = MAX_TAG_COUNT ) {
2002-06-28 23:13:30 +00:00
fprintf ( stderr , " error(line %d): too many tags \n " , lineNum ) ;
2000-08-10 01:31:28 +00:00
exit ( U_BUFFER_OVERFLOW_ERROR ) ;
}
2002-06-12 17:29:40 +00:00
if ( preferredName ) {
/* puts(tag);*/
tagLen - - ;
}
2000-08-10 01:31:28 +00:00
for ( t = 0 ; t < tagCount ; + + t ) {
2002-06-28 23:13:30 +00:00
const char * currTag = GET_TAG_STR ( tags [ t ] . tag ) ;
if ( uprv_strlen ( currTag ) = = tagLen & & ! uprv_strnicmp ( currTag , tag , tagLen ) ) {
2000-08-10 01:31:28 +00:00
return t ;
}
}
/* we need to add this tag */
2002-02-08 01:08:50 +00:00
if ( tagCount > = MAX_TAG_COUNT ) {
2002-06-28 23:13:30 +00:00
fprintf ( stderr , " error(line %d): too many tags \n " , lineNum ) ;
2000-08-10 01:31:28 +00:00
exit ( U_BUFFER_OVERFLOW_ERROR ) ;
}
/* allocate a new entry in the tag table */
2003-07-02 23:14:10 +00:00
atag = allocString ( & tagBlock , tag , tagLen ) ;
2000-08-10 01:31:28 +00:00
2002-06-12 17:29:40 +00:00
if ( standardTagsUsed ) {
2002-06-28 23:13:30 +00:00
fprintf ( stderr , " error(line %d): Tag \" %s \" is not declared at the beginning of the alias table. \n " ,
lineNum , atag ) ;
2002-06-12 17:29:40 +00:00
exit ( 1 ) ;
}
2002-06-28 23:13:30 +00:00
else if ( tagLen > 0 & & strcmp ( tag , ALL_TAG_STR ) ! = 0 ) {
fprintf ( stderr , " warning(line %d): Tag \" %s \" was added to the list of standards because it was not declared at beginning of the alias table. \n " ,
lineNum , atag ) ;
2002-06-12 17:29:40 +00:00
}
2000-08-10 01:31:28 +00:00
/* add the tag to the tag table */
2002-06-28 23:13:30 +00:00
tags [ tagCount ] . tag = GET_TAG_NUM ( atag ) ;
/* The aliasList should be set to 0's already */
2000-08-10 01:31:28 +00:00
return tagCount + + ;
}
2002-06-28 23:13:30 +00:00
/*static void
2000-08-10 01:31:28 +00:00
addTaggedAlias ( uint16_t tag , const char * alias , uint16_t converter ) {
tags [ tag ] . aliases [ converter ] = alias ;
}
2002-06-28 23:13:30 +00:00
*/
2000-08-10 01:31:28 +00:00
2002-06-12 17:29:40 +00:00
static void
addOfficialTaggedStandards ( char * line , int32_t lineLen ) {
char * atag ;
2008-02-13 09:35:50 +00:00
char * endTagExp ;
char * tag ;
2002-06-12 17:29:40 +00:00
static const char WHITESPACE [ ] = " \t " ;
2002-08-08 22:46:05 +00:00
if ( tagCount > UCNV_NUM_RESERVED_TAGS ) {
2002-06-28 23:13:30 +00:00
fprintf ( stderr , " error(line %d): official tags already added \n " , lineNum ) ;
2002-06-12 17:29:40 +00:00
exit ( U_BUFFER_OVERFLOW_ERROR ) ;
}
2008-02-13 09:35:50 +00:00
tag = strchr ( line , ' { ' ) ;
if ( tag = = NULL ) {
/* Why were we called? */
fprintf ( stderr , " error(line %d): Missing start of tag group \n " , lineNum ) ;
exit ( U_PARSE_ERROR ) ;
}
tag + + ;
endTagExp = strchr ( tag , ' } ' ) ;
if ( endTagExp = = NULL ) {
fprintf ( stderr , " error(line %d): Missing end of tag group \n " , lineNum ) ;
exit ( U_PARSE_ERROR ) ;
}
endTagExp [ 0 ] = 0 ;
2002-06-12 17:29:40 +00:00
tag = strtok ( tag , WHITESPACE ) ;
while ( tag ! = NULL ) {
/* printf("Adding original tag \"%s\"\n", tag);*/
/* allocate a new entry in the tag table */
2003-07-02 23:14:10 +00:00
atag = allocString ( & tagBlock , tag , - 1 ) ;
2002-06-12 17:29:40 +00:00
/* add the tag to the tag table */
2002-06-28 23:13:30 +00:00
tags [ tagCount + + ] . tag = ( uint16_t ) ( ( atag - tagStore ) > > 1 ) ;
/* The aliasList should already be set to 0's */
2002-06-12 17:29:40 +00:00
/* Get next tag */
tag = strtok ( NULL , WHITESPACE ) ;
}
}
2000-08-10 01:31:28 +00:00
static uint16_t
2002-06-28 23:13:30 +00:00
addToKnownAliases ( const char * alias ) {
/* uint32_t idx; */
/* strict matching */
/* for (idx = 0; idx < knownAliasesCount; idx++) {
uint16_t num = GET_ALIAS_NUM ( alias ) ;
if ( knownAliases [ idx ] ! = num
& & uprv_strcmp ( alias , GET_ALIAS_STR ( knownAliases [ idx ] ) ) = = 0 )
{
fprintf ( stderr , " warning(line %d): duplicate alias %s and %s found \n " ,
lineNum , alias , GET_ALIAS_STR ( knownAliases [ idx ] ) ) ;
duplicateKnownAliasesCount + + ;
break ;
}
else if ( knownAliases [ idx ] ! = num
& & ucnv_compareNames ( alias , GET_ALIAS_STR ( knownAliases [ idx ] ) ) = = 0 )
{
if ( verbose ) {
fprintf ( stderr , " information(line %d): duplicate alias %s and %s found \n " ,
lineNum , alias , GET_ALIAS_STR ( knownAliases [ idx ] ) ) ;
}
duplicateKnownAliasesCount + + ;
break ;
}
}
*/
if ( knownAliasesCount > = MAX_ALIAS_COUNT ) {
fprintf ( stderr , " warning(line %d): Too many aliases defined for all converters \n " ,
lineNum ) ;
exit ( U_BUFFER_OVERFLOW_ERROR ) ;
}
/* TODO: We could try to unlist exact duplicates. */
return knownAliases [ knownAliasesCount + + ] = GET_ALIAS_NUM ( alias ) ;
}
/**
 * Adds an alias to the list for one (standard tag, converter) combination.
 *
 * Slot 0 of every alias list is reserved for the default alias of that
 * combination and holds 0 until a default is set; other aliases are
 * appended after it. For every tag except "ALL", lenient duplicates of the
 * alias under the same tag are reported as warnings (for the same converter
 * only in verbose mode or when the names are identical).
 *
 * Terminates the process when an index is out of range, when the list is
 * full, when a second default alias is given, or when memory runs out.
 *
 * @param alias       alias string that lives in stringStore
 * @param standard    tag index; when standard is 0, then it's the "empty" tag
 * @param converter   converter index as returned by addConverter()
 * @param defaultName TRUE if the alias is the default for this combination
 * @return the number of slots in the alias list, including the default slot
 */
static uint16_t
addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
    uint32_t idx, idx2;
    UBool startEmptyWithoutDefault = FALSE;
    AliasList *aliasList;
    uint16_t *grownAliases;

    if (standard >= MAX_TAG_COUNT) {
        fprintf(stderr, "error(line %d): too many standard tags\n", lineNum);
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    if (converter >= MAX_CONV_COUNT) {
        fprintf(stderr, "error(line %d): too many converter names\n", lineNum);
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    aliasList = &tags[standard].aliasList[converter];

    /* NOTE(review): reported as an error, but processing continues -- confirm this is intended */
    if (strchr(alias, '}')) {
        fprintf(stderr, "error(line %d): unmatched } found\n",
                lineNum);
    }

    if (aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
        fprintf(stderr, "error(line %d): too many aliases for alias %s and converter %s\n",
                lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
        exit(U_BUFFER_OVERFLOW_ERROR);
    }

    /* Show this warning only once. All aliases are added to the "ALL" tag. */
    if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
        /* Normally these option values are parsed at runtime, and they can
           be discarded when the alias is a default converter. Options should
           only be on a converter and not an alias. */
        if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0) {
            fprintf(stderr, "warning(line %d): alias %s contains a \"" UCNV_OPTION_SEP_STRING "\". Options are parsed at run-time and do not need to be in the alias table.\n",
                    lineNum, alias);
        }
        if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0) {
            fprintf(stderr, "warning(line %d): alias %s contains an \"" UCNV_VALUE_SEP_STRING "\". Options are parsed at run-time and do not need to be in the alias table.\n",
                    lineNum, alias);
        }
    }

    if (standard != ALL_TAG_NUM) {
        /* Check for duplicate aliases for this tag on all converters */
        for (idx = 0; idx < converterCount; idx++) {
            for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
                uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
                if (aliasNum
                    && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
                {
                    if (idx == converter) {
                        /*
                         * (alias, standard) duplicates are harmless if they map to the same converter.
                         * Only print a warning in verbose mode, or if the alias is a precise duplicate,
                         * not just a lenient-match duplicate.
                         */
                        if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
                            fprintf(stderr, "warning(line %d): duplicate aliases %s and %s found for standard %s and converter %s\n",
                                    lineNum, alias, GET_ALIAS_STR(aliasNum),
                                    GET_TAG_STR(tags[standard].tag),
                                    GET_ALIAS_STR(converters[converter].converter));
                        }
                    } else {
                        fprintf(stderr, "warning(line %d): duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n",
                                lineNum, alias, GET_ALIAS_STR(aliasNum),
                                GET_TAG_STR(tags[standard].tag),
                                GET_ALIAS_STR(converters[converter].converter),
                                GET_ALIAS_STR(converters[idx].converter));
                    }
                    /* one report per converter is enough; go on with the next converter */
                    break;
                }
            }
        }
        /* It's okay to have multiple standards prefer the same name,
           so default aliases are not compared across tags. */
    }

    if (aliasList->aliasCount == 0) {
        /* first alias of this combination: reserve slot 0 for the default alias */
        aliasList->aliasCount++;
        startEmptyWithoutDefault = TRUE;
    }

    /* grow through a temporary so a failed allocation is detected before use */
    grownAliases = (uint16_t *)uprv_realloc(aliasList->aliases,
                                            (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
    if (grownAliases == NULL) {
        fprintf(stderr, "error(line %d): out of memory while adding alias %s\n", lineNum, alias);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
    aliasList->aliases = grownAliases;

    if (startEmptyWithoutDefault) {
        aliasList->aliases[0] = 0;
    }

    if (defaultName) {
        if (aliasList->aliases[0] != 0) {
            fprintf(stderr, "error(line %d): Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n",
                    lineNum,
                    alias,
                    GET_ALIAS_STR(aliasList->aliases[0]),
                    GET_TAG_STR(tags[standard].tag),
                    GET_ALIAS_STR(converters[converter].converter));
            exit(U_PARSE_ERROR);
        }
        aliasList->aliases[0] = GET_ALIAS_NUM(alias);
    } else {
        aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
    }

    converters[converter].totalAliasCount++; /* One more to the column */
    tags[standard].totalAliasCount++; /* One more to the row */

    return aliasList->aliasCount;
}
1999-11-23 22:49:12 +00:00
static uint16_t
1999-11-23 02:17:43 +00:00
addConverter ( const char * converter ) {
2002-06-28 23:13:30 +00:00
uint32_t idx ;
if ( converterCount > = MAX_CONV_COUNT ) {
fprintf ( stderr , " error(line %d): too many converters \n " , lineNum ) ;
1999-11-23 02:17:43 +00:00
exit ( U_BUFFER_OVERFLOW_ERROR ) ;
}
2002-06-28 23:13:30 +00:00
for ( idx = 0 ; idx < converterCount ; idx + + ) {
if ( ucnv_compareNames ( converter , GET_ALIAS_STR ( converters [ idx ] . converter ) ) = = 0 ) {
fprintf ( stderr , " error(line %d): duplicate converter %s found! \n " , lineNum , converter ) ;
exit ( U_PARSE_ERROR ) ;
break ;
}
}
converters [ converterCount ] . converter = GET_ALIAS_NUM ( converter ) ;
converters [ converterCount ] . totalAliasCount = 0 ;
1999-11-23 02:17:43 +00:00
1999-11-23 22:49:12 +00:00
return converterCount + + ;
1999-11-23 02:17:43 +00:00
}
2002-06-28 23:13:30 +00:00
/* resolve this alias based on the prioritization of the standard tags. */
static void
resolveAliasToConverter ( uint16_t alias , uint16_t * tagNum , uint16_t * converterNum ) {
uint16_t idx , idx2 , idx3 ;
2002-08-08 22:46:05 +00:00
for ( idx = UCNV_NUM_RESERVED_TAGS ; idx < tagCount ; idx + + ) {
2002-06-28 23:13:30 +00:00
for ( idx2 = 0 ; idx2 < converterCount ; idx2 + + ) {
for ( idx3 = 0 ; idx3 < tags [ idx ] . aliasList [ idx2 ] . aliasCount ; idx3 + + ) {
uint16_t aliasNum = tags [ idx ] . aliasList [ idx2 ] . aliases [ idx3 ] ;
if ( aliasNum = = alias ) {
* tagNum = idx ;
* converterNum = idx2 ;
return ;
}
}
}
}
/* Do the leftovers last, just in case */
/* There is no need to do the ALL tag */
idx = 0 ;
for ( idx2 = 0 ; idx2 < converterCount ; idx2 + + ) {
for ( idx3 = 0 ; idx3 < tags [ idx ] . aliasList [ idx2 ] . aliasCount ; idx3 + + ) {
uint16_t aliasNum = tags [ idx ] . aliasList [ idx2 ] . aliases [ idx3 ] ;
if ( aliasNum = = alias ) {
* tagNum = idx ;
* converterNum = idx2 ;
return ;
}
}
}
* tagNum = UINT16_MAX ;
* converterNum = UINT16_MAX ;
fprintf ( stderr , " warning: alias %s not found \n " ,
GET_ALIAS_STR ( alias ) ) ;
return ;
}
/* The knownAliases should be sorted before calling this function */
static uint32_t
resolveAliases ( uint16_t * uniqueAliasArr , uint16_t * uniqueAliasToConverterArr , uint16_t aliasOffset ) {
uint32_t uniqueAliasIdx = 0 ;
uint32_t idx ;
uint16_t currTagNum , oldTagNum ;
2002-08-08 22:46:05 +00:00
uint16_t currConvNum , oldConvNum ;
2002-06-28 23:13:30 +00:00
const char * lastName ;
resolveAliasToConverter ( knownAliases [ 0 ] , & oldTagNum , & currConvNum ) ;
uniqueAliasToConverterArr [ uniqueAliasIdx ] = currConvNum ;
2002-08-13 16:10:43 +00:00
oldConvNum = currConvNum ;
2002-06-28 23:13:30 +00:00
uniqueAliasArr [ uniqueAliasIdx ] = knownAliases [ 0 ] + aliasOffset ;
uniqueAliasIdx + + ;
lastName = GET_ALIAS_STR ( knownAliases [ 0 ] ) ;
for ( idx = 1 ; idx < knownAliasesCount ; idx + + ) {
resolveAliasToConverter ( knownAliases [ idx ] , & currTagNum , & currConvNum ) ;
if ( ucnv_compareNames ( lastName , GET_ALIAS_STR ( knownAliases [ idx ] ) ) = = 0 ) {
/* duplicate found */
2002-08-08 22:46:05 +00:00
if ( ( currTagNum < oldTagNum & & currTagNum > = UCNV_NUM_RESERVED_TAGS )
| | oldTagNum = = 0 ) {
2002-06-28 23:13:30 +00:00
oldTagNum = currTagNum ;
uniqueAliasToConverterArr [ uniqueAliasIdx - 1 ] = currConvNum ;
uniqueAliasArr [ uniqueAliasIdx - 1 ] = knownAliases [ idx ] + aliasOffset ;
if ( verbose ) {
printf ( " using %s instead of %s -> %s " ,
GET_ALIAS_STR ( knownAliases [ idx ] ) ,
lastName ,
GET_ALIAS_STR ( converters [ currConvNum ] . converter ) ) ;
2002-08-08 22:46:05 +00:00
if ( oldConvNum ! = currConvNum ) {
2002-06-28 23:13:30 +00:00
printf ( " (alias conflict) " ) ;
}
puts ( " " ) ;
}
}
else {
/* else ignore it */
if ( verbose ) {
printf ( " folding %s into %s -> %s " ,
GET_ALIAS_STR ( knownAliases [ idx ] ) ,
lastName ,
2002-08-08 22:46:05 +00:00
GET_ALIAS_STR ( converters [ oldConvNum ] . converter ) ) ;
if ( oldConvNum ! = currConvNum ) {
2002-06-28 23:13:30 +00:00
printf ( " (alias conflict) " ) ;
}
puts ( " " ) ;
}
}
2002-08-08 22:46:05 +00:00
if ( oldConvNum ! = currConvNum ) {
2002-06-28 23:13:30 +00:00
uniqueAliasToConverterArr [ uniqueAliasIdx - 1 ] | = UCNV_AMBIGUOUS_ALIAS_MAP_BIT ;
}
}
else {
uniqueAliasToConverterArr [ uniqueAliasIdx ] = currConvNum ;
2002-08-08 22:46:05 +00:00
oldConvNum = currConvNum ;
2002-06-28 23:13:30 +00:00
uniqueAliasArr [ uniqueAliasIdx ] = knownAliases [ idx ] + aliasOffset ;
uniqueAliasIdx + + ;
lastName = GET_ALIAS_STR ( knownAliases [ idx ] ) ;
2002-08-08 22:46:05 +00:00
oldTagNum = currTagNum ;
2002-06-28 23:13:30 +00:00
/*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
}
2006-06-12 10:05:08 +00:00
if ( uprv_strchr ( GET_ALIAS_STR ( converters [ currConvNum ] . converter ) , UCNV_OPTION_SEP_CHAR ) ! = NULL ) {
uniqueAliasToConverterArr [ uniqueAliasIdx - 1 ] | = UCNV_CONTAINS_OPTION_BIT ;
}
2002-06-28 23:13:30 +00:00
}
return uniqueAliasIdx ;
}
static void
createOneAliasList ( uint16_t * aliasArrLists , uint32_t tag , uint32_t converter , uint16_t offset ) {
uint32_t aliasNum ;
AliasList * aliasList = & tags [ tag ] . aliasList [ converter ] ;
if ( aliasList - > aliasCount = = 0 ) {
aliasArrLists [ tag * converterCount + converter ] = 0 ;
}
else {
aliasLists [ aliasListsSize + + ] = aliasList - > aliasCount ;
/* write into the array area a 1's based index. */
aliasArrLists [ tag * converterCount + converter ] = aliasListsSize ;
/* printf("tag %s converter %s\n",
GET_TAG_STR ( tags [ tag ] . tag ) ,
GET_ALIAS_STR ( converters [ converter ] . converter ) ) ; */
for ( aliasNum = 0 ; aliasNum < aliasList - > aliasCount ; aliasNum + + ) {
uint16_t value ;
/* printf(" %s\n",
GET_ALIAS_STR ( aliasList - > aliases [ aliasNum ] ) ) ; */
if ( aliasList - > aliases [ aliasNum ] ) {
value = aliasList - > aliases [ aliasNum ] + offset ;
} else {
value = 0 ;
if ( tag ! = 0 ) { /* Only show the warning when it's not the leftover tag. */
printf ( " warning: tag %s does not have a default alias for %s \n " ,
GET_TAG_STR ( tags [ tag ] . tag ) ,
GET_ALIAS_STR ( converters [ converter ] . converter ) ) ;
}
}
aliasLists [ aliasListsSize + + ] = value ;
if ( aliasListsSize > = MAX_LIST_SIZE ) {
2002-07-16 17:23:05 +00:00
fprintf ( stderr , " error: Too many alias lists \n " ) ;
2002-06-28 23:13:30 +00:00
exit ( U_BUFFER_OVERFLOW_ERROR ) ;
}
}
}
}
2006-06-11 16:57:02 +00:00
/*
 * Build a normalized copy of a block of consecutive NUL-terminated strings.
 *
 * normalizedStrings  destination buffer of at least stringBlockLength bytes
 * origStringBlock    source block of NUL-terminated strings
 * stringBlockLength  number of bytes in the source block
 *
 * The block is first copied verbatim. Each non-empty string is then replaced
 * in place by the output of ucnv_io_stripForCompare(); when that output is
 * non-empty, the rest of the string's slot is zero-filled. Every string keeps
 * its original offset within the block.
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    for (;;) {
        int32_t origLen = (int32_t)uprv_strlen(origStringBlock);
        int32_t slotSize;

        if (origLen >= stringBlockLength) {
            break;
        }
        slotSize = origLen + 1;     /* the string plus its terminating NUL */

        if (origLen > 0) {
            int32_t strippedLen;

            ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
            strippedLen = (int32_t)uprv_strlen(normalizedStrings);
            if (strippedLen > 0) {
                /* clear whatever is left of the verbatim copy behind the shorter string */
                uprv_memset(normalizedStrings + strippedLen, 0, slotSize - strippedLen);
            }
        }

        /* advance all three cursors to the next string slot */
        origStringBlock += slotSize;
        normalizedStrings += slotSize;
        stringBlockLength -= slotSize;
    }
}
2002-06-28 23:13:30 +00:00
/*
 * Serialize the complete alias table to `out`.
 *
 * Sorts knownAliases, resolves them to unique aliases, builds the
 * tag x converter array of alias lists, and then writes, in this order:
 * the table-of-contents size (8, or 9 when normalized strings are included),
 * the section sizes, the converter list, the tag list, the unique aliases,
 * the alias-to-converter mapping, the tag x converter array, the alias lists,
 * the table options, the tag strings, the alias strings and, unless
 * tableOptions.stringNormalizationType is UCNV_IO_UNNORMALIZED, the
 * normalized strings.
 *
 * Exits with U_MEMORY_ALLOCATION_ERROR when a working buffer cannot be
 * allocated.
 */
static void
writeAliasTable(UNewDataMemory *out) {
    uint32_t i, j;
    uint32_t uniqueAliasesSize;
    uint16_t aliasOffset = (uint16_t)(tagBlock.top / sizeof(uint16_t));
    uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
    uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
    uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));

    /*
     * Fail cleanly instead of dereferencing NULL later on. NULL is only treated
     * as an error when bytes were actually requested, in case uprv_malloc()
     * returns NULL for a zero-byte request.
     */
    if ((aliasArrLists == NULL && tagCount != 0 && converterCount != 0)
        || ((uniqueAliases == NULL || uniqueAliasesToConverter == NULL) && knownAliasesCount != 0)) {
        fprintf(stderr, "error: out of memory\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
    uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);

    /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
    aliasListsSize = 0;

    /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
    for (i = 0; i < tagCount; ++i) {
        for (j = 0; j < converterCount; ++j) {
            createOneAliasList(aliasArrLists, i, j, aliasOffset);
        }
    }

    /* Write the size of the TOC */
    if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
        udata_write32(out, 8);
    }
    else {
        udata_write32(out, 9);
    }

    /* Write the sizes of each section */
    /* All sizes are the number of uint16_t units, not bytes */
    udata_write32(out, converterCount);
    udata_write32(out, tagCount);
    udata_write32(out, uniqueAliasesSize);  /* list of aliases */
    udata_write32(out, uniqueAliasesSize);  /* The preresolved mapping of each untagged alias to a converter */
    udata_write32(out, tagCount * converterCount);
    udata_write32(out, aliasListsSize + 1);
    udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
    udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
    if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
        udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
    }

    /* write the table of converters */
    /* Think of this as the column headers */
    for (i = 0; i < converterCount; ++i) {
        udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
    }

    /* write the table of tags */
    /* Think of this as the row headers */
    for (i = UCNV_NUM_RESERVED_TAGS; i < tagCount; ++i) {
        udata_write16(out, tags[i].tag);
    }
    /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
    udata_write16(out, tags[EMPTY_TAG_NUM].tag);
    udata_write16(out, tags[ALL_TAG_NUM].tag);

    /* Write the unique list of aliases */
    udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));

    /* Write the converter that each unique alias resolves to */
    udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));

    /* Write the array to the lists */
    udata_writeBlock(out, (const void *)(aliasArrLists + (2 * converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
    /* Now write the leftover part of the array for the EMPTY and ALL lists */
    udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));

    /* Offset the next array to make the index start at 1. */
    udata_write16(out, 0xDEAD);

    /* Write the lists */
    udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));

    /* Write any options for the alias table. */
    udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));

    /* write the tags strings */
    udata_writeString(out, tagBlock.store, tagBlock.top);

    /* write the aliases strings */
    udata_writeString(out, stringBlock.store, stringBlock.top);

    /* write the normalized aliases strings */
    if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
        char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
        if (normalizedStrings == NULL && (tagBlock.top + stringBlock.top) != 0) {
            fprintf(stderr, "error: out of memory\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
        createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
        createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);

        /* Write out the complete normalized array. */
        udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
        uprv_free(normalizedStrings);
    }

    /* Release every working buffer; uniqueAliasesToConverter used to be leaked. */
    uprv_free(aliasArrLists);
    uprv_free(uniqueAliases);
    uprv_free(uniqueAliasesToConverter);
}
1999-11-23 02:17:43 +00:00
static char *
2003-07-02 23:14:10 +00:00
allocString ( StringBlock * block , const char * s , int32_t length ) {
uint32_t top ;
1999-11-23 02:17:43 +00:00
char * p ;
2003-07-02 23:14:10 +00:00
if ( length < 0 ) {
2003-12-11 05:00:40 +00:00
length = ( int32_t ) uprv_strlen ( s ) ;
2003-07-02 23:14:10 +00:00
}
/*
* add 1 for the terminating NUL
* and round up ( + 1 & ~ 1 )
* to keep the addresses on a 16 - bit boundary
*/
top = block - > top + ( uint32_t ) ( ( length + 1 + 1 ) & ~ 1 ) ;
2002-06-28 23:13:30 +00:00
if ( top > = block - > max ) {
fprintf ( stderr , " error(line %d): out of memory \n " , lineNum ) ;
1999-11-23 02:17:43 +00:00
exit ( U_MEMORY_ALLOCATION_ERROR ) ;
}
2003-07-02 23:14:10 +00:00
/* get the pointer and copy the string */
2002-02-08 01:08:50 +00:00
p = block - > store + block - > top ;
2003-07-02 23:14:10 +00:00
uprv_memcpy ( p , s , length ) ;
p [ length ] = 0 ; /* NUL-terminate it */
if ( ( length & 1 ) = = 0 ) {
p [ length + 1 ] = 0 ; /* set the padding byte */
}
/* check for invariant characters now that we have a NUL-terminated string for easy output */
if ( ! uprv_isInvariantString ( p , length ) ) {
fprintf ( stderr , " error(line %d): the name %s contains not just invariant characters \n " , lineNum , p ) ;
exit ( U_INVALID_TABLE_FORMAT ) ;
}
2002-02-08 01:08:50 +00:00
block - > top = top ;
1999-11-23 02:17:43 +00:00
return p ;
}
/*
 * qsort() comparison function for entries of knownAliases.
 *
 * Each argument points to a uint16_t alias number. Aliases are ordered by
 * ucnv_compareNames() so that names like IBM850 and ibm-850 are sorted
 * together; names that compare equal are ordered shortest first.
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    const char *name1 = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *name2 = GET_ALIAS_STR(*(const uint16_t *)alias2);
    int order = ucnv_compareNames(name1, name2);

    if (order != 0) {
        return order;
    }
    /* Equivalent names: sort the shortest first */
    return (int)uprv_strlen(name1) - (int)uprv_strlen(name2);
}
2000-02-29 18:42:28 +00:00
/*
* Hey , Emacs , please set the following :
*
* Local Variables :
* indent - tabs - mode : nil
* End :
*
*/
2000-08-10 01:31:28 +00:00