ee3f476557
X-SVN-Rev: 13105
1072 lines
35 KiB
C
1072 lines
35 KiB
C
/*
|
|
********************************************************************************
|
|
*
|
|
* Copyright (C) 1998-2003, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*
|
|
********************************************************************************
|
|
*
|
|
*
|
|
* makeconv.c:
|
|
* tool creating a binary (compressed) representation of the conversion mapping
|
|
* table (IBM NLTC ucmap format).
|
|
*
|
|
* 05/04/2000 helena Added fallback mapping into the picture...
|
|
* 06/29/2000 helena Major rewrite of the callback APIs.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include "unicode/putil.h"
|
|
#include "ucnv_io.h"
|
|
#include "unicode/ucnv_err.h"
|
|
#include "ucnv_bld.h"
|
|
#include "ucnv_imp.h"
|
|
#include "ucnv_cnv.h"
|
|
#include "cstring.h"
|
|
#include "cmemory.h"
|
|
#include "filestrm.h"
|
|
#include "toolutil.h"
|
|
#include "uoptions.h"
|
|
#include "unicode/udata.h"
|
|
#include "unewdata.h"
|
|
#include "ucmpwrit.h"
|
|
#include "makeconv.h"
|
|
#include "genmbcs.h"
|
|
|
|
#define DEBUG 0
|
|
|
|
/*
|
|
* from ucnvstat.c - static prototypes of data-based converters
|
|
*/
|
|
extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
|
|
|
|
/*
|
|
* Global - verbosity
|
|
*/
|
|
UBool VERBOSE = FALSE;
|
|
UBool TOUCHFILE = FALSE;
|
|
|
|
/*Reads the header of the table file and fills in basic knowledge about the converter
|
|
*in "converter"
|
|
*/
|
|
static void readHeaderFromFile(UConverterSharedData* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err);
|
|
|
|
/*Reads the rest of the file, and fills up the shared objects if necessary
|
|
Returns the UConverterTable. */
|
|
static void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err);
|
|
|
|
/* creates a UConverterSharedData from a mapping file.
|
|
* Fills in: *staticData, *table. Converter is NOT otherwise useful.
|
|
*/
|
|
static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err);
|
|
|
|
/*
|
|
* Set up the UNewData and write the converter..
|
|
*/
|
|
void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status);
|
|
|
|
static const char NLTC_SEPARATORS[9] = { '\r', '\n', '\t', ' ', '<', '>' ,'"' , 'U', '\0' };
|
|
static const char FALLBACK_SEPARATOR = '|';
|
|
static const char CODEPOINT_SEPARATORS[8] = { '\r', '>', '\\', 'x', '\n', ' ', '\t', '\0' };
|
|
static const char UNICODE_CODEPOINT_SEPARATORS[6] = { '<', '>', 'U', ' ', '\t', '\0' };
|
|
|
|
static const char *
|
|
skipWhitespace(const char *s) {
|
|
while(*s==' ' || *s=='\t') {
|
|
++s;
|
|
}
|
|
return s;
|
|
}
|
|
|
|
static int32_t
|
|
parseCodepageBytes(const char *s, uint32_t *pBytes, const char **pEnd) {
|
|
char *end;
|
|
int32_t length=0;
|
|
uint32_t bytes=0, value;
|
|
|
|
while(s[0]=='\\' && s[1]=='x') {
|
|
if(length==4) {
|
|
return -1;
|
|
}
|
|
value=uprv_strtoul(s+2, &end, 16);
|
|
s+=4;
|
|
if(end!=s) {
|
|
return -1;
|
|
}
|
|
bytes=(bytes<<8)|value;
|
|
++length;
|
|
}
|
|
if(length==0) {
|
|
return -1;
|
|
}
|
|
if(pEnd!=NULL) {
|
|
*pEnd=s;
|
|
}
|
|
*pBytes=bytes;
|
|
return length;
|
|
}
|
|
|
|
/* Remove all characters followed by '#'. There is an exception if there
|
|
* is a fallback sign '|' after the comment and the comment does not
|
|
* start in column 0. In this case, we just blank from '#' to just
|
|
* before the '|' in order to support the fact that IBM official .ucm
|
|
* files have the fallback information in comments!
|
|
*/
|
|
static char *
|
|
removeComments (char *line)
|
|
{
|
|
char *pound;
|
|
|
|
line = (char*)skipWhitespace(line);
|
|
pound = uprv_strchr (line, '#');
|
|
if (pound != NULL)
|
|
{
|
|
char *fallback = pound == line ? 0 : uprv_strchr(pound + 1, '|');
|
|
if (fallback != NULL)
|
|
{
|
|
uprv_memset(pound, ' ', fallback-pound);
|
|
}
|
|
else
|
|
{
|
|
*pound = '\0';
|
|
}
|
|
}
|
|
return line;
|
|
}
|
|
|
|
/* Returns true in c is a in set 'setOfChars', false otherwise
|
|
*/
|
|
static UBool
|
|
isInSet (char c, const char *setOfChars)
|
|
{
|
|
uint8_t i = 0;
|
|
|
|
while (setOfChars[i] != '\0')
|
|
{
|
|
if (c == setOfChars[i++])
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/* Returns pointer to the next non-whitespace (or non-separator)
|
|
*/
|
|
static int32_t
|
|
nextTokenOffset (const char *line, const char *separators)
|
|
{
|
|
int32_t i = 0;
|
|
|
|
while (line[i] && isInSet(line[i], separators))
|
|
i++;
|
|
|
|
return i;
|
|
}
|
|
|
|
/* Returns pointer to the next token based on the set of separators
|
|
*/
|
|
static char *
|
|
getToken (char *token, char *line, const char *separators)
|
|
{
|
|
int32_t i = nextTokenOffset (line, separators);
|
|
int8_t j = 0;
|
|
|
|
while (line[i] && (!isInSet(line[i], separators)))
|
|
token[j++] = line[i++];
|
|
token[j] = '\0';
|
|
|
|
return line + i;
|
|
}
|
|
|
|
UBool haveCopyright=TRUE;
|
|
|
|
static UDataInfo dataInfo={
|
|
sizeof(UDataInfo),
|
|
0,
|
|
|
|
U_IS_BIG_ENDIAN,
|
|
U_CHARSET_FAMILY,
|
|
sizeof(UChar),
|
|
0,
|
|
|
|
{0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
|
|
{6, 2, 0, 0}, /* formatVersion */
|
|
{0, 0, 0, 0} /* dataVersion (calculated at runtime) */
|
|
};
|
|
|
|
void writeConverterData(UConverterSharedData *mySharedData,
|
|
const char *cnvName,
|
|
const char *cnvDir,
|
|
UErrorCode *status)
|
|
{
|
|
UNewDataMemory *mem = NULL;
|
|
uint32_t sz2;
|
|
uint32_t size = 0;
|
|
|
|
if(U_FAILURE(*status))
|
|
{
|
|
return;
|
|
}
|
|
|
|
mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
|
|
|
|
if(U_FAILURE(*status))
|
|
{
|
|
fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
|
|
cnvName,
|
|
"cnv",
|
|
u_errorName(*status));
|
|
return;
|
|
}
|
|
|
|
if(VERBOSE)
|
|
{
|
|
fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv");
|
|
}
|
|
|
|
/* all read only, clean, platform independent data. Mmmm. :) */
|
|
udata_writeBlock(mem, mySharedData->staticData, sizeof(UConverterStaticData));
|
|
size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
|
|
/* Now, write the table */
|
|
size += ((NewConverter *)mySharedData->table)->write((NewConverter *)mySharedData->table, mySharedData->staticData, mem);
|
|
|
|
sz2 = udata_finish(mem, status);
|
|
if(size != sz2)
|
|
{
|
|
fprintf(stderr, "error: wrote %d bytes to the .cnv file but counted %d bytes\n", sz2, size);
|
|
*status=U_INTERNAL_PROGRAM_ERROR;
|
|
}
|
|
if(VERBOSE)
|
|
{
|
|
fprintf(stderr, "- Wrote %d bytes to the udata.\n", sz2);
|
|
}
|
|
}
|
|
|
|
static UOption options[]={
|
|
UOPTION_HELP_H, /* 0 Numbers for those who*/
|
|
UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
|
|
UOPTION_COPYRIGHT, /* 2 */
|
|
UOPTION_VERSION, /* 3 */
|
|
UOPTION_DESTDIR, /* 4 */
|
|
UOPTION_VERBOSE, /* 5 */
|
|
UOPTION_PACKAGE_NAME, /* 6 */
|
|
UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG) /* 7 */
|
|
};
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
UConverterSharedData* mySharedData = NULL;
|
|
UErrorCode err = U_ZERO_ERROR, localError;
|
|
char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
|
|
char touchFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
|
|
const char* destdir, *arg;
|
|
const char *pkgName = NULL;
|
|
size_t destdirlen;
|
|
char* dot = NULL, *outBasename;
|
|
char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
|
|
char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
|
|
UVersionInfo icuVersion;
|
|
|
|
err = U_ZERO_ERROR;
|
|
|
|
U_MAIN_INIT_ARGS(argc, argv);
|
|
|
|
/* Set up the ICU version number */
|
|
u_getVersion(icuVersion);
|
|
uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
|
|
|
|
/* preset then read command line options */
|
|
options[4].value=u_getDataDirectory();
|
|
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
|
|
|
/* error handling, printing usage message */
|
|
if(argc<0) {
|
|
fprintf(stderr,
|
|
"error in command line argument \"%s\"\n",
|
|
argv[-argc]);
|
|
} else if(argc<2) {
|
|
argc=-1;
|
|
}
|
|
if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
|
|
fprintf(stderr,
|
|
"usage: %s [-options] files...\n"
|
|
"\tread .ucm codepage mapping files and write .cnv files\n"
|
|
"options:\n"
|
|
"\t-h or -? or --help this usage text\n"
|
|
"\t-V or --version show a version message\n"
|
|
"\t-c or --copyright include a copyright notice\n"
|
|
"\t-d or --destdir destination directory, followed by the path\n"
|
|
"\t-v or --verbose Turn on verbose output\n",
|
|
argv[0]);
|
|
fprintf(stderr,
|
|
"\t-p or --pkgname sets the 'package' name for output files.\n"
|
|
"\t If name is ICUDATA, then the default icu package\n"
|
|
"\t name will be used.\n"
|
|
"\t-t or --touchfile Generate additional small file without packagename, for nmake\n");
|
|
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
|
}
|
|
|
|
if(options[3].doesOccur) {
|
|
fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
|
|
dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
|
|
fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n");
|
|
fprintf(stderr,"Corporation and others. All Rights Reserved.\n");
|
|
exit(0);
|
|
}
|
|
|
|
TOUCHFILE = options[7].doesOccur;
|
|
|
|
if(!options[6].doesOccur)
|
|
{
|
|
fprintf(stderr, "%s : option -p (package name) is required.\n",
|
|
argv[0]);
|
|
exit(1);
|
|
}
|
|
else
|
|
{
|
|
pkgName =options[6].value;
|
|
if(!strcmp(pkgName, "ICUDATA"))
|
|
{
|
|
pkgName = U_ICUDATA_NAME;
|
|
}
|
|
if(pkgName[0] == 0)
|
|
{
|
|
pkgName = NULL;
|
|
|
|
if(TOUCHFILE)
|
|
{
|
|
fprintf(stderr, "%s: Don't use touchfile option with an empty packagename.\n",
|
|
argv[0]);
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* get the options values */
|
|
haveCopyright = options[2].doesOccur;
|
|
destdir = options[4].value;
|
|
VERBOSE = options[5].doesOccur;
|
|
|
|
if (destdir != NULL && *destdir != 0) {
|
|
uprv_strcpy(outFileName, destdir);
|
|
destdirlen = uprv_strlen(destdir);
|
|
outBasename = outFileName + destdirlen;
|
|
if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
|
|
*outBasename++ = U_FILE_SEP_CHAR;
|
|
++destdirlen;
|
|
}
|
|
} else {
|
|
destdirlen = 0;
|
|
outBasename = outFileName;
|
|
}
|
|
|
|
#if DEBUG
|
|
{
|
|
int i;
|
|
printf("makeconv: processing %d files...\n", argc - 1);
|
|
for(i=1; i<argc; ++i) {
|
|
printf("%s ", argv[i]);
|
|
}
|
|
printf("\n");
|
|
fflush(stdout);
|
|
}
|
|
#endif
|
|
|
|
err = U_ZERO_ERROR;
|
|
for (++argv; --argc; ++argv)
|
|
{
|
|
arg = getLongPathname(*argv);
|
|
|
|
/*produces the right destination path for display*/
|
|
if (destdirlen != 0)
|
|
{
|
|
const char *basename;
|
|
|
|
/* find the last file sepator */
|
|
basename = uprv_strrchr(arg, U_FILE_SEP_CHAR);
|
|
if (basename == NULL) {
|
|
basename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
|
|
if (basename == NULL) {
|
|
basename = arg;
|
|
} else {
|
|
++basename;
|
|
}
|
|
} else {
|
|
++basename;
|
|
}
|
|
|
|
uprv_strcpy(outBasename, basename);
|
|
}
|
|
else
|
|
{
|
|
uprv_strcpy(outFileName, arg);
|
|
}
|
|
|
|
/*removes the extension if any is found*/
|
|
dot = uprv_strrchr(outBasename, '.');
|
|
if (dot)
|
|
{
|
|
*dot = '\0';
|
|
}
|
|
|
|
/* the basename without extension is the converter name */
|
|
uprv_strcpy(cnvName, outBasename);
|
|
|
|
if(TOUCHFILE)
|
|
{
|
|
uprv_strcpy(touchFileName, outBasename);
|
|
uprv_strcat(touchFileName, ".cnv");
|
|
}
|
|
|
|
if(pkgName != NULL)
|
|
{
|
|
/* changes both baename and filename */
|
|
uprv_strcpy(outBasename, pkgName);
|
|
uprv_strcat(outBasename, "_");
|
|
uprv_strcat(outBasename, cnvName);
|
|
}
|
|
|
|
|
|
/*Adds the target extension*/
|
|
uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
|
|
|
|
#if DEBUG
|
|
printf("makeconv: processing %s ...\n", arg);
|
|
fflush(stdout);
|
|
#endif
|
|
localError = U_ZERO_ERROR;
|
|
mySharedData = createConverterFromTableFile(arg, &localError);
|
|
|
|
if (U_FAILURE(localError) || (mySharedData == NULL))
|
|
{
|
|
/* if an error is found, print out an error msg and keep going */
|
|
fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
|
|
u_errorName(localError));
|
|
if(U_SUCCESS(err)) {
|
|
err = localError;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Make the static data name equal to the file name */
|
|
if( /*VERBOSE && */ uprv_stricmp(cnvName,mySharedData->staticData->name))
|
|
{
|
|
fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
|
|
cnvName,
|
|
CONVERTER_FILE_EXTENSION,
|
|
mySharedData->staticData->name);
|
|
}
|
|
|
|
uprv_strcpy((char*)mySharedData->staticData->name, cnvName);
|
|
|
|
if(!uprv_isInvariantString((char*)mySharedData->staticData->name, -1)) {
|
|
fprintf(stderr,
|
|
"Error: A converter name must contain only invariant characters.\n"
|
|
"%s is not a valid converter name.\n",
|
|
mySharedData->staticData->name);
|
|
if(U_SUCCESS(err)) {
|
|
err = U_INVALID_TABLE_FORMAT;
|
|
}
|
|
}
|
|
|
|
if(pkgName == NULL)
|
|
{
|
|
uprv_strcpy(cnvNameWithPkg, cnvName);
|
|
}
|
|
else
|
|
{
|
|
uprv_strcpy(cnvNameWithPkg, pkgName);
|
|
uprv_strcat(cnvNameWithPkg, "_");
|
|
uprv_strcat(cnvNameWithPkg, cnvName);
|
|
}
|
|
|
|
localError = U_ZERO_ERROR;
|
|
writeConverterData(mySharedData, cnvNameWithPkg, destdir, &localError);
|
|
((NewConverter *)mySharedData->table)->close((NewConverter *)mySharedData->table);
|
|
if(TOUCHFILE)
|
|
{
|
|
FileStream *q;
|
|
char msg[1024];
|
|
|
|
sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n",
|
|
cnvName, pkgName);
|
|
|
|
q = T_FileStream_open(touchFileName, "w");
|
|
if(q == NULL)
|
|
{
|
|
fprintf(stderr, "Error writing touchfile \"%s\"\n", touchFileName);
|
|
localError = U_FILE_ACCESS_ERROR;
|
|
}
|
|
|
|
else
|
|
{
|
|
T_FileStream_write(q, msg, uprv_strlen(msg));
|
|
T_FileStream_close(q);
|
|
}
|
|
}
|
|
|
|
/* write the information data */
|
|
uprv_free((UConverterStaticData *)mySharedData->staticData);
|
|
uprv_free(mySharedData);
|
|
|
|
if(U_FAILURE(localError))
|
|
{
|
|
/* if an error is found, print out an error msg and keep going*/
|
|
fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
|
|
u_errorName(localError));
|
|
if(U_SUCCESS(err)) {
|
|
err = localError;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
puts(outFileName);
|
|
}
|
|
}
|
|
fflush(stdout);
|
|
fflush(stderr);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static void
|
|
getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
|
|
if( (name[0]=='i' || name[0]=='I') &&
|
|
(name[1]=='b' || name[1]=='B') &&
|
|
(name[2]=='m' || name[2]=='M')
|
|
) {
|
|
name+=3;
|
|
if(*name=='-') {
|
|
++name;
|
|
}
|
|
*pPlatform=UCNV_IBM;
|
|
*pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
|
|
} else {
|
|
*pPlatform=UCNV_UNKNOWN;
|
|
*pCCSID=0;
|
|
}
|
|
}
|
|
|
|
/*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/
|
|
void readHeaderFromFile(UConverterSharedData* mySharedData,
|
|
FileStream* convFile,
|
|
const char* converterName,
|
|
UErrorCode *pErrorCode)
|
|
{
|
|
char line[200];
|
|
char *s, *end, *key, *value;
|
|
UConverterStaticData *staticData;
|
|
char c;
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
return;
|
|
}
|
|
|
|
staticData=(UConverterStaticData *)mySharedData->staticData;
|
|
staticData->conversionType=UCNV_UNSUPPORTED_CONVERTER;
|
|
staticData->platform=UCNV_IBM;
|
|
staticData->subCharLen=0;
|
|
|
|
while(T_FileStream_readLine(convFile, line, sizeof(line))) {
|
|
/* remove comments and trailing CR and LF and remove whitespace from the end */
|
|
for(end=line; (c=*end)!=0; ++end) {
|
|
if(c=='#' || c=='\r' || c=='\n') {
|
|
break;
|
|
}
|
|
}
|
|
while(end>line && (*(end-1)==' ' || *(end-1)=='\t')) {
|
|
--end;
|
|
}
|
|
*end=0;
|
|
|
|
/* skip leading white space and ignore empty lines */
|
|
s=(char *)skipWhitespace(line);
|
|
if(*s==0) {
|
|
continue;
|
|
}
|
|
|
|
/* stop at the beginning of the mapping section */
|
|
if(uprv_memcmp(s, "CHARMAP", 7)==0) {
|
|
break;
|
|
}
|
|
|
|
/* get the key name, bracketed in <> */
|
|
if(*s!='<') {
|
|
fprintf(stderr, "error: no header field <key> in line \"%s\"\n", line);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
key=++s;
|
|
while(*s!='>') {
|
|
if(*s==0) {
|
|
fprintf(stderr, "error: incomplete header field <key> in line \"%s\"\n", line);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
++s;
|
|
}
|
|
*s=0;
|
|
|
|
/* get the value string, possibly quoted */
|
|
s=(char *)skipWhitespace(s+1);
|
|
if(*s!='"') {
|
|
value=s;
|
|
} else {
|
|
/* remove the quotes */
|
|
value=s+1;
|
|
if(end>value && *(end-1)=='"') {
|
|
*--end=0;
|
|
}
|
|
}
|
|
|
|
/* collect the information from the header field, ignore unknown keys */
|
|
if(uprv_strcmp(key, "code_set_name")==0) {
|
|
if(*value!=0) {
|
|
uprv_strcpy((char*)staticData->name, value);
|
|
getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
|
|
}
|
|
} else if(uprv_strcmp(key, "uconv_class")==0) {
|
|
const UConverterStaticData *prototype;
|
|
|
|
if(uprv_strcmp(value, "DBCS")==0) {
|
|
staticData->conversionType=UCNV_DBCS;
|
|
} else if(uprv_strcmp(value, "SBCS")==0) {
|
|
staticData->conversionType = UCNV_SBCS;
|
|
} else if(uprv_strcmp(value, "MBCS")==0) {
|
|
staticData->conversionType = UCNV_MBCS;
|
|
} else if(uprv_strcmp(value, "EBCDIC_STATEFUL")==0) {
|
|
staticData->conversionType = UCNV_EBCDIC_STATEFUL;
|
|
} else {
|
|
fprintf(stderr, "error: unknown <uconv_class> %s\n", value);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
|
|
/* Now that we know the type, copy any 'default' values from the table. */
|
|
prototype=ucnv_converterStaticData[staticData->conversionType];
|
|
if(prototype!=NULL) {
|
|
if(staticData->name[0]==0) {
|
|
uprv_strcpy((char*)staticData->name, prototype->name);
|
|
}
|
|
|
|
if(staticData->codepage==0) {
|
|
staticData->codepage = prototype->codepage;
|
|
}
|
|
|
|
if(staticData->platform==0) {
|
|
staticData->platform = prototype->platform;
|
|
}
|
|
|
|
if(staticData->minBytesPerChar==0) {
|
|
staticData->minBytesPerChar = prototype->minBytesPerChar;
|
|
}
|
|
|
|
if(staticData->maxBytesPerChar==0) {
|
|
staticData->maxBytesPerChar = prototype->maxBytesPerChar;
|
|
}
|
|
|
|
if(staticData->subCharLen==0) {
|
|
staticData->subCharLen=prototype->subCharLen;
|
|
if(prototype->subCharLen>0) {
|
|
uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
|
|
}
|
|
}
|
|
}
|
|
} else if(uprv_strcmp(key, "mb_cur_max")==0) {
|
|
if('1'<=*value && *value<='4' && value[1]==0) {
|
|
staticData->maxBytesPerChar=(int8_t)(*value-'0');
|
|
} else {
|
|
fprintf(stderr, "error: illegal <mb_cur_max> %s\n", value);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
} else if(uprv_strcmp(key, "mb_cur_min")==0) {
|
|
if('1'<=*value && *value<='4' && value[1]==0) {
|
|
staticData->minBytesPerChar=(int8_t)(*value-'0');
|
|
} else {
|
|
fprintf(stderr, "error: illegal <mb_cur_min> %s\n", value);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
} else if(uprv_strcmp(key, "subchar")==0) {
|
|
uint32_t bytes;
|
|
int32_t length;
|
|
|
|
length=parseCodepageBytes(value, &bytes, (const char **)&end);
|
|
if(length>0 && *end==0) {
|
|
staticData->subCharLen=(int8_t)length;
|
|
do {
|
|
staticData->subChar[--length]=(uint8_t)bytes;
|
|
bytes>>=8;
|
|
} while(length>0);
|
|
} else {
|
|
fprintf(stderr, "error: illegal <subchar> %s\n", value);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
} else if(uprv_strcmp(key, "subchar1")==0) {
|
|
uint32_t bytes;
|
|
|
|
if(1==parseCodepageBytes(value, &bytes, (const char **)&end) && *end==0) {
|
|
staticData->subChar1=(uint8_t)bytes;
|
|
} else {
|
|
fprintf(stderr, "error: illegal <subchar1> %s\n", value);
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
} else if(uprv_strcmp(key, "icu:state")==0) {
|
|
/* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */
|
|
switch(staticData->conversionType) {
|
|
case UCNV_SBCS:
|
|
case UCNV_DBCS:
|
|
case UCNV_EBCDIC_STATEFUL:
|
|
staticData->conversionType = UCNV_MBCS;
|
|
break;
|
|
case UCNV_MBCS:
|
|
break;
|
|
default:
|
|
fprintf(stderr, "error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n");
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
|
|
if(staticData->maxBytesPerChar==0) {
|
|
fprintf(stderr, "error: <icu:state> before the <mb_cur_max> line\n");
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
if(mySharedData->table==NULL) {
|
|
mySharedData->table=(UConverterTable *)MBCSOpen(staticData->maxBytesPerChar);
|
|
if(mySharedData->table==NULL) {
|
|
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
}
|
|
if(!MBCSAddState((NewConverter *)mySharedData->table, value)) {
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
} else if(staticData->conversionType==UCNV_MBCS && mySharedData->table==NULL) {
|
|
fprintf(stderr, "error: missing state table information (<icu:state>) for MBCS\n");
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
} else if(staticData->subChar1!=0 &&
|
|
!staticData->conversionType==UCNV_MBCS &&
|
|
!staticData->conversionType==UCNV_EBCDIC_STATEFUL
|
|
) {
|
|
fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
|
|
*pErrorCode=U_INVALID_TABLE_FORMAT;
|
|
}
|
|
}
|
|
|
|
void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err)
|
|
{
|
|
char storageLine[200];
|
|
char* line = NULL;
|
|
UConverterStaticData *staticData=(UConverterStaticData *)sharedData->staticData;
|
|
NewConverter *cnvData = (NewConverter *)sharedData->table;
|
|
UChar32 unicodeValue, codepageValue;
|
|
uint8_t mbcsBytes[8];
|
|
int32_t mbcsLength;
|
|
char codepointBytes[20];
|
|
UBool isOK = TRUE;
|
|
uint8_t precisionMask = 0, unicodeMask = 0;
|
|
char endOfLine;
|
|
|
|
if(cnvData->startMappings!=NULL)
|
|
{
|
|
if(!cnvData->startMappings(cnvData)) {
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
return;
|
|
}
|
|
}
|
|
|
|
if(cnvData->isValid!=NULL)
|
|
{
|
|
const uint8_t *p = staticData->subChar;
|
|
codepageValue = 0;
|
|
switch(staticData->subCharLen) {
|
|
case 4: codepageValue = (codepageValue << 8) | *p++;
|
|
case 3: codepageValue = (codepageValue << 8) | *p++;
|
|
case 2: codepageValue = (codepageValue << 8) | *p++;
|
|
case 1: codepageValue = (codepageValue << 8) | *p;
|
|
default: break; /* must never occur */
|
|
}
|
|
if(!cnvData->isValid(cnvData, staticData->subChar, staticData->subCharLen, codepageValue)) {
|
|
fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
isOK = FALSE;
|
|
}
|
|
}
|
|
|
|
staticData->hasFromUnicodeFallback = staticData->hasToUnicodeFallback = FALSE;
|
|
|
|
while (T_FileStream_readLine(convFile, storageLine, sizeof(storageLine)))
|
|
{
|
|
removeComments(storageLine);
|
|
line = storageLine;
|
|
if (line[nextTokenOffset(line, NLTC_SEPARATORS)] != '\0')
|
|
{
|
|
/* get the Unicode code point */
|
|
line = getToken(codepointBytes, line, UNICODE_CODEPOINT_SEPARATORS);
|
|
if (uprv_strcmp(codepointBytes, "END") == 0)
|
|
{
|
|
break;
|
|
}
|
|
unicodeValue = (UChar32)T_CString_stringToInteger(codepointBytes, 16);
|
|
|
|
/* get the codepage bytes */
|
|
codepageValue = 0;
|
|
mbcsLength = 0;
|
|
do
|
|
{
|
|
line = getToken(codepointBytes, line, CODEPOINT_SEPARATORS);
|
|
mbcsBytes[mbcsLength] = (uint8_t)T_CString_stringToInteger(codepointBytes, 16);
|
|
codepageValue = codepageValue << 8 | mbcsBytes[mbcsLength++];
|
|
|
|
/* End of line could be \0 or | (if fallback) */
|
|
endOfLine= line[nextTokenOffset(line, CODEPOINT_SEPARATORS)];
|
|
} while((endOfLine != '\0') && (endOfLine != FALLBACK_SEPARATOR));
|
|
|
|
if(unicodeValue>=0x10000) {
|
|
unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */
|
|
} else if(UTF_IS_SURROGATE(unicodeValue)) {
|
|
unicodeMask|=UCNV_HAS_SURROGATES; /* there are single surrogates */
|
|
}
|
|
|
|
if((uint32_t)unicodeValue > 0x10ffff)
|
|
{
|
|
fprintf(stderr, "error: Unicode code point > U+10ffff in '%s'\n", storageLine);
|
|
isOK = FALSE;
|
|
}
|
|
else if(endOfLine == FALLBACK_SEPARATOR)
|
|
{
|
|
/* we know that there is a fallback separator */
|
|
precisionMask |= 1;
|
|
line = uprv_strchr(line, FALLBACK_SEPARATOR) + 1;
|
|
switch(*line)
|
|
{
|
|
case '0':
|
|
/* set roundtrip mappings */
|
|
isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0) &&
|
|
cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0);
|
|
break;
|
|
case '1':
|
|
/* set only a fallback mapping from Unicode to codepage */
|
|
staticData->hasFromUnicodeFallback = TRUE;
|
|
isOK &= cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1);
|
|
break;
|
|
case '2':
|
|
/* skip subchar mappings */
|
|
break;
|
|
case '3':
|
|
/* set only a fallback mapping from codepage to Unicode */
|
|
staticData->hasToUnicodeFallback = TRUE;
|
|
isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1);
|
|
break;
|
|
default:
|
|
fprintf(stderr, "error: illegal fallback indicator '%s' in '%s'\n", line - 1, storageLine);
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
precisionMask |= 2;
|
|
/* set the mappings */
|
|
isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1) &&
|
|
cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(unicodeMask == 3)
|
|
{
|
|
fprintf(stderr, "warning: contains mappings to both supplementary code points and single surrogates\n");
|
|
}
|
|
staticData->unicodeMask = unicodeMask;
|
|
|
|
if(cnvData->finishMappings!=NULL)
|
|
{
|
|
cnvData->finishMappings(cnvData, staticData);
|
|
}
|
|
|
|
if(!isOK)
|
|
{
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
}
|
|
else if(precisionMask == 3)
|
|
{
|
|
fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
}
|
|
}
|
|
|
|
/*creates a UConverterStaticData, fills in necessary links to it the appropriate function pointers*/
|
|
UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err)
|
|
{
|
|
FileStream* convFile = NULL;
|
|
UConverterSharedData* mySharedData = NULL;
|
|
UConverterStaticData* myStaticData = NULL;
|
|
|
|
if (U_FAILURE(*err)) return NULL;
|
|
|
|
convFile = T_FileStream_open(converterName, "r");
|
|
if (convFile == NULL)
|
|
{
|
|
*err = U_FILE_ACCESS_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
|
|
mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
|
|
if (mySharedData == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
T_FileStream_close(convFile);
|
|
return NULL;
|
|
}
|
|
|
|
uprv_memset(mySharedData, 0, sizeof(UConverterSharedData));
|
|
|
|
mySharedData->structSize = sizeof(UConverterSharedData);
|
|
|
|
myStaticData = (UConverterStaticData*) uprv_malloc(sizeof(UConverterStaticData));
|
|
if (myStaticData == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
T_FileStream_close(convFile);
|
|
return NULL;
|
|
}
|
|
uprv_memset(myStaticData, 0, sizeof(UConverterStaticData));
|
|
mySharedData->staticData = myStaticData;
|
|
myStaticData->structSize = sizeof(UConverterStaticData);
|
|
/* mySharedData->staticDataOwned = FALSE; */ /* not owned if in udata */
|
|
mySharedData->sharedDataCached = FALSE;
|
|
|
|
mySharedData->dataMemory = NULL; /* for init */
|
|
|
|
readHeaderFromFile(mySharedData, convFile, converterName, err);
|
|
|
|
if (U_FAILURE(*err)) return NULL;
|
|
|
|
switch (myStaticData->conversionType)
|
|
{
|
|
case UCNV_SBCS:
|
|
{
|
|
/* SBCS: use MBCS data structure with a default state table */
|
|
if(mySharedData->staticData->maxBytesPerChar!=1) {
|
|
fprintf(stderr, "error: SBCS codepage with max bytes/char!=1\n");
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
break;
|
|
}
|
|
myStaticData->conversionType = UCNV_MBCS;
|
|
if(mySharedData->table == NULL) {
|
|
NewConverter *sharedDataTable = MBCSOpen(1);
|
|
if(sharedDataTable != NULL) {
|
|
if(!MBCSAddState(sharedDataTable, "0-ff")) {
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
sharedDataTable->close(sharedDataTable);
|
|
} else {
|
|
mySharedData->table = (UConverterTable *)sharedDataTable;
|
|
}
|
|
} else {
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case UCNV_MBCS:
|
|
{
|
|
/* MBCSOpen() was called by readHeaderFromFile() */
|
|
break;
|
|
}
|
|
case UCNV_EBCDIC_STATEFUL:
|
|
{
|
|
/* EBCDIC_STATEFUL: use MBCS data structure with a default state table */
|
|
if(mySharedData->staticData->maxBytesPerChar!=2) {
|
|
fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n");
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
break;
|
|
}
|
|
myStaticData->conversionType = UCNV_MBCS;
|
|
if(mySharedData->table == NULL) {
|
|
NewConverter *sharedDataTable = MBCSOpen(2);
|
|
if(sharedDataTable != NULL) {
|
|
if( !MBCSAddState(sharedDataTable, "0-ff, e:1.s, f:0.s") ||
|
|
!MBCSAddState(sharedDataTable, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4") ||
|
|
!MBCSAddState(sharedDataTable, "0-40:1.i, 41-fe:1., ff:1.i") ||
|
|
!MBCSAddState(sharedDataTable, "0-ff:1.i, 40:1.") ||
|
|
!MBCSAddState(sharedDataTable, "0-ff:1.i")
|
|
) {
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
sharedDataTable->close(sharedDataTable);
|
|
} else {
|
|
mySharedData->table = (UConverterTable *)sharedDataTable;
|
|
}
|
|
} else {
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case UCNV_DBCS:
|
|
{
|
|
/* DBCS: use MBCS data structure with a default state table */
|
|
if(mySharedData->staticData->maxBytesPerChar!=2) {
|
|
fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n");
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
break;
|
|
}
|
|
myStaticData->conversionType = UCNV_MBCS;
|
|
if(mySharedData->table == NULL) {
|
|
NewConverter *sharedDataTable = MBCSOpen(2);
|
|
if(sharedDataTable != NULL) {
|
|
if( !MBCSAddState(sharedDataTable, "0-3f:3, 40:2, 41-fe:1, ff:3") ||
|
|
!MBCSAddState(sharedDataTable, "41-fe") ||
|
|
!MBCSAddState(sharedDataTable, "40") ||
|
|
!MBCSAddState(sharedDataTable, "")
|
|
) {
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
sharedDataTable->close(sharedDataTable);
|
|
} else {
|
|
mySharedData->table = (UConverterTable *)sharedDataTable;
|
|
}
|
|
} else {
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
default :
|
|
fprintf(stderr, "error: <uconv_class> omitted\n");
|
|
*err = U_INVALID_TABLE_FORMAT;
|
|
mySharedData->table = NULL;
|
|
break;
|
|
};
|
|
|
|
if(U_SUCCESS(*err) && mySharedData->table != NULL)
|
|
{
|
|
loadTableFromFile(convFile, mySharedData, err);
|
|
}
|
|
|
|
T_FileStream_close(convFile);
|
|
|
|
return mySharedData;
|
|
}
|
|
|
|
/*
|
|
* Hey, Emacs, please set the following:
|
|
*
|
|
* Local Variables:
|
|
* indent-tabs-mode: nil
|
|
* End:
|
|
*
|
|
*/
|