2003-08-08 23:39:34 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
2004-01-15 00:23:07 +00:00
|
|
|
* Copyright (C) 2003-2004, International Business Machines
|
2003-08-08 23:39:34 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: icuswap.cpp
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2003aug08
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*
|
|
|
|
* This tool takes an ICU data file and "swaps" it, that is, changes its
|
|
|
|
* platform properties between big-/little-endianness and ASCII/EBCDIC charset
|
|
|
|
* families.
|
|
|
|
* The modified data file is written to a new file.
|
|
|
|
* Useful as an install-time tool for shipping only one flavor of ICU data
|
|
|
|
* and preparing data files for the target platform.
|
|
|
|
* Will not work with data DLLs (shared libraries).
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/udata.h"
|
2003-09-18 21:33:37 +00:00
|
|
|
#include "cmemory.h"
|
|
|
|
#include "cstring.h"
|
|
|
|
#include "uarrsort.h"
|
|
|
|
#include "ucmndata.h"
|
2003-08-08 23:39:34 +00:00
|
|
|
#include "udataswp.h"
|
2004-01-15 00:23:07 +00:00
|
|
|
#include "toolutil.h"
|
2003-08-12 12:23:56 +00:00
|
|
|
#include "uoptions.h"
|
|
|
|
|
|
|
|
/* swapping implementations in common */
|
|
|
|
|
2003-08-08 23:39:34 +00:00
|
|
|
#include "uresdata.h"
|
2003-08-12 00:26:07 +00:00
|
|
|
#include "ucnv_io.h"
|
2003-08-12 12:23:56 +00:00
|
|
|
#include "uprops.h"
|
2003-09-12 02:46:15 +00:00
|
|
|
#include "ucol_swp.h"
|
2003-09-22 13:57:19 +00:00
|
|
|
#include "ucnv_bld.h"
|
2003-09-22 21:57:14 +00:00
|
|
|
#include "unormimp.h"
|
2003-09-22 22:51:37 +00:00
|
|
|
#include "sprpimpl.h"
|
2003-09-23 21:06:24 +00:00
|
|
|
#include "propname.h"
|
2003-09-29 17:24:15 +00:00
|
|
|
#include "rbbidata.h"
|
2003-08-12 12:23:56 +00:00
|
|
|
|
|
|
|
/* swapping implementations in i18n */
|
|
|
|
|
|
|
|
/* definitions */
|
2003-08-08 23:39:34 +00:00
|
|
|
|
|
|
|
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression wherever it is used (for example as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
|
|
|
|
|
|
|
/*
 * Indexes into options[] below.
 * The enumerators must stay in the same order as the table entries.
 */
enum {
    OPT_HELP_H,             /* -h / --help */
    OPT_HELP_QUESTION_MARK, /* -? */
    OPT_OUT_TYPE            /* -t / --type, takes an argument */
};

/* Command-line options understood by this tool; parsed by u_parseArgs() in main(). */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
};
|
|
|
|
|
|
|
|
/**
 * Determine the size of an open file and rewind it to the beginning.
 *
 * @param f an open, seekable stream
 * @return the file size in bytes, or -1 if seeking/telling fails or if the
 *         size does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);

    /* always try to rewind, even if ftell() failed */
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }

    /* ftell() reports errors as -1; long may be wider than int32_t */
    if(size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Identifies and then transforms the ICU data piece in-place, or determines
|
|
|
|
* its length. See UDataSwapFn.
|
|
|
|
* This function handles .dat data packages as well as single data pieces
|
|
|
|
* and internally dispatches to per-type swap functions.
|
|
|
|
* Sets a U_UNSUPPORTED_ERROR if the data format is not recognized.
|
|
|
|
*
|
|
|
|
* @see UDataSwapFn
|
|
|
|
* @see udata_openSwapper
|
|
|
|
* @see udata_openSwapperForInputData
|
|
|
|
* @draft ICU 2.8
|
|
|
|
*/
|
|
|
|
static int32_t
|
|
|
|
udata_swap(const UDataSwapper *ds,
|
|
|
|
const void *inData, int32_t length, void *outData,
|
|
|
|
UErrorCode *pErrorCode);
|
|
|
|
|
2003-09-18 21:33:37 +00:00
|
|
|
/**
|
|
|
|
* Swap an ICU .dat package, including swapping of enclosed items.
|
|
|
|
*/
|
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
|
|
udata_swapPackage(const UDataSwapper *ds,
|
|
|
|
const void *inData, int32_t length, void *outData,
|
|
|
|
UErrorCode *pErrorCode);
|
|
|
|
|
2004-01-15 00:23:07 +00:00
|
|
|
/*
 * Input and output filenames, set by main().
 * udata_swapPackage() derives the old and new package names from them
 * because it has to rewrite the package-name prefix of every ToC entry name.
 */
static const char *inFilename;
static const char *outFilename;
|
|
|
|
|
2004-01-06 21:19:44 +00:00
|
|
|
U_CDECL_BEGIN
|
2003-08-09 00:27:31 +00:00
|
|
|
static void U_CALLCONV
|
|
|
|
printError(void *context, const char *fmt, va_list args) {
|
|
|
|
vfprintf((FILE *)context, fmt, args);
|
|
|
|
}
|
2004-01-06 21:19:44 +00:00
|
|
|
U_CDECL_END
|
2003-08-09 00:27:31 +00:00
|
|
|
|
2003-08-08 23:39:34 +00:00
|
|
|
static int
|
|
|
|
printUsage(const char *pname, UBool ishelp) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
|
|
|
|
ishelp ? 'U' : 'u', pname);
|
|
|
|
if(ishelp) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"\nOptions: -h, -?, --help print this message and exit\n"
|
|
|
|
" Read the input file, swap its platform properties according\n"
|
|
|
|
" to the -t or --type option, and write the result to the output file.\n"
|
|
|
|
" -tl change to little-endian/ASCII charset family\n"
|
2003-08-28 00:33:35 +00:00
|
|
|
" -tb change to big-endian/ASCII charset family\n"
|
2004-01-15 00:23:07 +00:00
|
|
|
" -te change to big-endian/EBCDIC charset family\n");
|
2003-08-08 23:39:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return !ishelp;
|
|
|
|
}
|
|
|
|
|
|
|
|
extern int
|
|
|
|
main(int argc, char *argv[]) {
|
|
|
|
FILE *in, *out;
|
|
|
|
const char *pname;
|
|
|
|
char *data;
|
|
|
|
int32_t length;
|
|
|
|
UBool ishelp;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
UDataSwapper *ds;
|
|
|
|
UErrorCode errorCode;
|
|
|
|
uint8_t outCharset;
|
|
|
|
UBool outIsBigEndian;
|
|
|
|
|
|
|
|
U_MAIN_INIT_ARGS(argc, argv);
|
|
|
|
|
|
|
|
/* get the program basename */
|
2003-08-09 00:27:31 +00:00
|
|
|
pname=strrchr(argv[0], U_FILE_SEP_CHAR);
|
2003-08-08 23:39:34 +00:00
|
|
|
if(pname==NULL) {
|
2003-08-09 00:27:31 +00:00
|
|
|
pname=strrchr(argv[0], '/');
|
2003-08-08 23:39:34 +00:00
|
|
|
}
|
|
|
|
if(pname!=NULL) {
|
|
|
|
++pname;
|
|
|
|
} else {
|
|
|
|
pname=argv[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
|
|
|
|
ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
|
|
|
|
if(ishelp || argc!=3) {
|
|
|
|
return printUsage(pname, ishelp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse the output type option */
|
|
|
|
data=(char *)options[OPT_OUT_TYPE].value;
|
|
|
|
if(data[0]==0 || data[1]!=0) {
|
|
|
|
/* the type must be exactly one letter */
|
|
|
|
return printUsage(pname, FALSE);
|
|
|
|
}
|
|
|
|
switch(data[0]) {
|
|
|
|
case 'l':
|
|
|
|
outIsBigEndian=FALSE;
|
|
|
|
outCharset=U_ASCII_FAMILY;
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
outIsBigEndian=TRUE;
|
|
|
|
outCharset=U_ASCII_FAMILY;
|
|
|
|
break;
|
|
|
|
case 'e':
|
|
|
|
outIsBigEndian=TRUE;
|
|
|
|
outCharset=U_EBCDIC_FAMILY;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return printUsage(pname, FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
in=out=NULL;
|
|
|
|
data=NULL;
|
|
|
|
|
2004-01-15 00:23:07 +00:00
|
|
|
/* udata_swapPackage() needs the filenames */
|
|
|
|
inFilename=argv[1];
|
|
|
|
outFilename=argv[2];
|
|
|
|
|
2003-08-08 23:39:34 +00:00
|
|
|
/* open the input file, get its length, allocate memory for it, read the file */
|
|
|
|
in=fopen(argv[1], "rb");
|
|
|
|
if(in==NULL) {
|
|
|
|
fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
|
|
|
|
rc=2;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
length=fileSize(in);
|
|
|
|
if(length<=0) {
|
|
|
|
fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
|
|
|
|
rc=2;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2003-09-18 21:33:37 +00:00
|
|
|
/*
|
|
|
|
* +15: udata_swapPackage() may need to add a few padding bytes to the
|
|
|
|
* last item if charset swapping is done,
|
|
|
|
* because the last item may be resorted into the middle and then needs
|
|
|
|
* additional padding bytes
|
|
|
|
*/
|
|
|
|
data=(char *)malloc(length+15);
|
2003-08-08 23:39:34 +00:00
|
|
|
if(data==NULL) {
|
|
|
|
fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
|
|
|
|
rc=2;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2003-09-18 21:33:37 +00:00
|
|
|
/* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
|
|
|
|
uprv_memset(data+length-15, 0xaa, 15);
|
|
|
|
|
2003-08-08 23:39:34 +00:00
|
|
|
if(length!=(int32_t)fread(data, 1, length, in)) {
|
|
|
|
fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
|
|
|
|
rc=3;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose(in);
|
|
|
|
in=NULL;
|
|
|
|
|
|
|
|
/* swap the data in-place */
|
|
|
|
errorCode=U_ZERO_ERROR;
|
|
|
|
ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
|
|
|
|
if(U_FAILURE(errorCode)) {
|
|
|
|
fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
|
|
|
|
pname, argv[1], u_errorName(errorCode));
|
|
|
|
rc=4;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2003-08-09 00:27:31 +00:00
|
|
|
ds->printError=printError;
|
|
|
|
ds->printErrorContext=stderr;
|
2003-08-08 23:39:34 +00:00
|
|
|
|
|
|
|
length=udata_swap(ds, data, length, data, &errorCode);
|
|
|
|
udata_closeSwapper(ds);
|
|
|
|
if(U_FAILURE(errorCode)) {
|
|
|
|
fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
|
|
|
|
pname, argv[1], u_errorName(errorCode));
|
|
|
|
rc=4;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
out=fopen(argv[2], "wb");
|
|
|
|
if(out==NULL) {
|
|
|
|
fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
|
|
|
|
rc=5;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(length!=(int32_t)fwrite(data, 1, length, out)) {
|
|
|
|
fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
|
|
|
|
rc=6;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose(out);
|
|
|
|
out=NULL;
|
|
|
|
|
|
|
|
/* all done */
|
|
|
|
rc=0;
|
|
|
|
|
|
|
|
done:
|
|
|
|
if(in!=NULL) {
|
|
|
|
fclose(in);
|
|
|
|
}
|
|
|
|
if(out!=NULL) {
|
|
|
|
fclose(out);
|
|
|
|
}
|
|
|
|
if(data!=NULL) {
|
|
|
|
free(data);
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* swap the data ------------------------------------------------------------ */
|
|
|
|
|
|
|
|
static const struct {
|
|
|
|
uint8_t dataFormat[4];
|
|
|
|
UDataSwapFn *swapFn;
|
|
|
|
} swapFns[]={
|
2003-08-12 00:26:07 +00:00
|
|
|
{ { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */
|
2003-09-22 13:57:19 +00:00
|
|
|
#if !UCONFIG_NO_LEGACY_CONVERSION
|
|
|
|
{ { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */
|
2003-09-16 00:30:57 +00:00
|
|
|
{ { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */
|
2003-09-22 13:57:19 +00:00
|
|
|
#endif
|
2003-09-18 21:33:37 +00:00
|
|
|
{ { 0x43, 0x6d, 0x6e, 0x44 }, udata_swapPackage }, /* dataFormat="CmnD" */
|
2003-09-22 22:51:37 +00:00
|
|
|
#if !UCONFIG_NO_IDNA
|
|
|
|
{ { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */
|
|
|
|
#endif
|
2003-08-12 00:26:07 +00:00
|
|
|
/* insert data formats here, descending by expected frequency of occurrence */
|
2003-08-12 12:23:56 +00:00
|
|
|
{ { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */
|
2003-09-22 21:57:14 +00:00
|
|
|
#if !UCONFIG_NO_NORMALIZATION
|
|
|
|
{ { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
|
|
|
|
#endif
|
2003-09-22 13:57:19 +00:00
|
|
|
#if !UCONFIG_NO_COLLATION
|
2003-09-12 18:49:09 +00:00
|
|
|
{ { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
|
|
|
|
{ { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
|
2003-09-29 17:24:15 +00:00
|
|
|
#endif
|
|
|
|
#if !UCONFIG_NO_BREAK_ITERATION
|
|
|
|
{ { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */
|
2003-09-22 13:57:19 +00:00
|
|
|
#endif
|
2003-09-23 21:06:24 +00:00
|
|
|
{ { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
|
2003-09-16 00:30:57 +00:00
|
|
|
{ { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames } /* dataFormat="unam" */
|
2003-08-08 23:39:34 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static int32_t
|
|
|
|
udata_swap(const UDataSwapper *ds,
|
|
|
|
const void *inData, int32_t length, void *outData,
|
|
|
|
UErrorCode *pErrorCode) {
|
2003-10-12 17:37:11 +00:00
|
|
|
char dataFormatChars[4];
|
2003-08-08 23:39:34 +00:00
|
|
|
const UDataInfo *pInfo;
|
2003-10-12 17:37:11 +00:00
|
|
|
int32_t headerSize, i, swappedLength;
|
2003-08-08 23:39:34 +00:00
|
|
|
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Preflight the header first; checks for illegal arguments, too.
|
|
|
|
* Do not swap the header right away because the format-specific swapper
|
|
|
|
* will swap it, get the headerSize again, and also use the header
|
|
|
|
* information. Otherwise we would have to pass some of the information
|
|
|
|
* and not be able to use the UDataSwapFn signature.
|
|
|
|
*/
|
|
|
|
headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we wanted udata_swap() to also handle non-loadable data like a UTrie,
|
|
|
|
* then we could check here for further known magic values and structures.
|
|
|
|
*/
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
return 0; /* the data format was not recognized */
|
|
|
|
}
|
|
|
|
|
|
|
|
pInfo=(const UDataInfo *)((const char *)inData+4);
|
2003-10-12 17:37:11 +00:00
|
|
|
|
|
|
|
{
|
|
|
|
/* convert the data format from ASCII to Unicode to the system charset */
|
|
|
|
UChar u[4]={
|
|
|
|
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
|
|
|
pInfo->dataFormat[2], pInfo->dataFormat[3]
|
|
|
|
};
|
|
|
|
|
|
|
|
if(uprv_isInvariantUString(u, 4)) {
|
|
|
|
u_UCharsToChars(u, dataFormatChars, 4);
|
|
|
|
} else {
|
|
|
|
dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* dispatch to the swap function for the dataFormat */
|
2003-08-08 23:39:34 +00:00
|
|
|
for(i=0; i<LENGTHOF(swapFns); ++i) {
|
|
|
|
if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
|
2003-10-12 17:37:11 +00:00
|
|
|
swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
|
2003-09-18 21:33:37 +00:00
|
|
|
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
2003-10-12 17:37:11 +00:00
|
|
|
udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
|
|
|
|
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
|
|
|
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
|
|
|
dataFormatChars[0], dataFormatChars[1],
|
|
|
|
dataFormatChars[2], dataFormatChars[3],
|
|
|
|
u_errorName(*pErrorCode));
|
|
|
|
} else if(swappedLength<(length-15)) {
|
|
|
|
/* swapped less than expected */
|
|
|
|
udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
|
|
|
|
swappedLength, length,
|
|
|
|
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
|
|
|
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
|
|
|
dataFormatChars[0], dataFormatChars[1],
|
|
|
|
dataFormatChars[2], dataFormatChars[3],
|
|
|
|
u_errorName(*pErrorCode));
|
2003-09-18 21:33:37 +00:00
|
|
|
}
|
|
|
|
|
2003-10-12 17:37:11 +00:00
|
|
|
return swappedLength;
|
2003-08-08 23:39:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* the dataFormat was not recognized */
|
2003-10-12 17:37:11 +00:00
|
|
|
udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
|
|
|
|
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
|
|
|
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
|
|
|
dataFormatChars[0], dataFormatChars[1],
|
|
|
|
dataFormatChars[2], dataFormatChars[3]);
|
2003-08-28 00:33:35 +00:00
|
|
|
|
2003-08-08 23:39:34 +00:00
|
|
|
*pErrorCode=U_UNSUPPORTED_ERROR;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2004-01-15 00:23:07 +00:00
|
|
|
/* swap .dat package files -------------------------------------------------- */
|
|
|
|
|
|
|
|
static int32_t
|
|
|
|
extractPackageName(const UDataSwapper *ds, const char *filename,
|
|
|
|
char pkg[], int32_t capacity,
|
|
|
|
UErrorCode *pErrorCode) {
|
|
|
|
const char *basename;
|
|
|
|
int32_t len;
|
|
|
|
|
|
|
|
if(U_FAILURE(*pErrorCode)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
basename=findBasename(filename);
|
|
|
|
len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
|
|
|
|
|
|
|
|
if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
|
|
|
|
udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
|
|
|
|
basename);
|
|
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(len>=capacity) {
|
|
|
|
udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
|
|
|
|
(long)capacity);
|
|
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uprv_memcpy(pkg, basename, len);
|
|
|
|
pkg[len]=0;
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2003-09-18 21:33:37 +00:00
|
|
|
/*
 * Working copy of one package table-of-contents entry,
 * filled in and used by udata_swapPackage().
 */
struct ToCEntry {
    uint32_t nameOffset; /* the entry's nameOffset as read from the input ToC */
    uint32_t inOffset;   /* the entry's dataOffset as read from the input ToC */
    uint32_t outOffset;  /* dataOffset that is written to the output ToC */
    uint32_t length;     /* number of bytes of the item */
};
|
|
|
|
|
2004-01-06 21:19:44 +00:00
|
|
|
U_CDECL_BEGIN
|
2004-01-10 00:34:53 +00:00
|
|
|
static int32_t U_CALLCONV
|
2003-09-18 21:33:37 +00:00
|
|
|
compareToCEntries(const void *context, const void *left, const void *right) {
|
|
|
|
const char *chars=(const char *)context;
|
|
|
|
return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset,
|
|
|
|
chars+((const ToCEntry *)right)->nameOffset);
|
|
|
|
}
|
2004-01-06 21:19:44 +00:00
|
|
|
U_CDECL_END
|
2003-09-18 21:33:37 +00:00
|
|
|
|
|
|
|
/*
 * Swap an ICU .dat package (dataFormat "CmnD", format version 1),
 * including all of the items that it contains.
 *
 * Besides swapping, the package-name prefix of every ToC entry name is
 * changed from the input package name to the output package name; both are
 * derived from the file-level inFilename/outFilename variables and must have
 * the same length. If the charset family changes, then the items are also
 * reordered by their swapped names.
 *
 * An item that fails to swap is reported and copied unchanged; this does not
 * make the whole call fail.
 *
 * @param ds         the swapper
 * @param inData     the package, starting with its ICU data header
 * @param length     number of bytes at inData, or a negative value for
 *                   preflighting (nothing is written, only the length is
 *                   determined)
 * @param outData    output buffer; may be the same as inData (in-place), in
 *                   which case the caller must provide 15 extra bytes of
 *                   padding after the input data
 * @param pErrorCode in/out error code
 * @return the number of bytes of the (swapped) package, or 0 on error
 */
U_CAPI int32_t U_EXPORT2
udata_swapPackage(const UDataSwapper *ds,
                  const void *inData, int32_t length, void *outData,
                  UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t itemCount, offset, i;
    int32_t itemLength;

    const UDataOffsetTOCEntry *inEntries;
    UDataOffsetTOCEntry *outEntries;

    ToCEntry *table;

    char inPkgName[32], outPkgName[32];
    int32_t inPkgNameLength, outPkgNameLength;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
        pInfo->dataFormat[1]==0x6d &&
        pInfo->dataFormat[2]==0x6e &&
        pInfo->dataFormat[3]==0x44 &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /*
     * We need to change the ToC name entries so that they have the correct
     * package name prefix.
     * Extract the package names from the in/out filenames.
     */
    inPkgNameLength=extractPackageName(
                        ds, inFilename,
                        inPkgName, (int32_t)sizeof(inPkgName),
                        pErrorCode);
    outPkgNameLength=extractPackageName(
                        ds, outFilename,
                        outPkgName, (int32_t)sizeof(outPkgName),
                        pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /*
     * It is possible to work with inPkgNameLength!=outPkgNameLength,
     * but then the length of the data file would change more significantly,
     * which we are not currently prepared for.
     */
    if(inPkgNameLength!=outPkgNameLength) {
        udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
                         inPkgName, outPkgName);
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);

    if(length<0) {
        /* preflighting */
        itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
        if(itemCount==0) {
            /* no items: count only the item count and return */
            return headerSize+4;
        }

        /* read the last item's offset and preflight it */
        offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
        itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);

        if(U_SUCCESS(*pErrorCode)) {
            return headerSize+offset+(uint32_t)itemLength;
        } else {
            return 0;
        }
    } else {
        size_t allocSize;

        /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
        length-=headerSize;
        if(length<4) {
            /* itemCount does not fit */
            offset=0xffffffff;
            itemCount=0; /* make compilers happy */
        } else {
            itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
            if(itemCount==0) {
                offset=4;
            } else if(itemCount>((uint32_t)length-4)/8) {
                /*
                 * ToC table does not fit.
                 * Same test as length<4+8*itemCount, but written as a
                 * division so that a huge itemCount cannot wrap around
                 * in 32-bit arithmetic and pass the check.
                 */
                offset=0xffffffff;
            } else {
                /* offset of the last item plus at least 20 bytes for its header */
                offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
                if(offset<=0xffffffff-20) {
                    offset+=20;
                } else {
                    offset=0xffffffff; /* would wrap around: cannot fit */
                }
            }
        }
        if((uint32_t)length<offset) {
            udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outBytes=(uint8_t *)outData+headerSize;

        /* swap the item count */
        ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);

        if(itemCount==0) {
            /* no items: just return now */
            return headerSize+4;
        }

        /* swap the item name strings */
        offset=4+8*itemCount;
        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
        udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
            return 0;
        }
        /* keep offset and itemLength in case we allocate and copy the strings below */

        /* swap the package names into the output charset */
        if(ds->outCharset!=U_CHARSET_FAMILY) {
            UDataSwapper *ds2;

            ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
            if(ds2!=NULL) {
                /* the swap calls themselves do nothing if opening has set a failure code */
                ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
                ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
                udata_closeSwapper(ds2);
            }
            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
                /* do not continue: the per-item error recovery below would mask this failure */
                return 0;
            }
        }

        /*
         * Change the prefix of each ToC entry name from the old to the new
         * package name.
         * Work on the output strings: they are already in the output charset
         * like the package names, and the input buffer is not modified.
         * (For in-place swapping, outBytes still equals inBytes here.)
         */
        {
            char *entryName;

            for(i=0; i<itemCount; ++i) {
                entryName=(char *)outBytes+ds->readUInt32(inEntries[i].nameOffset);

                if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
                    uprv_memcpy(entryName, outPkgName, inPkgNameLength);
                } else {
                    udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
                                     (long)i);
                    *pErrorCode=U_INVALID_FORMAT_ERROR;
                    return 0;
                }
            }
        }

        /*
         * Allocate the ToC table and, if necessary, a temporary buffer for
         * pseudo-in-place swapping.
         *
         * We cannot swap in-place because:
         *
         * 1. If the swapping of an item fails mid-way, then in-place swapping
         * has destroyed its data.
         * Out-of-place swapping allows us to then copy its original data.
         *
         * 2. If swapping changes the charset family, then we must resort
         * not only the ToC table but also the data items themselves.
         * This requires a permutation and is best done with separate in/out
         * buffers.
         *
         * We swapped the strings above to avoid the malloc below if string swapping fails.
         */
        if(inData==outData) {
            /* +15: prepare for extra padding of a newly-last item */
            allocSize=itemCount*sizeof(ToCEntry)+length+15;
            table=(ToCEntry *)uprv_malloc(allocSize);
            if(table!=NULL) {
                /* the temporary output buffer follows the table in the same allocation */
                outBytes=(uint8_t *)(table+itemCount);

                /* copy the item count and the swapped strings */
                uprv_memcpy(outBytes, inBytes, 4);
                uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
            }
        } else {
            allocSize=itemCount*sizeof(ToCEntry);
            table=(ToCEntry *)uprv_malloc(allocSize);
        }
        if(table==NULL) {
            udata_printError(ds, "udata_swapPackage(): out of memory allocating %lu bytes\n",
                             (unsigned long)allocSize);
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        outEntries=(UDataOffsetTOCEntry *)(outBytes+4);

        /* read the ToC table */
        for(i=0; i<itemCount; ++i) {
            table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
            table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
            if(i>0) {
                /* an item extends up to the start of the next one */
                table[i-1].length=table[i].inOffset-table[i-1].inOffset;
            }
        }
        /* the last item extends to the end of the data */
        table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;

        if(ds->inCharset==ds->outCharset) {
            /* no charset swapping, no resorting: keep item offsets the same */
            for(i=0; i<itemCount; ++i) {
                table[i].outOffset=table[i].inOffset;
            }
        } else {
            /* charset swapping: resort items by their swapped names */

            /*
             * Before the actual sorting, we need to make sure that each item
             * has a length that is a multiple of 16 bytes so that all items
             * are 16-aligned.
             * Only the old last item may be missing up to 15 padding bytes.
             * Add padding bytes for it.
             * Since the icuswap main() function has already allocated enough
             * input buffer space and set the last 15 bytes there to 0xaa,
             * we only need to increase the total data length and the length
             * of the last item here.
             */
            if((length&0xf)!=0) {
                int32_t delta=16-(length&0xf);
                length+=delta;
                table[itemCount-1].length+=(uint32_t)delta;
            }

            uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
                           compareToCEntries, outBytes, FALSE, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                /* without a sorted table the output would be inconsistent */
                udata_printError(ds, "udata_swapPackage() failed to sort the ToC table - %s\n",
                                 u_errorName(*pErrorCode));
                uprv_free(table);
                return 0;
            }

            /*
             * Note: Before sorting, the inOffset values were in order.
             * Now the outOffset values are in order.
             */

            /* assign outOffset values */
            offset=table[0].inOffset;
            for(i=0; i<itemCount; ++i) {
                table[i].outOffset=offset;
                offset+=table[i].length;
            }
        }

        /* write the output ToC table */
        for(i=0; i<itemCount; ++i) {
            ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
            ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
        }

        /* swap each data item */
        for(i=0; i<itemCount; ++i) {
            /* first copy the item bytes to make sure that unreachable bytes are copied */
            uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);

            /* swap the item */
            udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
                       outBytes+table[i].outOffset, pErrorCode);

            if(U_FAILURE(*pErrorCode)) {
                if(ds->outCharset==U_CHARSET_FAMILY) {
                    /* the swapped item name is printable only in this platform's charset */
                    udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
                                         " at inOffset 0x%x length 0x%x - %s\n"
                                         " the data item will be copied, not swapped\n\n",
                                     (char *)outBytes+table[i].nameOffset,
                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
                } else {
                    udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
                                         " at inOffset 0x%x length 0x%x - %s\n"
                                         " the data item will be copied, not swapped\n\n",
                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
                }
                /* reset the error code, copy the data item, and continue */
                *pErrorCode=U_ZERO_ERROR;
                uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
            }
        }

        if(inData==outData) {
            /* copy the data from the temporary buffer to the in-place buffer */
            uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
        }
        uprv_free(table);

        return headerSize+length;
    }
}
|
|
|
|
|
2003-08-08 23:39:34 +00:00
|
|
|
/*
|
|
|
|
* Hey, Emacs, please set the following:
|
|
|
|
*
|
|
|
|
* Local Variables:
|
|
|
|
* indent-tabs-mode: nil
|
|
|
|
* End:
|
|
|
|
*
|
|
|
|
*/
|