ICU-157 make common, memory-mappable file

X-SVN-Rev: 212
This commit is contained in:
Markus Scherer 1999-11-22 17:56:30 +00:00
parent 968971f95f
commit 17e50b1b8f
3 changed files with 388 additions and 0 deletions

View File

@ -0,0 +1,281 @@
/*
*******************************************************************************
* *
* COPYRIGHT: *
* (C) Copyright International Business Machines Corporation, 1999 *
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
* US Government Users Restricted Rights - Use, duplication, or disclosure *
* restricted by GSA ADP Schedule Contract with IBM Corp. *
* *
*******************************************************************************
* file name: gencmn.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999nov01
* created by: Markus W. Scherer
*
* This program reads a list of data files and combines them
* into one common, memory-mappable file.
*/
#include <stdio.h>
#include <stdlib.h>
#include "utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "../toolutil/toolutil.h"
#include "../toolutil/unewdata.h"
/* Capacity in bytes of the static stringStore buffer that allocString() carves pieces from. */
#define STRING_STORE_SIZE 100000
/* Number of entries in the static files[] table; addFile() exits once it is full. */
#define MAX_FILE_COUNT 2000
/*
 * Name and type passed to udata_create() for the output file;
 * the usage text in main() shows them joined as "icudata.dat".
 */
#define COMMON_DATA_NAME "icudata"
#define DATA_TYPE "dat"
/* Copyright text passed as the fourth argument to udata_create() in main(). */
#define DATA_COPYRIGHT "\n" \
"*******************************************************************************\n" \
"* COPYRIGHT: *\n" \
"* (C) Copyright International Business Machines Corporation, 1999 *\n" \
"* Licensed Material - Program-Property of IBM - All Rights Reserved. *\n" \
"* US Government Users Restricted Rights - Use, duplication, or disclosure *\n" \
"* restricted by GSA ADP Schedule Contract with IBM Corp. *\n" \
"*******************************************************************************\n"
/*
 * UDataInfo cf. udata.h
 *
 * Descriptor of the output file; main() passes its address to udata_create().
 * The last three rows are labelled below as dataFormat, formatVersion and
 * dataVersion; 0x43 0x6d 0x6e 0x44 are the ASCII codes of "CmnD".
 * NOTE(review): the two literal 0 members are presumably reserved fields -
 * the struct declaration is not visible here, confirm against udata.h.
 */
static const UDataInfo dataInfo={
sizeof(UDataInfo),
0,
U_IS_BIG_ENDIAN,
U_CHARSET_FAMILY,
sizeof(UChar),
0,
0x43, 0x6d, 0x6e, 0x44, /* dataFormat="CmnD" */
1, 0, 0, 0, /* formatVersion */
3, 0, 0, 0 /* dataVersion */
};
/* Size limit parsed from argv[1] in main(); addFile() does not add files larger than this. */
static uint32_t maxSize;
/* Backing storage for the pathnames copied by addFile(); handed out linearly by allocString(). */
static char stringStore[STRING_STORE_SIZE];
/*
 * stringTop: number of bytes of stringStore already handed out.
 * basenameTotal: sum of the basenameLength values of all added files
 * (each length includes the terminating NUL).
 */
static uint32_t stringTop=0, basenameTotal=0;
/*
 * One accepted input file.
 * pathname: copy of the listed path, stored in stringStore
 * basename: result of findBasename(pathname)
 * (presumably points into pathname - confirm in toolutil)
 * basenameLength: strlen(basename)+1
 * basenameOffset: offset of the basename in the output, assigned in main()
 * fileSize: size reported by T_FileStream_size() in addFile()
 * fileOffset: offset of the file contents in the output, assigned in main()
 */
typedef struct {
char *pathname, *basename;
uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
} File;
/* Table of accepted files; filled by addFile() and sorted by basename in main(). */
static File files[MAX_FILE_COUNT];
/* Number of used entries in files[]. */
static uint32_t fileCount=0;
/* prototypes --------------------------------------------------------------- */
/* Check one listed file and, unless it is larger than maxSize, append it to files[]. */
static void
addFile(const char *filename);
/* Reserve length bytes in stringStore; exits the program if the store is exhausted. */
static char *
allocString(uint32_t length);
/* qsort() comparator that orders File entries by basename using icu_strcmp(). */
static int
compareFiles(const void *file1, const void *file2);
/* -------------------------------------------------------------------------- */
/*
 * Entry point: combine the listed data files into one common file.
 *
 * Command line: argv[1] is the maximum size of a file to include,
 * argv[2] is the optional name of the list file (default: stdin).
 * Each non-empty line of the list names one file.
 *
 * Data written after udata_create(), with all offsets counted from the
 * first byte of this data:
 *   uint32 fileCount
 *   fileCount pairs of uint32 (basenameOffset, fileOffset), sorted by basename
 *   the basenames, each written with its terminating NUL included in the length
 *   zero padding up to a multiple of 16
 *   the contents of each file, each one starting at a multiple of 16
 *
 * Returns 0 on success and U_ILLEGAL_ARGUMENT_ERROR when no argument is
 * given; every other failure prints a message to stderr and calls exit()
 * with an error code.
 */
extern int
main(int argc, char *argv[]) {
    static uint8_t buffer[4096];
    char line[512];
    FileStream *in, *file;
    UNewDataMemory *out;
    char *s;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t i, fileOffset, basenameOffset, length, copied;

    if(argc<2) {
        fprintf(stderr,
            "usage: %s maxsize [list-filename]\n"
            "\tread the list file (default: stdin) and \n"
            "\tcreate " COMMON_DATA_NAME "." DATA_TYPE " from all the files listed but each not larger than maxsize\n",
            argv[0]);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }

    /* base 0: accepts decimal, octal (0...) and hexadecimal (0x...) */
    maxSize=icu_strtoul(argv[1], NULL, 0);
    if(maxSize==0) {
        fprintf(stderr, "gencmn: maxSize %s not valid\n", argv[1]);
        exit(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(argc==2) {
        in=T_FileStream_stdin();
    } else {
        in=T_FileStream_open(argv[2], "r");
        if(in==NULL) {
            fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    /* read the list of files and get their lengths */
    while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
        /* cut the line at the first CR or LF */
        for(s=line; *s!=0; ++s) {
            if(*s=='\r' || *s=='\n') {
                *s=0;
                break;
            }
        }

        /* a blank line names no file: skip it instead of failing to open "" */
        if(line[0]==0) {
            continue;
        }

        addFile(getLongPathname(line));
    }

    if(in!=T_FileStream_stdin()) {
        T_FileStream_close(in);
    }

    /* sort the files by basename */
    qsort(files, fileCount, sizeof(File), compareFiles);

    /* determine the offsets of all basenames and files in this common one */
    basenameOffset=4+8*fileCount;
    /* end of the basename area, rounded up to a multiple of 16 */
    fileOffset=(basenameOffset+basenameTotal+15)&~0xf;
    for(i=0; i<fileCount; ++i) {
        files[i].fileOffset=fileOffset;
        fileOffset+=(files[i].fileSize+15)&~0xf;
        files[i].basenameOffset=basenameOffset;
        basenameOffset+=files[i].basenameLength;
    }

    /* create the output file */
    out=udata_create(DATA_TYPE, COMMON_DATA_NAME, &dataInfo, DATA_COPYRIGHT, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencmn: unable to open output file - error %s\n", errorName(errorCode));
        exit(errorCode);
    }

    /* write the table of contents */
    udata_write32(out, fileCount);
    for(i=0; i<fileCount; ++i) {
        udata_write32(out, files[i].basenameOffset);
        udata_write32(out, files[i].fileOffset);
    }

    /* write the basenames */
    for(i=0; i<fileCount; ++i) {
        udata_writeString(out, files[i].basename, files[i].basenameLength);
    }

    /*
     * length holds the size of what was written last; only its low 4 bits
     * matter because everything before that started at a multiple of 16
     */
    length=4+8*fileCount+basenameTotal;

    /* copy the files */
    for(i=0; i<fileCount; ++i) {
        /* pad to 16-align the next file */
        length&=0xf;
        if(length!=0) {
            length=16-length;
            icu_memset(buffer, 0, length);
            udata_writeBlock(out, buffer, length);
        }

        /* copy the next file */
        file=T_FileStream_open(files[i].pathname, "rb");
        if(file==NULL) {
            fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
            exit(U_FILE_ACCESS_ERROR);
        }

        copied=0;
        for(;;) {
            length=T_FileStream_read(file, buffer, sizeof(buffer));
            if(length==0) {
                break;
            }
            udata_writeBlock(out, buffer, length);
            copied+=length;
        }
        T_FileStream_close(file);

        /*
         * The offsets in the table of contents were computed from the size
         * measured in addFile(); a different byte count here would make
         * every following offset wrong.
         */
        if(copied!=files[i].fileSize) {
            fprintf(stderr, "gencmn: copied %lu bytes but expected %lu bytes from listed file %s\n",
                (unsigned long)copied, (unsigned long)files[i].fileSize, files[i].pathname);
            exit(U_FILE_ACCESS_ERROR);
        }

        length=files[i].fileSize;
    }

    /* finish */
    udata_finish(out, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencmn: error finishing output file - %s\n", errorName(errorCode));
        exit(errorCode);
    }

    return 0;
}
static void
addFile(const char *filename) {
FileStream *file;
char *s;
uint32_t length;
if(fileCount==MAX_FILE_COUNT) {
fprintf(stderr, "gencmn: too many files\n");
exit(U_BUFFER_OVERFLOW_ERROR);
}
/* try to open the file */
file=T_FileStream_open(filename, "rb");
if(file==NULL) {
fprintf(stderr, "gencmn: unable to open listed file %s\n", filename);
exit(U_FILE_ACCESS_ERROR);
}
/* get the file length */
length=T_FileStream_size(file);
if(T_FileStream_error(file) || length<=20) {
fprintf(stderr, "gencmn: unable to get length of listed file %s\n", filename);
exit(U_FILE_ACCESS_ERROR);
}
T_FileStream_close(file);
/* do not add files that are longer than maxSize */
if(length>maxSize) {
return;
}
files[fileCount].fileSize=length;
/* store the pathname */
length=icu_strlen(filename)+1;
s=allocString(length);
icu_memcpy(s, filename, length);
files[fileCount].pathname=s;
/* get the basename */
s=(char *)findBasename(s);
files[fileCount].basename=s;
length=icu_strlen(s)+1;
files[fileCount].basenameLength=length;
basenameTotal+=length;
++fileCount;
}
/*
 * Reserve length bytes from the static string store.
 *
 * length: number of bytes needed, including any terminating NUL
 * Returns a pointer to the start of the reserved bytes inside stringStore.
 * Prints "out of memory" and exits with U_MEMORY_ALLOCATION_ERROR when
 * fewer than length bytes are left. Reserved bytes are never released.
 */
static char *
allocString(uint32_t length) {
    char *p;

    /*
     * Compare against the remaining space rather than computing
     * stringTop+length, which could wrap around for a huge length.
     * stringTop never exceeds STRING_STORE_SIZE, so the subtraction is safe.
     */
    if(length>STRING_STORE_SIZE-stringTop) {
        fprintf(stderr, "gencmn: out of memory\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    p=stringStore+stringTop;
    stringTop+=length;
    return p;
}
static int
compareFiles(const void *file1, const void *file2) {
/* sort by basename */
return icu_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
}

View File

@ -0,0 +1,102 @@
# Microsoft Developer Studio Project File - Name="gencmn" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=gencmn - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "gencmn.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "gencmn.mak" CFG="gencmn - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "gencmn - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "gencmn - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "gencmn - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /Za /W3 /GX /O2 /I "..\..\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 toolutil.lib icuuc.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib\Release"
!ELSEIF "$(CFG)" == "gencmn - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
# ADD CPP /nologo /Za /W3 /Gm /GX /ZI /Od /I "..\..\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 toolutil.lib icuuc.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib\Debug"
!ENDIF
# Begin Target
# Name "gencmn - Win32 Release"
# Name "gencmn - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=.\gencmn.c
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# End Group
# Begin Group "Resource Files"
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project

View File

@ -37,6 +37,11 @@ echo unames_dat.obj>>mkdll.tmp
type mkdll.lk>>mkdll.tmp
link @mkdll.tmp
echo create the common, memory-mappable file
del "%ICU_DATA%icudata.dat"
echo %ICU_DATA%unames.dat>mkmap.tmp
gencmn\%toolversion%\gencmn 1000000 mkmap.tmp
goto :end
:error