diff --git a/NEWS b/NEWS index 939c1ef9..079483a3 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ v0.4.4 Fixed : high compression modes for Windows 32 bits -new : external dictionary API extended to buffered mode +new : external dictionary API extended to buffered mode and accessible through command line new : windows DLL project, thanks to Christophe Chevalier v0.4.3 : diff --git a/programs/Makefile b/programs/Makefile index b116fc6b..822f2d24 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -94,8 +94,8 @@ zstd-noBench: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \ zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY) $(CC) $(FLAGS) -DZSTD_NOBENCH $^ -o zstd$(EXT) -zstd-frugal: clean - CFLAGS=-Os $(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0 +zstd-frugal: clean + $(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0 fullbench : $(ZSTD_FILES) \ datagen.c fullbench.c diff --git a/programs/fileio.c b/programs/fileio.c index 27c72c57..3ee5faee 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -121,6 +121,7 @@ #define CACHELINE 64 +#define MAX_DICT_SIZE (512 KB) /* ************************************* * Macros @@ -235,12 +236,13 @@ static U64 FIO_getFileSize(const char* infilename) } -unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, int cLevel) +unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, + const char* dictFileName, int cLevel) { U64 filesize = 0; U64 compressedfilesize = 0; - BYTE* inBuff; - BYTE* outBuff; + U64 dictSize = 0; + BYTE* inBuff, *outBuff, *dictBuff=NULL; size_t inBuffSize = ZBUFF_recommendedCInSize(); size_t outBuffSize = ZBUFF_recommendedCOutSize(); FILE* finput; @@ -252,16 +254,43 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* ctx = ZBUFF_createCCtx(); inBuff = (BYTE*)malloc(inBuffSize); outBuff = (BYTE*)malloc(outBuffSize); - if (!inBuff || !outBuff || !ctx) EXM_THROW(21, "Allocation error : not enough memory"); + if (!inBuff || !outBuff || !ctx) EXM_THROW(20, "Allocation error : not enough memory"); + + /* dictionary */ + if (dictFileName) + { + FILE* dictHandle; + size_t read; + DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); + dictHandle = fopen(dictFileName, "rb"); + if (dictHandle==0) EXM_THROW(21, "Error opening dictionary file %s", dictFileName); + dictSize = FIO_getFileSize(dictFileName); + if (dictSize > MAX_DICT_SIZE) + { + int seekResult; + if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ + DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); + seekResult = fseek(dictHandle, dictSize-MAX_DICT_SIZE, SEEK_SET); /* use end of file */ + if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName); + dictSize = MAX_DICT_SIZE; + } + dictBuff = (BYTE*)malloc(dictSize); + if (dictBuff==NULL) EXM_THROW(20, "Allocation error : not enough memory for dictBuff"); + read = fread(dictBuff, 1, (size_t)dictSize, dictHandle); + if (read!=dictSize) EXM_THROW(21, "Error reading dictionary file %s", dictFileName); + fclose(dictHandle); + } /* init */ FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); - filesize = FIO_getFileSize(input_filename); + filesize = FIO_getFileSize(input_filename) + dictSize; errorCode = ZBUFF_compressInit_advanced(ctx, ZSTD_getParams(cLevel, filesize)); if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing compression"); - filesize = 0; + errorCode = ZBUFF_compressWithDictionary(ctx, dictBuff, dictSize); + if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing dictionary"); /* Main compression loop */ + filesize = 0; while (1) { size_t inSize; @@ -311,6 +340,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* /* clean */ free(inBuff); free(outBuff); + free(dictBuff); ZBUFF_freeCCtx(ctx); fclose(finput); if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename); @@ -322,6 +352,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput, BYTE* inBuff, size_t inBuffSize, size_t alreadyLoaded, BYTE* outBuff, size_t outBuffSize, + BYTE* dictBuff, size_t dictSize, ZBUFF_DCtx* dctx) { U64 frameSize = 0; @@ -329,6 +360,7 @@ unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput, /* Main decompression Loop */ ZBUFF_decompressInit(dctx); + ZBUFF_decompressWithDictionary(dctx, dictBuff, dictSize); while (1) { /* Decode */ @@ -359,16 +391,42 @@ unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput, } -unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename) +unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename, const char* dictFileName) { FILE* finput, *foutput; BYTE* inBuff=NULL; size_t inBuffSize = ZBUFF_recommendedDInSize(); BYTE* outBuff=NULL; size_t outBuffSize = ZBUFF_recommendedDOutSize(); + BYTE* dictBuff=NULL; + size_t dictSize = 0; U64 filesize = 0; size_t toRead; + /* dictionary */ + if (dictFileName) + { + FILE* dictHandle; + size_t read; + DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); + dictHandle = fopen(dictFileName, "rb"); + if (dictHandle==0) EXM_THROW(21, "Error opening dictionary file %s", dictFileName); + dictSize = FIO_getFileSize(dictFileName); + if (dictSize > MAX_DICT_SIZE) + { + int seekResult; + if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ + DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); + seekResult = fseek(dictHandle, dictSize-MAX_DICT_SIZE, SEEK_SET); /* use end of file */ + if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName); + dictSize = MAX_DICT_SIZE; + } + dictBuff = (BYTE*)malloc(dictSize); + if (dictBuff==NULL) EXM_THROW(20, "Allocation error : not enough memory for dictBuff"); + read = fread(dictBuff, 1, (size_t)dictSize, dictHandle); + if (read!=dictSize) EXM_THROW(21, "Error reading dictionary file %s", dictFileName); + fclose(dictHandle); + } /* Init */ ZBUFF_DCtx* dctx = ZBUFF_createDCtx(); @@ -396,7 +454,11 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha } #endif /* ZSTD_LEGACY_SUPPORT */ - filesize += FIO_decompressFrame(foutput, finput, inBuff, inBuffSize, toRead, outBuff, outBuffSize, dctx); + filesize += FIO_decompressFrame(foutput, finput, + inBuff, inBuffSize, toRead, + outBuff, outBuffSize, + dictBuff, dictSize, + dctx); } DISPLAYLEVEL(2, "\r%79s\r", ""); @@ -405,6 +467,7 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha /* clean */ free(inBuff); free(outBuff); + free(dictBuff); ZBUFF_freeDCtx(dctx); fclose(finput); if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename); diff --git a/programs/fileio.h b/programs/fileio.h index 037c819e..0f508787 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -52,8 +52,8 @@ void FIO_setNotificationLevel(unsigned level); /* ************************************* * Stream/File functions ***************************************/ -unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename, int compressionLevel); -unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename); +unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename, const char* dictFileName, int compressionLevel); +unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName); /** FIO_compressFilename : @result : size of compressed file diff --git a/programs/playTests.sh b/programs/playTests.sh index 56327ff7..74993739 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -42,6 +42,13 @@ echo "**** flush write error test **** " echo foo | $ZSTD > /dev/full && die "write error not detected!" echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!" +echo "*** dictionary tests *** " + +./datagen > tmpDict +./datagen -g1M | md5sum > tmp1 +./datagen -g1M | ./zstd -D tmpDict | ./zstd -D tmpDict -dv | md5sum > tmp2 +diff -q tmp1 tmp2 + echo "**** zstd round-trip tests **** " roundTripTest diff --git a/programs/zstdcli.c b/programs/zstdcli.c index ef30f813..e04e40fd 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -118,8 +118,7 @@ static int usage(const char* programName) DISPLAY( "input : a filename\n"); DISPLAY( " with no FILE, or when FILE is - , read standard input\n"); DISPLAY( "Arguments :\n"); - DISPLAY( " -1 : Fast compression (default) \n"); - DISPLAY( " -19 : High compression \n"); + DISPLAY( " -# : # compression level (1-19, default:1) \n"); DISPLAY( " -d : decompression (default for %s extension)\n", ZSTD_EXTENSION); //DISPLAY( " -z : force compression\n"); DISPLAY( " -f : overwrite output without prompting \n"); @@ -137,6 +136,7 @@ static int usage_advanced(const char* programName) DISPLAY( " -v : verbose mode\n"); DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); + DISPLAY( " -D file: use file content as Dictionary \n"); //DISPLAY( " -t : test compressed file integrity\n"); #ifndef ZSTD_NOBENCH DISPLAY( "Benchmark arguments :\n"); @@ -171,11 +171,13 @@ int main(int argCount, const char** argv) bench=0, decode=0, forceStdout=0, - main_pause=0; + main_pause=0, + nextEntryIsDictionary=0; unsigned cLevel = 1; const char* programName = argv[0]; const char* inFileName = NULL; const char* outFileName = NULL; + const char* dictFileName = NULL; char* dynNameSpace = NULL; const char extension[] = ZSTD_EXTENSION; unsigned fileNameStart = 0; @@ -249,8 +251,11 @@ int main(int argCount, const char** argv) /* Force stdout, even if stdout==console */ case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break; - // Test - //case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break; + /* Use file content as dictionary */ + case 'D': nextEntryIsDictionary = 1; argument++; break; + + /* Test -- not implemented */ + /* case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break; */ /* Overwrite */ case 'f': FIO_overwriteMode(); argument++; break; @@ -261,7 +266,7 @@ int main(int argCount, const char** argv) /* Quiet mode */ case 'q': displayLevel--; argument++; break; - /* keep source file (default anyway, so useless; only for xz/lzma compatibility) */ + /* keep source file (default anyway, so useless; for gzip/xz compatibility) */ case 'k': argument++; break; #ifndef ZSTD_NOBENCH @@ -310,6 +315,14 @@ int main(int argCount, const char** argv) continue; } + /* dictionary */ + if (nextEntryIsDictionary) + { + nextEntryIsDictionary = 0; + dictFileName = argument; + continue; + } + /* first provided filename is input */ if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argCount-i; continue; } @@ -381,9 +394,9 @@ int main(int argCount, const char** argv) /* IO Stream/File */ FIO_setNotificationLevel(displayLevel); if (decode) - FIO_decompressFilename(outFileName, inFileName); + FIO_decompressFilename(outFileName, inFileName, dictFileName); else - FIO_compressFilename(outFileName, inFileName, cLevel); + FIO_compressFilename(outFileName, inFileName, dictFileName, cLevel); _end: if (main_pause) waitEnter();