From ac8bace6b14f0e484cb1d3d0c364ade49dfe13f1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 7 Sep 2016 14:54:23 +0200 Subject: [PATCH 1/9] support large skippable frames --- lib/compress/zstd_compress.c | 3 +-- programs/fileio.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9f5ff403..f832e081 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -122,13 +122,12 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface * } -#define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); } - /** ZSTD_checkParams() : ensure param values remain within authorized range. @return : 0, or an error code if one value is beyond authorized range */ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) { +# define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); } CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); diff --git a/programs/fileio.c b/programs/fileio.c index b7b201e0..1023009e 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -20,7 +20,7 @@ ***************************************/ #ifndef ZSTD_LEGACY_SUPPORT /* LEGACY_SUPPORT : - * decompressor can decode older formats (starting from Zstd 0.1+) */ + * decompressor can decode older formats (starting from zstd 0.1+) */ # define ZSTD_LEGACY_SUPPORT 1 #endif @@ -613,22 +613,22 @@ unsigned long long FIO_decompressFrame(dRess_t ress, while (1) { ZSTD_inBuffer inBuff = { ress.srcBuffer, readSize, 0 }; ZSTD_outBuffer outBuff= { ress.dstBuffer, ress.dstBufferSize, 0 }; - size_t const toRead = ZSTD_decompressStream(ress.dctx, &outBuff, &inBuff ); - if (ZSTD_isError(toRead)) EXM_THROW(36, "Decoding error : %s", 
ZSTD_getErrorName(toRead)); + size_t const readSizeHint = ZSTD_decompressStream(ress.dctx, &outBuff, &inBuff ); + if (ZSTD_isError(readSizeHint)) EXM_THROW(36, "Decoding error : %s", ZSTD_getErrorName(readSizeHint)); /* Write block */ storedSkips = FIO_fwriteSparse(foutput, ress.dstBuffer, outBuff.pos, storedSkips); frameSize += outBuff.pos; DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(frameSize>>20) ); - if (toRead == 0) break; /* end of frame */ + if (readSizeHint == 0) break; /* end of frame */ if (inBuff.size != inBuff.pos) EXM_THROW(37, "Decoding error : should consume entire input"); /* Fill input buffer */ - if (toRead > ress.srcBufferSize) EXM_THROW(38, "too large block"); - readSize = fread(ress.srcBuffer, 1, toRead, finput); - if (readSize == 0) EXM_THROW(39, "Read error : premature end"); - } + { size_t const toRead = MIN(readSizeHint, ress.srcBufferSize); /* support large skippable frames */ + readSize = fread(ress.srcBuffer, 1, toRead, finput); + if (readSize < toRead) EXM_THROW(39, "Read error : premature end"); + } } FIO_fwriteSparseEnd(foutput, storedSkips); @@ -686,7 +686,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName) if (readSomething==0) { DISPLAY("zstd: %s: unexpected end of file \n", srcFileName); fclose(srcFile); return 1; } /* srcFileName is empty */ break; /* no more input */ } - readSomething = 1; + readSomething = 1; /* there is at least >= 4 bytes in srcFile */ if (sizeCheck != toRead) { DISPLAY("zstd: %s: unknown header \n", srcFileName); fclose(srcFile); return 1; } /* srcFileName is empty */ { U32 const magic = MEM_readLE32(ress.srcBuffer); if (((magic & 0xFFFFFFF0U) != ZSTD_MAGIC_SKIPPABLE_START) & (magic != ZSTD_MAGICNUMBER) From e48fbb9f4c0c8c7f9e2d5ed051840b3dc2b0c2be Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 7 Sep 2016 14:39:32 -0700 Subject: [PATCH 2/9] Specify that dictionary ID is little-endian --- zstd_compression_format.md | 1 + 1 file changed, 1 insertion(+) diff --git 
a/zstd_compression_format.md b/zstd_compression_format.md index 9d27f6cd..bc4c5ffc 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -301,6 +301,7 @@ This is a variable size field, which contains the ID of the dictionary required to properly decode the frame. Note that this field is optional. When it's not present, it's up to the caller to make sure it uses the correct dictionary. +Format is little-endian. Field size depends on `Dictionary_ID_flag`. 1 byte can represent an ID 0-255. From 01c199226afe82279f8381cbf4441246dd78c461 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 8 Sep 2016 19:29:04 +0200 Subject: [PATCH 3/9] updated decompression streaming example --- examples/Makefile | 6 +++--- examples/streaming_decompression.c | 26 +++++++++++++++++++------- lib/zstd.h | 4 ++-- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index a9d60877..fa7328fb 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -55,8 +55,8 @@ streaming_decompression : streaming_decompression.c clean: @rm -f core *.o tmp* result* *.zst \ simple_compression simple_decompression \ - dictionary_compression dictionary_decompression \ - streaming_compression streaming_decompression + dictionary_compression dictionary_decompression \ + streaming_compression streaming_decompression @echo Cleaning completed test: all @@ -64,7 +64,7 @@ test: all @echo starting simple compression ./simple_compression tmp ./simple_decompression tmp.zst - ./streaming_decompression tmp.zst + ./streaming_decompression tmp.zst > /dev/null @echo starting streaming compression ./streaming_compression tmp ./streaming_decompression tmp.zst diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index d4dfacb2..62c78026 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -42,6 +42,16 @@ static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) exit(4); } 
+static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) +{ + size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); + if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ + /* error */ + perror("fwrite"); + exit(5); +} + + static size_t fclose_orDie(FILE* file) { if (!fclose(file)) return 0; @@ -54,28 +64,30 @@ static size_t fclose_orDie(FILE* file) static void decompressFile_orDie(const char* fname) { FILE* const fin = fopen_orDie(fname, "rb"); - size_t const buffInSize = ZSTD_DStreamInSize();; + size_t const buffInSize = ZSTD_DStreamInSize(); void* const buffIn = malloc_orDie(buffInSize); - size_t const buffOutSize = ZSTD_DStreamOutSize();; + size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */ void* const buffOut = malloc_orDie(buffOutSize); - size_t read, toRead = buffInSize; + FILE* const fout = stdout; ZSTD_DStream* const dstream = ZSTD_createDStream(); if (dstream==NULL) { fprintf(stderr, "ZSTD_createDStream() error \n"); exit(10); } size_t const initResult = ZSTD_initDStream(dstream); if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initDStream() error \n"); exit(11); } + size_t toRead = initResult; - while( (read = fread_orDie(buffIn, toRead, fin)) ) { + size_t read; + while ( (read = fread_orDie(buffIn, toRead, fin)) ) { ZSTD_inBuffer input = { buffIn, read, 0 }; while (input.pos < input.size) { ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - toRead = ZSTD_decompressStream(dstream, &output , &input); - /* note : data is just "sinked" into buffOut - a more complete example would write it to disk or stdout */ + toRead = ZSTD_decompressStream(dstream, &output , &input); /* toRead : size of next compressed block */ + fwrite_orDie(buffOut, output.pos, fout); } } fclose_orDie(fin); + fclose_orDie(fout); free(buffIn); free(buffOut); } diff --git a/lib/zstd.h b/lib/zstd.h index e05fc293..10312fe8 100644 --- 
a/lib/zstd.h +++ b/lib/zstd.h @@ -229,7 +229,7 @@ ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); @@ -268,7 +268,7 @@ ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. 
*/ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); From 264c733ad6eb7e0095d901009459f47358247f0d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 8 Sep 2016 19:39:00 +0200 Subject: [PATCH 4/9] clarified tests --- examples/Makefile | 2 +- examples/README.md | 13 +++++++++++++ examples/streaming_decompression.c | 8 +++----- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index fa7328fb..f568bc00 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -67,7 +67,7 @@ test: all ./streaming_decompression tmp.zst > /dev/null @echo starting streaming compression ./streaming_compression tmp - ./streaming_decompression tmp.zst + ./streaming_decompression tmp.zst > /dev/null @echo starting dictionary compression ./dictionary_compression tmp README.md ./dictionary_decompression tmp.zst README.md diff --git a/examples/README.md b/examples/README.md index 2f460388..d00fa0d7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,6 +7,8 @@ Zstandard library : usage examples - [Simple decompression](simple_decompression.c) Decompress a single file compressed by zstd. + Only compatible with simple compression. + Result remains in memory. Introduces usage of : `ZSTD_decompress()` - [Dictionary compression](dictionary_compression.c) @@ -15,4 +17,15 @@ Zstandard library : usage examples - [Dictionary decompression](dictionary_decompression.c) Decompress multiple files using the same dictionary. + Result remains in memory. Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()` + +- [Streaming compression](streaming_compression.c) + Compress a single file. + Introduces usage of : `ZSTD_compressStream()` + +- [Streaming decompression](streaming_decompression.c) + Decompress a single file compressed by zstd. + Compatible with simple and streaming compression. + Result is sent to stdout. 
+ Introduces usage of : `ZSTD_decompressStream()` diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index 62c78026..2966ec6e 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -99,14 +99,12 @@ int main(int argc, const char** argv) const char* const inFilename = argv[1]; if (argc!=2) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE\n", exeName); + fprintf(stderr, "wrong arguments\n"); + fprintf(stderr, "usage:\n"); + fprintf(stderr, "%s FILE\n", exeName); return 1; } decompressFile_orDie(inFilename); - printf("%s correctly decoded (in memory). \n", inFilename); - return 0; } From 75ba29b1174d7d9821c5954104aadcc6a3676f87 Mon Sep 17 00:00:00 2001 From: codeshef Date: Fri, 9 Sep 2016 02:23:29 +0530 Subject: [PATCH 5/9] modification in line51 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fa59de83..2c8e707e 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ For a larger picture including very slow modes, [click on this link](images/DCsp Previous charts provide results applicable to typical file and stream scenarios (several MB). Small data comes with different perspectives. The smaller the amount of data to compress, the more difficult it is to achieve any significant compression. -This problem is common to any compression algorithm. The reason is, compression algorithms learn from past data how to compress future data. But at the beginning of a new file, there is no "past" to build upon. +This problem is common to many compression algorithms. The reason is, compression algorithms learn from past data how to compress future data. But at the beginning of a new file, there is no "past" to build upon. To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data, by providing it with a few samples. 
The result of the training is stored in a file called "dictionary", which can be loaded before compression and decompression. Using this dictionary, the compression ratio achievable on small data improves dramatically: From b94fcc8d8a9a5723bd050925c36bd904ed17f7ca Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 8 Sep 2016 19:48:04 +0200 Subject: [PATCH 6/9] clarified doc --- examples/README.md | 30 +++++++++++++++--------------- examples/streaming_decompression.c | 7 +++---- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/examples/README.md b/examples/README.md index d00fa0d7..ba132f6c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,31 +1,31 @@ Zstandard library : usage examples ================================== -- [Simple compression](simple_compression.c) +- [Simple compression](simple_compression.c) : Compress a single file. Introduces usage of : `ZSTD_compress()` -- [Simple decompression](simple_decompression.c) - Decompress a single file compressed by zstd. +- [Simple decompression](simple_decompression.c) : + Decompress a single file. Only compatible with simple compression. Result remains in memory. Introduces usage of : `ZSTD_decompress()` -- [Dictionary compression](dictionary_compression.c) - Compress multiple files using the same dictionary. - Introduces usage of : `ZSTD_createCDict()` and `ZSTD_compress_usingCDict()` - -- [Dictionary decompression](dictionary_decompression.c) - Decompress multiple files using the same dictionary. - Result remains in memory. - Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()` - -- [Streaming compression](streaming_compression.c) +- [Streaming compression](streaming_compression.c) : Compress a single file. Introduces usage of : `ZSTD_compressStream()` -- [Streaming decompression](streaming_decompression.c) +- [Streaming decompression](streaming_decompression.c) : Decompress a single file compressed by zstd. 
- Compatible with simple and streaming compression. + Compatible with both simple and streaming compression. Result is sent to stdout. Introduces usage of : `ZSTD_decompressStream()` + +- [Dictionary compression](dictionary_compression.c) : + Compress multiple files using the same dictionary. + Introduces usage of : `ZSTD_createCDict()` and `ZSTD_compress_usingCDict()` + +- [Dictionary decompression](dictionary_decompression.c) : + Decompress multiple files using the same dictionary. + Result remains in memory. + Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()` diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index 2966ec6e..4c9d2209 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -66,17 +66,16 @@ static void decompressFile_orDie(const char* fname) FILE* const fin = fopen_orDie(fname, "rb"); size_t const buffInSize = ZSTD_DStreamInSize(); void* const buffIn = malloc_orDie(buffInSize); + FILE* const fout = stdout; size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. 
*/ void* const buffOut = malloc_orDie(buffOutSize); - FILE* const fout = stdout; ZSTD_DStream* const dstream = ZSTD_createDStream(); if (dstream==NULL) { fprintf(stderr, "ZSTD_createDStream() error \n"); exit(10); } size_t const initResult = ZSTD_initDStream(dstream); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initDStream() error \n"); exit(11); } - size_t toRead = initResult; + if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initDStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - size_t read; + size_t read, toRead = initResult; while ( (read = fread_orDie(buffIn, toRead, fin)) ) { ZSTD_inBuffer input = { buffIn, read, 0 }; while (input.pos < input.size) { From b3060f7a9ea3555c6045606be58dddc86bbb099b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 9 Sep 2016 16:44:16 +0200 Subject: [PATCH 7/9] changed streaming decoder behavior : now, when all compressed frame is consumed, it means decompression is completed, with regenerated data fully flushed. 
--- lib/decompress/zstd_decompress.c | 29 ++++++++++++++++++++++------- lib/zstd.h | 10 ++++------ tests/playTests.sh | 2 ++ tests/zstreamtest.c | 10 +++++----- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 6acb259b..c6bb5329 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1286,6 +1286,7 @@ struct ZSTD_DStream_s { void* legacyContext; U32 previousLegacyVersion; U32 legacyVersion; + U32 hostageByte; }; /* typedef'd to ZSTD_DStream within "zstd.h" */ @@ -1349,6 +1350,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di zds->dictSize = dictSize; } zds->legacyVersion = 0; + zds->hostageByte = 0; return ZSTD_frameHeaderSize_prefix; } @@ -1371,11 +1373,11 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds) { - return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->zd) + zds->inBuffSize + zds->outBuffSize; + return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->zd) + zds->inBuffSize + zds->outBuffSize + zds->dictSize; } -/* *** Decompression *** */ +/* ***** Decompression ***** */ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { @@ -1445,7 +1447,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_unsupported); - /* Frame header instruct buffer sizes */ + /* Adapt buffer sizes to frame header instructions */ { size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); size_t const neededOutSize = zds->fParams.windowSize + blockSize; zds->blockSize = blockSize; @@ -1479,7 +1481,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (ZSTD_isError(decodedSize)) return 
decodedSize; ip += neededInSize; if (!decodedSize && !isSkipFrame) break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; + zds->outEnd = zds->outStart + decodedSize; zds->stage = zdss_flush; break; } @@ -1522,7 +1524,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->outStart = zds->outEnd = 0; break; } - /* cannot flush everything */ + /* cannot complete flush */ someMoreWork = 0; break; } @@ -1533,8 +1535,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB input->pos += (size_t)(ip-istart); output->pos += (size_t)(op-ostart); { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->zd); - if (!nextSrcSizeHint) return (zds->outEnd != zds->outStart); /* return 0 only if fully flushed too */ - nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->zd) == ZSTDnit_block); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { zds->stage = zdss_read; return 1; } /* can't release hostage (not present) */ + input->pos++; /* release hostage */ + } + return 0; + } + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->zd) == ZSTDnit_block); /* preload header of next block */ if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */ nextSrcSizeHint -= zds->inPos; /* already loaded*/ return nextSrcSizeHint; diff --git a/lib/zstd.h b/lib/zstd.h index 10312fe8..5cc40c63 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -252,15 +252,13 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); * * Use ZSTD_decompressStream() 
repetitively to consume your input. * The function will update both `pos` fields. -* If `input.pos < input.size`, some input is not consumed. +* If `input.pos < input.size`, some input has not been consumed. * It's up to the caller to present again remaining data. -* If `output.pos == output.size`, there is probably some more data to flush, still stored inside internal buffers. +* If `output.pos < output.size`, decoder has flushed everything it could. * @return : 0 when a frame is completely decoded and fully flushed, * an error code, which can be tested using ZSTD_isError(), -* any value > 0, which means there is still some work to do to complete the frame. -* In general, the return value is a suggested next input size (merely a hint, to help latency). -* 1 is a special value, which means either "there is still some data to flush", or "need 1 more byte as input". -* In which case, start by flushing. When flush is completed, if return value is still `1`, it means "need 1 more byte". +* any other value > 0, which means there is still some work to do to complete the frame. +* The return value is a suggested next input size (just a hint, to help latency). 
* *******************************************************************************/ typedef struct ZSTD_DStream_s ZSTD_DStream; diff --git a/tests/playTests.sh b/tests/playTests.sh index 64b3fd95..21e98bf9 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -45,7 +45,9 @@ file $ZSTD $ECHO "\n**** simple tests **** " ./datagen > tmp +$ECHO "test : basic compression " $ZSTD -f tmp # trivial compression case, creates tmp.zst +$ECHO "test : basic decompression" $ZSTD -df tmp.zst # trivial decompression case (overwrites tmp) $ECHO "test : too large compression level (must fail)" $ZSTD -99 -f tmp # too large compression level, automatic sized down diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 30536387..97fbaa18 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -58,7 +58,7 @@ static U32 g_displayLevel = 2; if ((FUZ_GetClockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ { g_displayClock = clock(); DISPLAY(__VA_ARGS__); \ if (g_displayLevel>=4) fflush(stdout); } } -static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6; static clock_t g_displayClock = 0; static clock_t g_clockTime = 0; @@ -118,8 +118,7 @@ static void freeFunction(void* opaque, void* address) static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem) { - int testResult = 0; - size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; + size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; void* CNBuffer = malloc(CNBufferSize); size_t const skippableFrameSize = 11; size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH); @@ -127,6 +126,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo size_t const decodedBufferSize = CNBufferSize; void* decodedBuffer = malloc(decodedBufferSize); size_t cSize; + int testResult = 0; U32 testNb=0; ZSTD_CStream* zc = ZSTD_createCStream_advanced(customMem); 
ZSTD_DStream* zd = ZSTD_createDStream_advanced(customMem); @@ -437,7 +437,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1; maxTestSize = FUZ_rLogLength(&lseed, testLog); - dictSize = (FUZ_rand(&lseed)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0; + dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0; /* random dictionary selection */ { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); dict = srcBuffer + dictStart; @@ -446,7 +446,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; { size_t const initError = ZSTD_initCStream_advanced(zc, dict, dictSize, params, 0); - CHECK (ZSTD_isError(initError),"init error : %s", ZSTD_getErrorName(initError)); + CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); } } } /* multi-segments compression test */ From 7b0c261623707e20dd8f896b046928e865181cf5 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Fri, 9 Sep 2016 19:02:40 +0200 Subject: [PATCH 8/9] Smallish typo fixes in format documentation --- zstd_compression_format.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zstd_compression_format.md b/zstd_compression_format.md index bc4c5ffc..b14f5553 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -732,7 +732,7 @@ This size is deducted from `blockSize - literalSectionSize`. 
#### `Sequences_Section_Header` -Consists in 2 items : +Consists of 2 items: - `Number_of_Sequences` - Symbol compression modes @@ -873,7 +873,7 @@ and can be translated into an `Offset_Value` using the following formulas : Offset_Value = (1 << offsetCode) + readNBits(offsetCode); if (Offset_Value > 3) offset = Offset_Value - 3; ``` -It means that maximum `Offset_Value` is `2^(N+1))-1` and it supports back-reference distance up to `2^(N+1))-4` +It means that maximum `Offset_Value` is `(2^(N+1))-1` and it supports back-reference distance up to `(2^(N+1))-4` but is limited by [maximum back-reference distance](#window_descriptor). `Offset_Value` from 1 to 3 are special : they define "repeat codes", @@ -894,7 +894,7 @@ If any sequence in the compressed block requires an offset larger than this, it's not possible to use the default distribution to represent it. ``` -short offsetCodes_defaultDistribution[53] = +short offsetCodes_defaultDistribution[29] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; ``` From a2664649df3a7c450ee8f6299df29713e1081b21 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 9 Sep 2016 19:33:56 +0200 Subject: [PATCH 9/9] better error handling --- Makefile | 4 ++-- examples/Makefile | 5 +++-- examples/dictionary_compression.c | 10 ++++++---- examples/dictionary_decompression.c | 4 ++-- examples/simple_compression.c | 4 ++-- examples/simple_decompression.c | 4 ++-- examples/streaming_compression.c | 5 +++-- examples/streaming_decompression.c | 2 +- 8 files changed, 21 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index c9f1fe41..7860ce1d 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,7 @@ clean: @$(MAKE) -C $(PRGDIR) $@ > $(VOID) @$(MAKE) -C $(TESTDIR) $@ > $(VOID) @$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID) - @rm -f zstd + @$(RM) zstd @echo Cleaning completed @@ -121,7 +121,7 @@ endif ifneq (,$(filter $(HOST_OS),MSYS POSIX)) cmaketest: cmake --version - rm -rf projects/cmake/build + 
$(RM) -r projects/cmake/build mkdir projects/cmake/build cd projects/cmake/build ; cmake -DPREFIX:STRING=~/install_test_dir $(CMAKE_PARAMS) .. ; $(MAKE) install ; $(MAKE) uninstall diff --git a/examples/Makefile b/examples/Makefile index f568bc00..54602dfe 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -61,6 +61,7 @@ clean: test: all cp README.md tmp + cp Makefile tmp2 @echo starting simple compression ./simple_compression tmp ./simple_decompression tmp.zst @@ -69,6 +70,6 @@ test: all ./streaming_compression tmp ./streaming_decompression tmp.zst > /dev/null @echo starting dictionary compression - ./dictionary_compression tmp README.md - ./dictionary_decompression tmp.zst README.md + ./dictionary_compression tmp2 tmp README.md + ./dictionary_decompression tmp2.zst tmp.zst README.md @echo tests completed diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c index 08d639c0..adcc3b4d 100644 --- a/examples/dictionary_compression.c +++ b/examples/dictionary_compression.c @@ -73,12 +73,12 @@ static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSi /* createDict() : `dictFileName` is supposed to have been created using `zstd --train` */ -static ZSTD_CDict* createCDict_orDie(const char* dictFileName) +static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel) { size_t dictSize; printf("loading dictionary %s \n", dictFileName); void* const dictBuffer = loadFile_orDie(dictFileName, &dictSize); - ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3); + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, cLevel); if (!cdict) { fprintf(stderr, "ZSTD_createCDict error \n"); exit(7); @@ -96,6 +96,7 @@ static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdi void* const cBuff = malloc_orDie(cBuffSize); ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (cctx==NULL) { fprintf(stderr, "ZSTD_createCCtx() error \n"); exit(10); } size_t const cSize = 
ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict); if (ZSTD_isError(cSize)) { fprintf(stderr, "error compressing %s : %s \n", fname, ZSTD_getErrorName(cSize)); @@ -107,7 +108,7 @@ static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdi /* success */ printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname); - ZSTD_freeCCtx(cctx); + ZSTD_freeCCtx(cctx); /* never fails */ free(fBuff); free(cBuff); } @@ -127,6 +128,7 @@ static char* createOutFilename_orDie(const char* filename) int main(int argc, const char** argv) { const char* const exeName = argv[0]; + int const cLevel = 3; if (argc<3) { fprintf(stderr, "wrong arguments\n"); @@ -137,7 +139,7 @@ int main(int argc, const char** argv) /* load dictionary only once */ const char* const dictName = argv[argc-1]; - ZSTD_CDict* const dictPtr = createCDict_orDie(dictName); + ZSTD_CDict* const dictPtr = createCDict_orDie(dictName, cLevel); int u; for (u=1; u