modified streaming compression API

This commit is contained in:
Yann Collet 2016-01-26 16:31:22 +01:00
parent 7b51a2948f
commit 1c8e194297
9 changed files with 52 additions and 84 deletions

View File

@ -119,7 +119,7 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
#define MIN(a,b) ( ((a)<(b)) ? (a) : (b) )
#define BLOCKSIZE (128 * 1024) /* a bit too "magic", should come from reference */
size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params)
size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, ZSTD_parameters params)
{
size_t neededInBuffSize;
@ -143,7 +143,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params)
if (zbc->outBuff == NULL) return ERROR(memory_allocation);
}
zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, params);
zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params);
if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize;
zbc->inToCompress = 0;
@ -156,14 +156,13 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params)
size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
{
return ZBUFF_compressInit_advanced(zbc, ZSTD_getParams(compressionLevel, 0));
return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0));
}
ZSTDLIB_API size_t ZBUFF_compressWithDictionary(ZBUFF_CCtx* zbc, const void* src, size_t srcSize)
ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_compress_insertDictionary(zbc->zc, src, srcSize);
return 0;
return ZBUFF_compressInit_advanced(zbc, dict, dictSize, ZSTD_getParams(compressionLevel, 0));
}

View File

@ -69,7 +69,8 @@ ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
ZSTDLIB_API size_t ZBUFF_compressWithDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize);
ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr);
ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr);
@ -79,11 +80,11 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDst
*
* A ZBUFF_CCtx object is required to track streaming operation.
* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
* Use ZBUFF_compressInit() to start a new compression operation.
* ZBUFF_CCtx objects can be reused multiple times.
*
* Optionally, a reference to a static dictionary can be created with ZBUFF_compressWithDictionary()
* Note that the dictionary content must remain accessible during the compression process.
* Start by initializing ZBUFF_CCtx.
* Use ZBUFF_compressInit() to start a new compression operation.
* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary.
*
* Use ZBUFF_compressContinue() repetitively to consume input stream.
* *srcSizePtr and *maxDstSizePtr can be any size.
@ -93,9 +94,10 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDst
* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
* or an error code, which can be tested using ZBUFF_isError().
*
* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
* Note that it will not output more than *maxDstSizePtr.
* Therefore, some content might still be left into its internal buffer if dst buffer is too small.
* At any moment, it's possible to flush whatever data remains within the internal buffer, using ZBUFF_compressFlush().
* The nb of bytes written into `dst` will be reported in *maxDstSizePtr.
* Note that the function cannot output more than the size of the `dst` buffer (initial value of *maxDstSizePtr).
* Therefore, some content might still be left in the internal buffer if the `dst` buffer is too small.
* @return : nb of bytes still present into internal buffer (0 if it's empty)
* or an error code, which can be tested using ZBUFF_isError().
*
@ -108,7 +110,7 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDst
* or an error code, which can be tested using ZBUFF_isError().
*
* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value.
* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering).
* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
* By using both, you ensure that input will be entirely consumed, and output will always contain the result.
* **************************************************/

View File

@ -45,14 +45,14 @@ extern "C" {
/* *************************************
* Includes
***************************************/
#include "zstd_static.h"
#include "zstd_static.h" /* ZSTD_parameters */
#include "zstd_buffered.h"
/* *************************************
* Advanced Streaming functions
***************************************/
ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, ZSTD_parameters params);
ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params);
#if defined (__cplusplus)

View File

@ -2167,7 +2167,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t
return hufHeaderSize;
}
size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
{
if (dict && dictSize)
{
@ -2186,22 +2186,23 @@ size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t di
/*! ZSTD_compressBegin_advanced
* @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx,
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
const void* dict, size_t dictSize,
ZSTD_parameters params)
{
size_t errorCode;
ZSTD_validateParams(&params);
errorCode = ZSTD_resetCCtx_advanced(ctx, params);
errorCode = ZSTD_resetCCtx_advanced(zc, params);
if (ZSTD_isError(errorCode)) return errorCode;
MEM_writeLE32(ctx->headerBuffer, ZSTD_MAGICNUMBER); /* Write Header */
((BYTE*)ctx->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN);
ctx->hbSize = ZSTD_frameHeaderSize_min;
ctx->stage = 0;
MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER); /* Write Header */
((BYTE*)zc->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN);
zc->hbSize = ZSTD_frameHeaderSize_min;
zc->stage = 0;
return 0;
return ZSTD_compress_insertDictionary(zc, dict, dictSize);
}
@ -2219,15 +2220,14 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
return result;
}
/* to do
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, int compressionLevel)
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel)
{
return 0;
}*/
return ZSTD_compressBegin_advanced(zc, dict, dictSize, ZSTD_getParams(compressionLevel, 0));
}
size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, int compressionLevel)
size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
{
return ZSTD_compressBegin_advanced(ctx, ZSTD_getParams(compressionLevel, 0));
return ZSTD_compressBegin_advanced(zc, NULL, 0, ZSTD_getParams(compressionLevel, 0));
}
@ -2269,17 +2269,10 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
BYTE* op = ostart;
size_t oSize;
/* Header */
oSize = ZSTD_compressBegin_advanced(ctx, params);
/* Init */
oSize = ZSTD_compressBegin_advanced(ctx, dict, dictSize, params);
if(ZSTD_isError(oSize)) return oSize;
/* dictionary */
if (dict)
{
oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize);
if (ZSTD_isError(oSize)) return oSize;
}
/* body (compression) */
oSize = ZSTD_compressContinue (ctx, op, maxDstSize, src, srcSize);
if(ZSTD_isError(oSize)) return oSize;

View File

@ -132,12 +132,8 @@ ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx(
****************************************/
ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, int compressionLevel);
//ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params);
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, ZSTD_parameters params);
ZSTDLIB_API size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* dict, size_t dictSize);
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx);
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, ZSTD_parameters params);
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);
ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);
@ -149,18 +145,10 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it.
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
First operation is to start a new frame.
Use ZSTD_compressBegin().
You may also prefer the advanced derivative ZSTD_compressBegin_advanced(), for finer parameter control.
It's then possible to add a dictionary with ZSTD_compress_insertDictionary()
Note that dictionary presence is a "hidden" information,
the decoder needs to be aware that it is required for proper decoding, or decoding will fail.
If you want to compress a lot of messages using same dictionary,
it can be beneficial to duplicate compression context rather than reloading dictionary each time.
In such case, use ZSTD_duplicateCCtx(), which will need an already created ZSTD_CCtx,
in order to duplicate compression context into it.
Start by initializing a context.
Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
or ZSTD_compressBegin_advanced(), for finer parameter control.
It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx().
Then, consume your input using ZSTD_compressContinue().
The interface is synchronous, so all input will be consumed and produce a compressed output.
@ -168,7 +156,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz
Worst case evaluation is provided by ZSTD_compressBound().
Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
Without it, the frame will be considered incomplete by decoders.
Without the epilogue, frames will be considered incomplete by the decoder.
You can then reuse ZSTD_CCtx to compress some new frame.
*/
@ -176,7 +164,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz
ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx);
ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);

View File

@ -295,8 +295,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
milliTime = BMK_GetMilliStart();
while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
ZSTD_compressBegin_advanced(refCtx, ZSTD_getParams(cLevel, dictBufferSize+largestBlockSize));
ZSTD_compress_insertDictionary(refCtx, dictBuffer, dictBufferSize);
ZSTD_compressBegin_advanced(refCtx, dictBuffer, dictBufferSize, ZSTD_getParams(cLevel, dictBufferSize+largestBlockSize));
for (blockNb=0; blockNb<nbBlocks; blockNb++)
{
ZSTD_copyCCtx(ctx, refCtx);

View File

@ -355,19 +355,14 @@ static int FIO_compressFilename_extRess(cRess_t ress,
/* init */
filesize = FIO_getFileSize(srcFileName) + dictSize;
errorCode = ZBUFF_compressInit_advanced(ress.ctx, ZSTD_getParams(cLevel, filesize));
if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression");
errorCode = ZBUFF_compressWithDictionary(ress.ctx, ress.dictBuffer, ress.dictBufferSize);
if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing dictionary");
errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, ZSTD_getParams(cLevel, filesize));
if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
/* Main compression loop */
filesize = 0;
while (1)
{
size_t inSize;
while (1) {
/* Fill input Buffer */
inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
size_t inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
if (inSize==0) break;
filesize += inSize;
DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20));

View File

@ -203,9 +203,7 @@ static int basicUnitTests(U32 seed, double compressibility)
size_t cSizeOrig;
DISPLAYLEVEL(4, "test%3i : load dictionary into context : ", testNb++);
result = ZSTD_compressBegin(ctxOrig, 2);
if (ZSTD_isError(result)) goto _output_error;
result = ZSTD_compress_insertDictionary(ctxOrig, CNBuffer, dictSize);
result = ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2);
if (ZSTD_isError(result)) goto _output_error;
result = ZSTD_copyCCtx(ctxDuplicated, ctxOrig);
if (ZSTD_isError(result)) goto _output_error;
@ -293,9 +291,7 @@ static int basicUnitTests(U32 seed, double compressibility)
/* dictionary block compression */
DISPLAYLEVEL(4, "test%3i : Dictionary Block compression test : ", testNb++);
result = ZSTD_compressBegin(cctx, 5);
if (ZSTD_isError(result)) goto _output_error;
result = ZSTD_compress_insertDictionary(cctx, CNBuffer, dictSize);
result = ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5);
if (ZSTD_isError(result)) goto _output_error;
cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize);
if (ZSTD_isError(cSize)) goto _output_error;
@ -569,10 +565,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
dict = srcBuffer + sampleStart;
dictSize = sampleSize;
errorCode = ZSTD_compressBegin(refCtx, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
CHECK (ZSTD_isError(errorCode), "start streaming error : %s", ZSTD_getErrorName(errorCode));
errorCode = ZSTD_compress_insertDictionary(refCtx, dict, dictSize);
CHECK (ZSTD_isError(errorCode), "dictionary insertion error : %s", ZSTD_getErrorName(errorCode));
errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode));
errorCode = ZSTD_copyCCtx(ctx, refCtx);
CHECK (ZSTD_isError(errorCode), "context duplication error : %s", ZSTD_getErrorName(errorCode));
totalTestSize = 0; cSize = 0;

View File

@ -158,10 +158,9 @@ static int basicUnitTests(U32 seed, double compressibility)
/* Basic compression test */
DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
ZBUFF_compressInit(zc, 1);
ZBUFF_compressInitDictionary(zc, CNBuffer, 128 KB, 1);
readSize = CNBufferSize;
genSize = compressedBufferSize;
ZBUFF_compressWithDictionary(zc, CNBuffer, 128 KB);
result = ZBUFF_compressContinue(zc, compressedBuffer, &genSize, CNBuffer, &readSize);
if (ZBUFF_isError(result)) goto _output_error;
if (readSize != CNBufferSize) goto _output_error; /* entire input should be consumed */
@ -317,7 +316,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
maxTestSize = (size_t)1 << sampleSizeLog;
maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1);
ZBUFF_compressInit(zc, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
sampleSize = (size_t)1 << sampleSizeLog;
@ -325,7 +323,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
dict = srcBuffer + sampleStart;
dictSize = sampleSize;
ZBUFF_compressWithDictionary(zc, dict, dictSize);
ZBUFF_compressInitDictionary(zc, dict, dictSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
totalTestSize = 0;
cSize = 0;