From 2e23333094e6df6f84b7e21b2dcf64deff88ec30 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Tue, 16 Jan 2018 15:28:43 -0800
Subject: [PATCH] ZSTDMT can now work in non-blocking mode with 1 thread

It still falls back to single-threaded blocking invocation
when input is small (< 1 job),
or when invoking ZSTDMT_compress(), which is blocking.

Also: fixed a bug in the new block-granular compression routine.
---
 lib/compress/zstd_compress.c     |  3 +-
 lib/compress/zstdmt_compress.c   | 71 ++++++++++++++++++--------------
 lib/compress/zstdmt_compress.h   |  2 +-
 lib/decompress/zstd_decompress.c | 12 +++---
 tests/zstreamtest.c              |  7 ++++
 5 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 49f73dc5..d2703f23 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1962,7 +1962,8 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 {   size_t fhSize = 0;
 
-    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u", cctx->stage);
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (U32)srcSize);
 
     if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */
 
     if (frame && (cctx->stage==ZSTDcs_init)) {
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 3a5b58a7..47b8923a 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -365,11 +365,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
     if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first job */
         size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
+        DEBUGLOG(5, "ZSTDMT_compressChunk: flush and overwrite %u bytes of frame header (not first chunk)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
     }
 
     /* compress */
-#if 1
+#if 0
     job->cSize = (job->lastChunk) ?
                  ZSTD_compressEnd     (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
                  ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
@@ -382,26 +383,28 @@ void ZSTDMT_compressChunk(void* jobDescription)
         BYTE* op = ostart;
         BYTE* oend = op + dstBuff.size;
         int blockNb;
+        DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
         job->cSize = 0;
-        for (blockNb = 0; blockNb < nbBlocks-1; blockNb++) {
+        for (blockNb = 1; blockNb < nbBlocks; blockNb++) {
             size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, ZSTD_BLOCKSIZE_MAX);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
             ip += ZSTD_BLOCKSIZE_MAX;
             op += cSize;   assert(op < oend);
             /* stats */
             job->cSize += cSize;
-            job->readSize = ZSTD_BLOCKSIZE_MAX * (blockNb+1);
+            job->readSize = ZSTD_BLOCKSIZE_MAX * blockNb;
         }
         /* last block */
-        {   size_t const lastBlockSize1 = job->srcSize & (ZSTD_BLOCKSIZE_MAX-1);
-            size_t const lastBlockSize = (lastBlockSize1==0) ? ZSTD_BLOCKSIZE_MAX : lastBlockSize1;
+        if ((nbBlocks > 0) | job->lastChunk /*need to output a "last block" flag*/ ) {
+            size_t const lastBlockSize1 = job->srcSize & (ZSTD_BLOCKSIZE_MAX-1);
+            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=ZSTD_BLOCKSIZE_MAX)) ? ZSTD_BLOCKSIZE_MAX : lastBlockSize1;
             size_t const cSize = (job->lastChunk) ?
                  ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
                  ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
             /* stats */
             job->cSize += cSize;
-            job->readSize = ZSTD_BLOCKSIZE_MAX * (blockNb+1);
+            job->readSize = job->srcSize;
         }
     }
 #endif
@@ -443,7 +446,7 @@ struct ZSTDMT_CCtx_s {
     size_t targetDictSize;
     inBuff_t inBuff;
     XXH64_state_t xxhState;
-    unsigned singleThreaded;
+    unsigned singleBlockingThread;
     unsigned jobIDMask;
     unsigned doneJobID;
     unsigned nextJobID;
@@ -457,7 +460,7 @@ struct ZSTDMT_CCtx_s {
 
 /* Sets parameters relevant to the compression job, initializing others to
  * default values. Notably, nbThreads should probably be zero. */
-static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
 {
     ZSTD_CCtx_params jobParams;
     memset(&jobParams, 0, sizeof(jobParams));
@@ -646,17 +649,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
-static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
-    size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
-    size_t const chunkMaxSize = chunkSizeTarget << 2;
-    size_t const passSizeMax = chunkMaxSize * nbThreads;
-    unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
-    unsigned const nbChunksLarge = multiplier * nbThreads;
-    unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
-    unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
-    return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
-}
+static unsigned ZSTDMT_computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
+    assert(nbThreads>0);
+    {   size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
+        size_t const chunkMaxSize = chunkSizeTarget << 2;
+        size_t const passSizeMax = chunkMaxSize * nbThreads;
+        unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+        unsigned const nbChunksLarge = multiplier * nbThreads;
+        unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
+        unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
+        return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+} }
 
+/* ZSTDMT_compress_advanced_internal() :
+ * This is a blocking function : it will only give back control to caller after finishing its compression job.
+ */
 static size_t ZSTDMT_compress_advanced_internal(
                 ZSTDMT_CCtx* mtctx,
                 void* dst, size_t dstCapacity,
@@ -664,10 +671,10 @@ static size_t ZSTDMT_compress_advanced_internal(
                 const ZSTD_CDict* cdict,
                 ZSTD_CCtx_params const params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_makeJobCCtxParams(params);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
     unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
     size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
-    unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
+    unsigned nbChunks = ZSTDMT_computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
     size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
     size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
@@ -678,14 +685,16 @@ static size_t ZSTDMT_compress_advanced_internal(
 
     assert(jobParams.nbThreads == 0);
     assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
-    DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
+    DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
                 nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
-    if (nbChunks==1) {   /* fallback to single-thread mode */
+
+    if ((nbChunks==1) | (params.nbThreads<=1)) {   /* fallback to single-thread mode : this is a blocking invocation anyway */
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
         return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
     }
-    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
+
+    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
 
     XXH64_reset(&xxh64, 0);
@@ -695,6 +704,7 @@ static size_t ZSTDMT_compress_advanced_internal(
         mtctx->jobIDMask = 0;
         mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
         if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));  /* ensure nbJobs is a power of 2 */
         mtctx->jobIDMask = nbJobs - 1;
     }
 
@@ -827,10 +837,10 @@ size_t ZSTDMT_initCStream_internal(
     assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
     assert(zcs->cctxPool->totalCCtx == params.nbThreads);
-    zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
+    zcs->singleBlockingThread = pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN;  /* do not trigger multi-threading when srcSize is too small */
 
-    if (zcs->singleThreaded) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params);
+    if (zcs->singleBlockingThread) {
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
         DEBUGLOG(4, "single thread mode");
         assert(singleThreadParams.nbThreads == 0);
         return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
@@ -921,10 +931,11 @@ size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
 }
 
 size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
-    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
     ZSTD_CCtx_params cctxParams = zcs->params;
     cctxParams.cParams = params.cParams;
     cctxParams.fParams = params.fParams;
+    DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
     return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
@@ -1071,7 +1082,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     assert(output->pos <= output->size);
     assert(input->pos <= input->size);
 
-    if (mtctx->singleThreaded) {  /* delegate to single-thread (synchronous) */
+    if (mtctx->singleBlockingThread) {  /* delegate to single-thread (synchronous) */
         return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
     }
 
@@ -1166,7 +1177,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
 size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
 {
     DEBUGLOG(5, "ZSTDMT_flushStream");
-    if (mtctx->singleThreaded)
+    if (mtctx->singleBlockingThread)
         return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
     return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
 }
@@ -1174,7 +1185,7 @@ size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
 size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
 {
     DEBUGLOG(4, "ZSTDMT_endStream");
-    if (mtctx->singleThreaded)
+    if (mtctx->singleBlockingThread)
         return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
     return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
 }
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index d12f0adb..b6e68684 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -85,7 +85,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
  * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
 typedef enum {
     ZSTDMT_p_jobSize,           /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
-    ZSTDMT_p_overlapSectionLog  /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+    ZSTDMT_p_overlapSectionLog  /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
 } ZSTDMT_parameter;
 
 /* ZSTDMT_setMTCtxParameter() :
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index ab6c2edc..a573cc19 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1759,7 +1759,7 @@ static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skip
   * or an error code, which can be tested using ZSTD_isError() */
 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    DEBUGLOG(5, "ZSTD_decompressContinue");
+    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
     /* Sanity check */
     if (srcSize != dctx->expected) return ERROR(srcSize_wrong);  /* not allowed */
     if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
@@ -1820,12 +1820,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
 
     case ZSTDds_decompressLastBlock:
     case ZSTDds_decompressBlock:
-        DEBUGLOG(5, "case ZSTDds_decompressBlock");
+        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
         {   size_t rSize;
             switch(dctx->bType)
             {
             case bt_compressed:
-                DEBUGLOG(5, "case bt_compressed");
+                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
                 rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
                 break;
             case bt_raw :
@@ -1839,12 +1839,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                 return ERROR(corruption_detected);
             }
             if (ZSTD_isError(rSize)) return rSize;
-            DEBUGLOG(5, "decoded size from block : %u", (U32)rSize);
+            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
             dctx->decodedSize += rSize;
             if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
 
             if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
-                DEBUGLOG(4, "decoded size from frame : %u", (U32)dctx->decodedSize);
+                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
                 if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
                     if (dctx->decodedSize != dctx->fParams.frameContentSize) {
                         return ERROR(corruption_detected);
@@ -1868,7 +1868,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
         assert(srcSize == 4);  /* guaranteed by dctx->expected */
         {   U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
             U32 const check32 = MEM_readLE32(src);
-            DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32);
+            DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
             if (check32 != h32) return ERROR(checksum_wrong);
             dctx->expected = 0;
             dctx->stage = ZSTDds_getFrameHeaderSize;
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 5590c9af..5cd1ea0f 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1243,6 +1243,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
                 maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2);
                 if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1;
                 {   int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1;
+                    DISPLAYLEVEL(5, "Init with compression level = %i \n", compressionLevel);
                     CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) );
                 }
             } else {
@@ -1301,9 +1302,12 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
             if ((FUZ_rand(&lseed) & 15) == 0) {
                 size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
+                size_t const previousPos = outBuff.pos;
                 outBuff.size = outBuff.pos + adjustedDstSize;
                 DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (U32)adjustedDstSize);
                 CHECK_Z( ZSTDMT_flushStream(zc, &outBuff) );
+                assert(outBuff.pos >= previousPos);
+                DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_flushStream \n", (U32)(outBuff.pos-previousPos));
             }
         }
         /* final frame epilogue */
@@ -1311,10 +1315,13 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
             while (remainingToFlush) {
                 size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
+                size_t const previousPos = outBuff.pos;
                 outBuff.size = outBuff.pos + adjustedDstSize;
                 DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (U32)adjustedDstSize);
                 remainingToFlush = ZSTDMT_endStream(zc, &outBuff);
                 CHECK (ZSTD_isError(remainingToFlush), "ZSTDMT_endStream error : %s", ZSTD_getErrorName(remainingToFlush));
+                assert(outBuff.pos >= previousPos);
+                DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_endStream \n", (U32)(outBuff.pos-previousPos));
                 DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (U32)remainingToFlush);
         }   }
         crcOrig = XXH64_digest(&xxhState);
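
Editor's note, illustration only (not part of the commit itself): the sketch below shows one way a caller might drive the ZSTDMT streaming interface whose behaviour this patch changes, i.e. ZSTDMT_initCStream() + ZSTDMT_compressStream() + ZSTDMT_endStream() on a context created with ZSTDMT_createCCtx(). It is a minimal sketch, assuming the ZSTDMT_* prototypes from lib/compress/zstdmt_compress.h; the helper name compress_with_ZSTDMT, its error conventions, and the assumption that dstCapacity is at least ZSTD_compressBound(srcSize) (so the flush loop terminates) are illustrative choices, not part of the library.

/* Illustration only : a hypothetical caller of the ZSTDMT streaming API.
 * Assumes dstCapacity >= ZSTD_compressBound(srcSize). */
#include "zstd.h"              /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_isError */
#include "zstdmt_compress.h"   /* ZSTDMT_* prototypes */

static size_t compress_with_ZSTDMT(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
                                   unsigned nbThreads, int cLevel)
{
    ZSTD_inBuffer  input  = { src, srcSize, 0 };
    ZSTD_outBuffer output = { dst, dstCapacity, 0 };
    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(nbThreads);
    if (mtctx == NULL) return (size_t)(-1);   /* allocation failure */

    {   size_t const initResult = ZSTDMT_initCStream(mtctx, cLevel);
        if (ZSTD_isError(initResult)) { ZSTDMT_freeCCtx(mtctx); return initResult; }
    }

    /* Feed the input : with this patch, the call hands work to the worker
     * thread and returns, instead of blocking until the job completes. */
    while (input.pos < input.size) {
        size_t const ret = ZSTDMT_compressStream(mtctx, &output, &input);
        if (ZSTD_isError(ret)) { ZSTDMT_freeCCtx(mtctx); return ret; }
    }

    /* End the frame : ZSTDMT_endStream() returns the number of bytes still
     * waiting to be flushed; loop until it reports 0. */
    {   size_t remainingToFlush;
        do {
            remainingToFlush = ZSTDMT_endStream(mtctx, &output);
            if (ZSTD_isError(remainingToFlush)) { ZSTDMT_freeCCtx(mtctx); return remainingToFlush; }
        } while (remainingToFlush != 0);
    }

    ZSTDMT_freeCCtx(mtctx);
    return output.pos;   /* compressed size */
}

With nbThreads==1 and a large enough input, the calls above now return as soon as work has been handed to the worker thread; a small input, or a direct call to ZSTDMT_compress(), still results in a single-threaded blocking compression, as stated in the commit message.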