ZSTDMT_compress() creates a single frame
The new strategy involves cutting frame at block level. The result is a single frame, preserving ZSTD_getDecompressedSize() As a consequence, bench can now make a full round-trip, since the result is compatible with ZSTD_decompress(). This strategy will not make it possible to decode the frame with multiple threads since the exact cut between independent blocks is not known. MT decoding needs further discussions.
This commit is contained in:
parent
04cbc36499
commit
5eb749e734
2
Makefile
2
Makefile
@ -88,7 +88,7 @@ travis-install:
|
||||
$(MAKE) install PREFIX=~/install_test_dir
|
||||
|
||||
gpptest: clean
|
||||
$(MAKE) -C programs all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
|
||||
CC=g++ $(MAKE) -C programs all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
|
||||
|
||||
gcc5test: clean
|
||||
gcc-5 -v
|
||||
|
@ -2408,12 +2408,14 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
||||
|
||||
cctx->nextSrc = ip + srcSize;
|
||||
|
||||
{ size_t const cSize = frame ?
|
||||
if (srcSize) {
|
||||
size_t const cSize = frame ?
|
||||
ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
|
||||
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
|
||||
if (ZSTD_isError(cSize)) return cSize;
|
||||
return cSize + fhSize;
|
||||
}
|
||||
} else
|
||||
return fhSize;
|
||||
}
|
||||
|
||||
|
||||
|
@ -28,8 +28,8 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
|
||||
unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
|
||||
unsigned long long elapsedTime = (afterTime-beforeTime); \
|
||||
if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
|
||||
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread %li took %llu microseconds to acquire mutex %s \n", \
|
||||
(long int) pthread_self(), elapsedTime, #mutex); \
|
||||
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
|
||||
elapsedTime, #mutex); \
|
||||
} \
|
||||
} else pthread_mutex_lock(mutex);
|
||||
|
||||
@ -112,6 +112,7 @@ typedef struct {
|
||||
buffer_t dstBuff;
|
||||
int compressionLevel;
|
||||
unsigned frameID;
|
||||
unsigned long long fullFrameSize;
|
||||
size_t cSize;
|
||||
unsigned jobCompleted;
|
||||
pthread_mutex_t* jobCompleted_mutex;
|
||||
@ -122,9 +123,26 @@ typedef struct {
|
||||
void ZSTDMT_compressFrame(void* jobDescription)
|
||||
{
|
||||
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
|
||||
job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel);
|
||||
buffer_t dstBuff = job->dstBuff;
|
||||
ZSTD_parameters const params = ZSTD_getParams(job->compressionLevel, job->fullFrameSize, 0);
|
||||
size_t hSize = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, params, job->fullFrameSize);
|
||||
if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
|
||||
hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */
|
||||
if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
|
||||
if ((job->frameID & 1) == 0) { /* preserve frame header when it is first beginning of frame */
|
||||
dstBuff.start = (char*)dstBuff.start + hSize;
|
||||
dstBuff.size -= hSize;
|
||||
} else
|
||||
hSize = 0;
|
||||
|
||||
job->cSize = (job->frameID>=2) ? /* last chunk signal */
|
||||
ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) :
|
||||
ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize);
|
||||
if (!ZSTD_isError(job->cSize)) job->cSize += hSize;
|
||||
DEBUGLOG(5, "frame %u : compressed %u bytes into %u bytes ", (unsigned)job->frameID, (unsigned)job->srcSize, (unsigned)job->cSize);
|
||||
pthread_mutex_lock(job->jobCompleted_mutex);
|
||||
|
||||
_endJob:
|
||||
PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
|
||||
job->jobCompleted = 1;
|
||||
pthread_cond_signal(job->jobCompleted_cond);
|
||||
pthread_mutex_unlock(job->jobCompleted_mutex);
|
||||
@ -254,10 +272,11 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
|
||||
mtctx->jobs[u].srcStart = srcStart + frameStartPos;
|
||||
mtctx->jobs[u].srcSize = frameSize;
|
||||
mtctx->jobs[u].fullFrameSize = srcSize;
|
||||
mtctx->jobs[u].compressionLevel = compressionLevel;
|
||||
mtctx->jobs[u].dstBuff = dstBuffer;
|
||||
mtctx->jobs[u].cctx = cctx;
|
||||
mtctx->jobs[u].frameID = u;
|
||||
mtctx->jobs[u].frameID = (u>0) | ((u==nbFrames-1)<<1);
|
||||
mtctx->jobs[u].jobCompleted = 0;
|
||||
mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
|
||||
mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
|
||||
@ -275,7 +294,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
for (frameID=0; frameID<nbFrames; frameID++) {
|
||||
DEBUGLOG(3, "ready to write frame %u ", frameID);
|
||||
|
||||
pthread_mutex_lock(&mtctx->jobCompleted_mutex);
|
||||
PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
|
||||
while (mtctx->jobs[frameID].jobCompleted==0) {
|
||||
DEBUGLOG(4, "waiting for jobCompleted signal from frame %u", frameID);
|
||||
pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
|
||||
|
@ -561,10 +561,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
|
||||
In which case, it will "discard" the relevant memory section from its history.
|
||||
|
||||
Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
|
||||
It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
|
||||
Without last block mark, frames will be considered unfinished (broken) by decoders.
|
||||
It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
|
||||
Without last block mark, frames will be considered unfinished (corrupted) by decoders.
|
||||
|
||||
You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
|
||||
`ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
|
||||
*/
|
||||
|
||||
/*===== Buffer-less streaming compression functions =====*/
|
||||
|
@ -129,7 +129,7 @@ gzstd:
|
||||
|
||||
zstdmt: CPPFLAGS += -DZSTD_PTHREAD
|
||||
zstdmt: LDFLAGS += -lpthread
|
||||
zstdmt: clean zstd
|
||||
zstdmt: zstd
|
||||
|
||||
generate_res:
|
||||
windres/generate_res.bat
|
||||
|
@ -321,7 +321,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
|
||||
}
|
||||
|
||||
#if 1
|
||||
#if 0 /* disable decompression test */
|
||||
dCompleted=1;
|
||||
(void)totalDTime; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */
|
||||
#else
|
||||
@ -330,13 +330,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
|
||||
UTIL_sleepMilli(1); /* give processor time to other processes */
|
||||
UTIL_waitForNextTick(ticksPerSecond);
|
||||
UTIL_getTime(&clockStart);
|
||||
|
||||
if (!dCompleted) {
|
||||
U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
|
||||
U32 nbLoops = 0;
|
||||
clock_us_t clockStart;
|
||||
ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
|
||||
if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure");
|
||||
clockStart = BMK_clockMicroSec();
|
||||
do {
|
||||
U32 blockNb;
|
||||
for (blockNb=0; blockNb<nbBlocks; blockNb++) {
|
||||
@ -345,19 +346,19 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
|
||||
ddict);
|
||||
if (ZSTD_isError(regenSize)) {
|
||||
DISPLAY("ZSTD_decompress_usingDDict() failed on block %u : %s \n",
|
||||
blockNb, ZSTD_getErrorName(regenSize));
|
||||
DISPLAY("ZSTD_decompress_usingDDict() failed on block %u of size %u : %s \n",
|
||||
blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
|
||||
clockLoop = 0; /* force immediate test end */
|
||||
break;
|
||||
}
|
||||
blockTable[blockNb].resSize = regenSize;
|
||||
}
|
||||
nbLoops++;
|
||||
} while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
|
||||
} while (BMK_clockMicroSec() - clockStart < clockLoop);
|
||||
ZSTD_freeDDict(ddict);
|
||||
{ U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
|
||||
if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;
|
||||
totalDTime += clockSpan;
|
||||
{ clock_us_t const clockSpanMicro = BMK_clockMicroSec() - clockStart;
|
||||
if (clockSpanMicro < fastestD*nbLoops) fastestD = clockSpanMicro / nbLoops;
|
||||
totalDTime += clockSpanMicro;
|
||||
dCompleted = (totalDTime >= maxTime);
|
||||
} }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user