From c71de79688ee778c284c5367388b8c45546da25b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 10 Sep 2014 22:17:03 +0100 Subject: [PATCH] Improved lz4frame compression speed Added : fullbench test -c14 (LZ4F_compressFrame) --- lz4frame.c | 135 +++++++++++++++---------------------------- lz4frame.h | 12 ++-- programs/Makefile | 4 +- programs/frametest.c | 8 +++ programs/fullbench.c | 8 ++- 5 files changed, 68 insertions(+), 99 deletions(-) diff --git a/lz4frame.c b/lz4frame.c index 071fb9e..55da791 100644 --- a/lz4frame.c +++ b/lz4frame.c @@ -58,7 +58,7 @@ #include /* malloc, calloc, free */ #define ALLOCATOR(s) calloc(1,s) #define FREEMEM free -#include /* memset, memcpy */ +#include /* memset, memcpy, memmove */ #define MEM_INIT memset @@ -119,8 +119,6 @@ typedef struct { size_t maxBlockSize; size_t maxBufferSize; BYTE* tmpBuff; - BYTE* tmpDict; - size_t tmpDictSize; BYTE* tmpIn; size_t tmpInSize; XXH32_stateSpace_t xxh; @@ -209,30 +207,16 @@ int LZ4F_isError(LZ4F_errorCode_t code) **************************************/ size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_frameInfo_t* frameInfoPtr) { - const LZ4F_frameInfo_t frameInfoNull = { 0 }; + LZ4F_preferences_t prefs = { 0 }; size_t headerSize; - size_t blockInfoSize; - size_t blockSize; - unsigned nbBlocks; - size_t frameSuffixSize; - size_t totalBound; + size_t streamSize; - if (frameInfoPtr==NULL) frameInfoPtr = &frameInfoNull; /* all parameters set to default */ + if (frameInfoPtr!=NULL) prefs.frameInfo = *frameInfoPtr; headerSize = 7; /* basic header size (no option) including magic number */ - blockInfoSize = 4; /* basic blockInfo size (no option) for one block */ + streamSize = LZ4F_compressBound(srcSize, &prefs); - blockSize = LZ4F_getBlockSize(frameInfoPtr->blockSizeID); - nbBlocks = (srcSize + (blockSize-1)) / blockSize; - blockInfoSize *= nbBlocks; /* total block info size */ - - frameSuffixSize = 4; /* basic frameSuffixSize (no option) */ - if (frameInfoPtr->contentChecksumFlag == contentChecksumEnabled) frameSuffixSize += 4; - - totalBound = headerSize + srcSize + blockInfoSize + frameSuffixSize; - if (totalBound < srcSize) return -ERROR_srcSize_tooLarge; /* overflow error */ - - return totalBound; + return headerSize + streamSize; } @@ -253,11 +237,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf LZ4F_errorCode_t errorCode; BYTE* const dstStart = (BYTE*) dstBuffer; BYTE* dstPtr = dstStart; - size_t blockSize = LZ4F_getBlockSize(frameInfoPtr->blockSizeID); - unsigned nbBlocks = (srcSize + (blockSize-1)) / blockSize; - unsigned blockNb; - const BYTE* srcPtr = (const BYTE*) srcBuffer; - const size_t dstBlockSize = LZ4F_compressBound(blockSize, frameInfoPtr); + BYTE* const dstEnd = dstStart + dstMaxSize; if (dstMaxSize < LZ4F_compressFrameBound(srcSize, frameInfoPtr)) @@ -270,24 +250,12 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf if (LZ4F_isError(errorCode)) return errorCode; dstPtr += errorCode; /* header size */ - for (blockNb=1; blockNbmaxBufferSize < cctxPtr->maxBlockSize + (cctxPtr->prefs.frameInfo.blockMode == blockLinked)) { cctxPtr->maxBufferSize = cctxPtr->maxBlockSize; - if (cctxPtr->prefs.frameInfo.blockMode == blockLinked) cctxPtr->maxBufferSize += 64 KB; + if (cctxPtr->prefs.frameInfo.blockMode == blockLinked) cctxPtr->maxBufferSize += 128 KB; FREEMEM(cctxPtr->tmpBuff); cctxPtr->tmpBuff = ALLOCATOR(cctxPtr->maxBufferSize); if (cctxPtr->tmpBuff == NULL) return -ERROR_allocation_failed; - cctxPtr->tmpDict = cctxPtr->tmpBuff; cctxPtr->tmpIn = cctxPtr->tmpBuff; - if (cctxPtr->prefs.frameInfo.blockMode == blockLinked) cctxPtr->tmpIn += 64 KB; } - cctxPtr->tmpDictSize = 0; cctxPtr->tmpInSize = 0; XXH32_resetState(&(cctxPtr->xxh), 0); LZ4_resetStream(&(cctxPtr->lz4ctx)); @@ -400,40 +365,21 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds * The LZ4F_frameInfo_t structure is optional : * you can provide NULL as argument, all preferences will then be set to default. * */ -size_t LZ4F_compressBound(size_t srcSize, const LZ4F_frameInfo_t* frameInfoPtr) +size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr) { + LZ4F_frameInfo_t* frameInfoPtr = (LZ4F_frameInfo_t*)preferencesPtr; /* works because prefs starts with frameInfo */ blockSizeID_t bid = (frameInfoPtr==NULL) ? LZ4F_BLOCKSIZEID_DEFAULT : frameInfoPtr->blockSizeID; size_t blockSize = LZ4F_getBlockSize(bid); - size_t vSrcSize = srcSize + (blockSize-1); /* worst case : tmp buffer almost filled */ - unsigned nbBlocks = vSrcSize / blockSize; + unsigned bufferize = !(preferencesPtr->autoFlush); + unsigned nbBlocks = (srcSize / blockSize) + 1; + size_t lastBlockSize = bufferize ? blockSize : srcSize % blockSize; size_t blockInfo = 4; /* default, without block CRC option */ - size_t frameEnd = 4 + frameInfoPtr->contentChecksumFlag*4; - size_t lastBlockSize = blockInfo + (blockSize-1) + frameEnd; - size_t result = (blockSize + blockInfo) * nbBlocks; + size_t frameEnd = 4 + (frameInfoPtr->contentChecksumFlag*4); + size_t result = (blockInfo * nbBlocks) + (blockSize * (nbBlocks-1)) + lastBlockSize + frameEnd; - if (result < lastBlockSize) result = lastBlockSize; return result; } -/* LZ4F_getMaxSrcSize() : gives max allowed srcSize given dstMaxSize to handle worst case situations. - * You can use dstMaxSize==0 to know the "natural" srcSize instead (block size). - * The LZ4F_frameInfo_t structure is optional : - * you can provide NULL as argument, all preferences will then be set to default. - * */ -size_t LZ4F_getMaxSrcSize(size_t dstMaxSize, const LZ4F_frameInfo_t* frameInfoPtr) -{ - blockSizeID_t bid = (frameInfoPtr==NULL) ? LZ4F_BLOCKSIZEID_DEFAULT : frameInfoPtr->blockSizeID; - size_t blockSize = LZ4F_getBlockSize(bid); - size_t worstCBlockSize = blockSize + 4; /* default, with no block CRC option */ - unsigned nbBlocks = dstMaxSize / worstCBlockSize; - size_t maxSrcSize = nbBlocks * blockSize; - - if (dstMaxSize == 0) return blockSize; - if (nbBlocks == 0) return -ERROR_dstMaxSize_tooSmall; /* can't even fit one block */ - - return maxSrcSize; -} - /* LZ4F_compress() * You can then call LZ4F_compress() repetitively to compress as much data as necessary. @@ -460,7 +406,7 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff if (cctxPtr->cStage != 1) return -ERROR_GENERIC; - if (dstMaxSize < LZ4F_compressBound(srcSize, &(cctxPtr->prefs.frameInfo))) return -ERROR_dstMaxSize_tooSmall; + if (dstMaxSize < LZ4F_compressBound(srcSize, &(cctxPtr->prefs))) return -ERROR_dstMaxSize_tooSmall; if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull; /* select compression function */ @@ -507,7 +453,7 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff /* compress one block */ BYTE* cSizePtr = dstPtr; U32 cSize; - lastBlockCompressed = 1; + lastBlockCompressed = 2; dstPtr += 4; /* space for cSizePtr */ cSize = (U32)compress(&(cctxPtr->lz4ctx), (const char*)srcPtr, (char*)dstPtr, (int)(blockSize), (int)(blockSize-1)); dstPtr += cSize; @@ -524,11 +470,20 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff if ((cctxPtr->prefs.frameInfo.blockMode == blockLinked) && (lastBlockCompressed)) { - /* last 64 KB of input become dictionary */ - /* assumption : previous block size was at least 64 KB */ - int result = LZ4_saveDict (&(cctxPtr->lz4ctx), (char*)(cctxPtr->tmpDict), 64 KB); - if (!result) return ERROR_GENERIC; - cctxPtr->tmpIn = cctxPtr->tmpDict + result; + /* last compressed input up to 64 KB become dictionary */ + if (0 && (lastBlockCompressed==1) && + (cctxPtr->tmpBuff + cctxPtr->maxBufferSize > cctxPtr->tmpIn + cctxPtr->tmpInSize + cctxPtr->maxBlockSize)) + { + /* in theory, no need to "save", everything is properly stacked and tracked, so where is the problem ? */ + cctxPtr->tmpIn += cctxPtr->tmpInSize; + } + else + { + int result; + result = LZ4_saveDict (&(cctxPtr->lz4ctx), (char*)(cctxPtr->tmpBuff), 64 KB); + if (result==0) return -ERROR_GENERIC; + cctxPtr->tmpIn = cctxPtr->tmpBuff + result; + } } if (srcPtr < srcEnd) /* some input data left */ @@ -565,7 +520,7 @@ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, if (cctxPtr->tmpInSize == 0) return 0; /* nothing to flush */ if (cctxPtr->cStage != 1) return -ERROR_GENERIC; - if (dstMaxSize < LZ4F_compressBound(1, &(cctxPtr->prefs.frameInfo))) return -ERROR_dstMaxSize_tooSmall; + if (dstMaxSize < (cctxPtr->tmpInSize + 16)) return -ERROR_dstMaxSize_tooSmall; if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull; /* select compression function */ @@ -590,12 +545,13 @@ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, cctxPtr->tmpInSize = 0; } - if (cctxPtr->prefs.frameInfo.blockMode == blockLinked) + if ((cctxPtr->prefs.frameInfo.blockMode == blockLinked) + )//&& (cctxPtr->maxBufferSize < (cctxPtr->tmpIn - cctxPtr->tmpDict) + cctxPtr->tmpInSize + cctxPtr->maxBlockSize )) { /* last 64 KB of input become dictionary */ - int result = LZ4_saveDict (&(cctxPtr->lz4ctx), (char*)(cctxPtr->tmpDict), 64 KB); + int result = LZ4_saveDict (&(cctxPtr->lz4ctx), (char*)(cctxPtr->tmpBuff), 64 KB); if (!result) return ERROR_GENERIC; - cctxPtr->tmpIn = cctxPtr->tmpDict + result; + cctxPtr->tmpIn = cctxPtr->tmpBuff + result; } return dstPtr - dstStart; @@ -678,6 +634,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt { BYTE FLG, BD, HC; unsigned version, blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictFlag, blockSizeID; + size_t bufferNeeded; /* need to decode header to get frameInfo */ if (srcSize < 7) return -ERROR_GENERIC; /* minimal header size */ @@ -722,12 +679,12 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt if (contentChecksumFlag) XXH32_resetState(&(dctxPtr->xxh), 0); /* alloc */ - if (dctxPtr->maxBlockSize + (dctxPtr->frameInfo.blockMode==blockLinked) > dctxPtr->maxBufferSize) /* tmp buffers too small */ + bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==blockLinked) * 64 KB); + if (bufferNeeded > dctxPtr->maxBufferSize) /* tmp buffers too small */ { FREEMEM(dctxPtr->tmpIn); FREEMEM(dctxPtr->tmpOutBuffer); - dctxPtr->maxBufferSize = dctxPtr->maxBlockSize; - if (dctxPtr->frameInfo.blockMode==blockLinked) dctxPtr->maxBufferSize += 64 KB; + dctxPtr->maxBufferSize = bufferNeeded; dctxPtr->tmpIn = ALLOCATOR(dctxPtr->maxBlockSize); if (dctxPtr->tmpIn == NULL) return -ERROR_GENERIC; dctxPtr->tmpOutBuffer= ALLOCATOR(dctxPtr->maxBufferSize); diff --git a/lz4frame.h b/lz4frame.h index a8f8544..bd55a0a 100644 --- a/lz4frame.h +++ b/lz4frame.h @@ -91,7 +91,7 @@ typedef struct { typedef struct { LZ4F_frameInfo_t frameInfo; unsigned compressionLevel; /* from 0 to 16 */ - unsigned autoFlush; /* 1 == automatic flush after each call to LZ4F_compress() */ + unsigned autoFlush; /* 1 == always flush; reduce need for tmp buffer */ unsigned reserved[4]; } LZ4F_preferences_t; @@ -152,12 +152,10 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds * or an error code (can be tested using LZ4F_isError()) */ -size_t LZ4F_compressBound(size_t srcSize, const LZ4F_frameInfo_t* frameInfoPtr); -size_t LZ4F_getMaxSrcSize(size_t dstMaxSize, const LZ4F_frameInfo_t* frameInfoPtr); -/* LZ4F_compressBound() : gives the size of Dst buffer given a srcSize to handle worst case situations. - * LZ4F_getMaxSrcSize() : gives max allowed srcSize given dstMaxSize to handle worst case situations. - * You can use dstMaxSize==0 to know the "natural" srcSize instead (block size). - * The LZ4F_frameInfo_t structure is optional : you can provide NULL as argument, all preferences will then be set to default. +size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr); +/* LZ4F_compressBound() : + * Provides the minimum size of Dst buffer given srcSize to handle worst case situations. + * preferencesPtr is optional : you can provide NULL as argument, all preferences will then be set to default. */ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr); diff --git a/programs/Makefile b/programs/Makefile index b28c42c..cc139f1 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -79,10 +79,10 @@ lz4c : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c lz4c32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -fullbench : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c +fullbench : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c xxhash.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) -fullbench32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c +fullbench32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c xxhash.c fullbench.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) fuzzer : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fuzzer.c diff --git a/programs/frametest.c b/programs/frametest.c index 2c45f85..9c332d3 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -454,6 +454,14 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi if (oSize > (size_t)(oend-op)) oSize = oend-op; oSize = oend-op; result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); + if (result == (size_t)-ERROR_checksum_invalid) + { + int p=0; + BYTE* b1=(BYTE*)srcBuffer+srcStart; + BYTE* b2=(BYTE*)decodedBuffer; + while (b1[p]==b2[p]) p++; + printf("Error at pos %i : %02X != %02X \n", p, b1[p], b2[p]); + } CHECK(LZ4F_isError(result), "Decompression failed (error %i)", (int)result); op += oSize; ip += iSize; diff --git a/programs/fullbench.c b/programs/fullbench.c index f1d3cc1..b6a1c02 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -321,6 +321,11 @@ static int local_LZ4_compressHC_limitedOutput_continue(const char* in, char* out return LZ4_compressHC_limitedOutput_continue(ctx, in, out, inSize, LZ4_compressBound(inSize)); } +static int local_LZ4F_compressFrame(const char* in, char* out, int inSize) +{ + return LZ4F_compressFrame(out, 2*inSize, in, inSize, NULL); +} + static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize) { (void)inSize; @@ -358,7 +363,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) { int fileIdx=0; char* orig_buff; -# define NB_COMPRESSION_ALGORITHMS 13 +# define NB_COMPRESSION_ALGORITHMS 14 # define MINCOMPRESSIONCHAR '0' double totalCTime[NB_COMPRESSION_ALGORITHMS+1] = {0}; double totalCSize[NB_COMPRESSION_ALGORITHMS+1] = {0}; @@ -494,6 +499,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) case 11: compressionFunction = local_LZ4_compressHC_continue; initFunction = LZ4_createHC; compressorName = "LZ4_compressHC_continue"; break; case 12: compressionFunction = local_LZ4_compressHC_limitedOutput_continue; initFunction = LZ4_createHC; compressorName = "LZ4_compressHC_limitedOutput_continue"; break; case 13: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; + case 14: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; break; default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); free(chunkP); return 1; }