From f2a3b6e7b4d4a98975c1b0b7687034ba6b9d01e8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 31 May 2016 18:13:56 +0200 Subject: [PATCH] added : frame content checksum --- .gitignore | 4 +- lib/common/zstd_static.h | 6 +- lib/compress/zstd_compress.c | 124 +++++++++++++++++-------------- lib/decompress/zstd_decompress.c | 9 +++ programs/.clang_complete | 3 - programs/.gitignore | 1 + programs/fuzzer.c | 10 ++- 7 files changed, 91 insertions(+), 66 deletions(-) delete mode 100644 programs/.clang_complete diff --git a/.gitignore b/.gitignore index 2d3b6917..c9021249 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,4 @@ projects/VS2015 _codelite/ _zstdbench/ zlib_wrapper/ - -# CMake -contrib/cmake/ +.clang_complete diff --git a/lib/common/zstd_static.h b/lib/common/zstd_static.h index d9f619f2..6b79b366 100644 --- a/lib/common/zstd_static.h +++ b/lib/common/zstd_static.h @@ -86,8 +86,9 @@ typedef struct { } ZSTD_compressionParameters; typedef struct { - U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */ - U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */ + U32 contentSizeFlag; /* 1: content size will be in frame header (if known). 
*/ + U32 checksumFlag; /* 1: will generate a 22-bit checksum at end of frame, to be used for error detection by decompressor */ + U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */ } ZSTD_frameParameters; typedef struct { @@ -196,6 +197,7 @@ typedef struct { U64 frameContentSize; U32 windowLog; U32 dictID; + U32 checksumFlag; } ZSTD_frameParams; #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c7ba0305..a9a2dc79 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -51,9 +51,11 @@ /*-************************************* * Dependencies ***************************************/ -#include /* malloc */ -#include /* memset */ +#include /* malloc */ +#include /* memset */ #include "mem.h" +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest */ #include "fse_static.h" #include "huf_static.h" #include "zstd_internal.h" @@ -104,6 +106,7 @@ struct ZSTD_CCtx_s void* workSpace; size_t workSpaceSize; size_t blockSize; + XXH64_state_t xxhState; ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; @@ -266,6 +269,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } } if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ + XXH64_reset(&zc->xxhState, 0); zc->hashTable3 = (U32*)(zc->workSpace); zc->hashTable = zc->hashTable3 + h3Size; zc->chainTable = zc->hashTable + hSize; @@ -1938,7 +1942,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, /* catch up */ if (offset) { - U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const mStart = (matchIndex < dictLimit) ? 
dictStart : prefixStart; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ @@ -2043,19 +2047,22 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa -static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) +static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) { - size_t blockSize = zc->blockSize; + size_t blockSize = cctx->blockSize; size_t remaining = srcSize; const BYTE* ip = (const BYTE*)src; BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; - const U32 maxDist = 1 << zc->params.cParams.windowLog; - ZSTD_stats_t* stats = &zc->seqStore.stats; + const U32 maxDist = 1 << cctx->params.cParams.windowLog; + ZSTD_stats_t* stats = &cctx->seqStore.stats; ZSTD_statsInit(stats); + if (cctx->params.fParams.checksumFlag) + XXH64_update(&cctx->xxhState, src, srcSize); + while (remaining) { size_t cSize; ZSTD_statsResetFreqs(stats); @@ -2063,14 +2070,14 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ if (remaining < blockSize) blockSize = remaining; - if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) { + if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { /* enforce maxDist */ - U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist; - if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit; - if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit; + U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; + if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; + if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit; } - cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, 
dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); if (ZSTD_isError(cSize)) return cSize; if (cSize == 0) { /* block is not compressible */ @@ -2090,7 +2097,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, op += cSize; } - ZSTD_statsPrint(stats, zc->params.cParams.searchLength); + ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); return op-ostart; } @@ -2104,7 +2111,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, BYTE const fAllocByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */ | (fcsId << 6) ); U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - BYTE const fCheckByte = (BYTE)(dictIDSizeCode&3); + BYTE const fCheckByte = (BYTE)((dictIDSizeCode&3) + ((params.fParams.checksumFlag>0)<<4)); /* flag normalized to 0/1 : any non-zero value sets bit 4 only */ size_t pos; if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); @@ -2261,40 +2268,45 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize) { /* note : magic number already checked */ - size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; - short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; - short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + size_t const dictSizeStart = dictSize; - size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); - if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); - zc->flagStaticTables = 1; - dict = (const char*)dict + hufHeaderSize; - dictSize -= hufHeaderSize; + { size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 
255, dict, dictSize); + if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); + zc->flagStaticTables = 1; + dict = (const char*)dict + hufHeaderSize; + dictSize -= hufHeaderSize; + } - offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); - if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); - if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + offcodeHeaderSize; - dictSize -= offcodeHeaderSize; + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dict = (const char*)dict + offcodeHeaderSize; + dictSize -= offcodeHeaderSize; + } - matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); - if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); - if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + matchlengthHeaderSize; - dictSize -= matchlengthHeaderSize; + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, 
matchlengthNCount, matchlengthMaxValue, matchlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dict = (const char*)dict + matchlengthHeaderSize; + dictSize -= matchlengthHeaderSize; + } - litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); - if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); - if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictSize -= litlengthHeaderSize; + } - return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize; + return (dictSizeStart-dictSize); } /** ZSTD_compress_insertDictionary() : @@ -2366,30 +2378,34 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) /*! ZSTD_compressEnd() : * Write frame epilogue. * @return : nb of bytes written into dst (or an error code) */ -size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity) +size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) { BYTE* op = (BYTE*)dst; size_t fhSize = 0; /* not even init ! 
*/ - if (zc->stage==0) return ERROR(stage_wrong); + if (cctx->stage==0) return ERROR(stage_wrong); /* special case : empty frame */ - if (zc->stage==1) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0, 0); + if (cctx->stage==1) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); if (ZSTD_isError(fhSize)) return fhSize; dstCapacity -= fhSize; op += fhSize; - zc->stage = 2; + cctx->stage = 2; } /* frame epilogue */ if (dstCapacity < 3) return ERROR(dstSize_tooSmall); - op[0] = (BYTE)(bt_end << 6); - op[1] = 0; - op[2] = 0; + { U32 const checksum = cctx->params.fParams.checksumFlag ? + (U32)((XXH64_digest(&cctx->xxhState) >> 11) & ((1<<22)-1)) : + 0; + op[0] = (BYTE)((bt_end<<6) + (checksum>>16)); + op[1] = (BYTE)(checksum>>8); + op[2] = (BYTE)checksum; + } - zc->stage = 0; /* return to "created by not init" status */ + cctx->stage = 0; /* return to "created but not init" status */ return 3+fhSize; } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index b0831617..6ade3871 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -57,6 +57,8 @@ #include /* memcpy, memmove */ #include /* debug only : printf */ #include "mem.h" /* low level memory routines */ +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH64_* */ #include "zstd_internal.h" #include "fse_static.h" #include "huf_static.h" @@ -116,6 +118,7 @@ struct ZSTD_DCtx_s size_t expected; size_t headerSize; ZSTD_frameParams fParams; + XXH64_state_t xxhState; ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ @@ -339,6 +342,8 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t U32 const dictIDSizeCode = checkByte&3; fparamsPtr->windowLog = (allocByte & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN; if ((allocByte & 0x30) != 
0) return ERROR(frameParameter_unsupported); /* reserved bits */ + if ((checkByte & 0xEC) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */ + fparamsPtr->checksumFlag = checkByte & 0x10; switch(dictIDSizeCode) /* fcsId */ { default: /* impossible */ @@ -367,6 +372,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize); if ((MEM_32bits()) && (dctx->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits); if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong); + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); return result; } @@ -1021,6 +1027,9 @@ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) return dctx->expected; } +/** ZSTD_decompressContinue() : +* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`) +* or an error code, which can be tested using ZSTD_isError() */ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { /* Sanity check */ diff --git a/programs/.clang_complete b/programs/.clang_complete deleted file mode 100644 index 658aa00b..00000000 --- a/programs/.clang_complete +++ /dev/null @@ -1,3 +0,0 @@ --I../lib/common --I../lib/legacy --I./legacy diff --git a/programs/.gitignore b/programs/.gitignore index f7061d3b..0f391d2d 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -40,6 +40,7 @@ grillResults.txt _* tmp* *.zst +result # fuzzer afl diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 7fbf906c..0c338928 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -216,7 +216,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++); { size_t const testSize = CNBuffSize / 3; { ZSTD_compressionParameters const cPar = ZSTD_getCParams(2, testSize, dictSize); - 
ZSTD_frameParameters const fPar = { 1 , 0 }; + ZSTD_frameParameters const fPar = { 1 , 0 , 0 }; ZSTD_parameters p; p.cParams = cPar; p.fParams = fPar; CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) ); @@ -276,7 +276,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "test%3i : compress without dictID : ", testNb++); - { ZSTD_frameParameters const fParams = { 0, 1 /*NoDictID*/ }; + { ZSTD_frameParameters const fParams = { 0 /*contentSize*/, 0 /*checksum*/, 1 /*NoDictID*/ }; ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, CNBuffSize, dictSize); ZSTD_parameters p; p.cParams = cParams; p.fParams = fParams; @@ -639,12 +639,14 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD dictSize = FUZ_randomLength(&lseed, maxSampleLog); /* needed also for decompression */ dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize)); - if (FUZ_rand(&lseed) & 15) { + if (FUZ_rand(&lseed) & 0xF) { size_t const errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel); CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode)); } else { ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize); - ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1, FUZ_rand(&lseed)&1 }; /* note : since dictionary is fake, dictIDflag has no impact */ + ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */, + !(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/, + 0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */ ZSTD_parameters p; size_t errorCode; p.cParams = cPar; p.fParams = fpar;