added : frame content checksum
This commit is contained in:
parent
e3f4e6cbda
commit
f2a3b6e7b4
4
.gitignore
vendored
4
.gitignore
vendored
@ -32,6 +32,4 @@ projects/VS2015
|
||||
_codelite/
|
||||
_zstdbench/
|
||||
zlib_wrapper/
|
||||
|
||||
# CMake
|
||||
contrib/cmake/
|
||||
.clang_complete
|
||||
|
@ -87,6 +87,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */
|
||||
U32 checksumFlag; /* 1: will generate a 22-bit checksum at end of frame, to be used for error detection by decompressor */
|
||||
U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */
|
||||
} ZSTD_frameParameters;
|
||||
|
||||
@ -196,6 +197,7 @@ typedef struct {
|
||||
U64 frameContentSize;
|
||||
U32 windowLog;
|
||||
U32 dictID;
|
||||
U32 checksumFlag;
|
||||
} ZSTD_frameParams;
|
||||
|
||||
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
|
||||
|
@ -54,6 +54,8 @@
|
||||
#include <stdlib.h> /* malloc */
|
||||
#include <string.h> /* memset */
|
||||
#include "mem.h"
|
||||
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||
#include "xxhash.h" /* XXH_reset, update, digest */
|
||||
#include "fse_static.h"
|
||||
#include "huf_static.h"
|
||||
#include "zstd_internal.h"
|
||||
@ -104,6 +106,7 @@ struct ZSTD_CCtx_s
|
||||
void* workSpace;
|
||||
size_t workSpaceSize;
|
||||
size_t blockSize;
|
||||
XXH64_state_t xxhState;
|
||||
ZSTD_allocFunction customAlloc;
|
||||
ZSTD_freeFunction customFree;
|
||||
|
||||
@ -266,6 +269,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
|
||||
} }
|
||||
|
||||
if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */
|
||||
XXH64_reset(&zc->xxhState, 0);
|
||||
zc->hashTable3 = (U32*)(zc->workSpace);
|
||||
zc->hashTable = zc->hashTable3 + h3Size;
|
||||
zc->chainTable = zc->hashTable + hSize;
|
||||
@ -1938,7 +1942,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
|
||||
|
||||
/* catch up */
|
||||
if (offset) {
|
||||
U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
|
||||
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
|
||||
const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
|
||||
const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
|
||||
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
|
||||
@ -2043,19 +2047,22 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
|
||||
|
||||
|
||||
|
||||
static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
|
||||
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
size_t blockSize = zc->blockSize;
|
||||
size_t blockSize = cctx->blockSize;
|
||||
size_t remaining = srcSize;
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* op = ostart;
|
||||
const U32 maxDist = 1 << zc->params.cParams.windowLog;
|
||||
ZSTD_stats_t* stats = &zc->seqStore.stats;
|
||||
const U32 maxDist = 1 << cctx->params.cParams.windowLog;
|
||||
ZSTD_stats_t* stats = &cctx->seqStore.stats;
|
||||
ZSTD_statsInit(stats);
|
||||
|
||||
if (cctx->params.fParams.checksumFlag)
|
||||
XXH64_update(&cctx->xxhState, src, srcSize);
|
||||
|
||||
while (remaining) {
|
||||
size_t cSize;
|
||||
ZSTD_statsResetFreqs(stats);
|
||||
@ -2063,14 +2070,14 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
|
||||
if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
|
||||
if (remaining < blockSize) blockSize = remaining;
|
||||
|
||||
if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) {
|
||||
if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
|
||||
/* enforce maxDist */
|
||||
U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist;
|
||||
if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit;
|
||||
if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit;
|
||||
U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
|
||||
if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
|
||||
if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
|
||||
}
|
||||
|
||||
cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
|
||||
cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
|
||||
if (ZSTD_isError(cSize)) return cSize;
|
||||
|
||||
if (cSize == 0) { /* block is not compressible */
|
||||
@ -2090,7 +2097,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
|
||||
op += cSize;
|
||||
}
|
||||
|
||||
ZSTD_statsPrint(stats, zc->params.cParams.searchLength);
|
||||
ZSTD_statsPrint(stats, cctx->params.cParams.searchLength);
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
@ -2104,7 +2111,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
||||
BYTE const fAllocByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */
|
||||
| (fcsId << 6) );
|
||||
U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
|
||||
BYTE const fCheckByte = (BYTE)(dictIDSizeCode&3);
|
||||
BYTE const fCheckByte = (BYTE)((dictIDSizeCode&3) + (params.fParams.checksumFlag<<4));
|
||||
size_t pos;
|
||||
|
||||
if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
|
||||
@ -2261,40 +2268,45 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
|
||||
static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
|
||||
{
|
||||
/* note : magic number already checked */
|
||||
size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode;
|
||||
short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
|
||||
short matchlengthNCount[MaxML+1];
|
||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
|
||||
short litlengthNCount[MaxLL+1];
|
||||
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
|
||||
size_t const dictSizeStart = dictSize;
|
||||
|
||||
size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
|
||||
{ size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
|
||||
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
zc->flagStaticTables = 1;
|
||||
dict = (const char*)dict + hufHeaderSize;
|
||||
dictSize -= hufHeaderSize;
|
||||
}
|
||||
|
||||
offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
|
||||
{ short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
|
||||
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
|
||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
|
||||
{ size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
|
||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
||||
dict = (const char*)dict + offcodeHeaderSize;
|
||||
dictSize -= offcodeHeaderSize;
|
||||
}
|
||||
|
||||
matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
|
||||
{ short matchlengthNCount[MaxML+1];
|
||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
|
||||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
|
||||
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
|
||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
|
||||
{ size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
|
||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
||||
dict = (const char*)dict + matchlengthHeaderSize;
|
||||
dictSize -= matchlengthHeaderSize;
|
||||
}
|
||||
|
||||
litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
|
||||
{ short litlengthNCount[MaxLL+1];
|
||||
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
|
||||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
|
||||
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
|
||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
|
||||
{ size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
|
||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
||||
dictSize -= litlengthHeaderSize;
|
||||
}
|
||||
|
||||
return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
|
||||
return (dictSizeStart-dictSize);
|
||||
}
|
||||
|
||||
/** ZSTD_compress_insertDictionary() :
|
||||
@ -2366,30 +2378,34 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
|
||||
/*! ZSTD_compressEnd() :
|
||||
* Write frame epilogue.
|
||||
* @return : nb of bytes written into dst (or an error code) */
|
||||
size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity)
|
||||
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
|
||||
{
|
||||
BYTE* op = (BYTE*)dst;
|
||||
size_t fhSize = 0;
|
||||
|
||||
/* not even init ! */
|
||||
if (zc->stage==0) return ERROR(stage_wrong);
|
||||
if (cctx->stage==0) return ERROR(stage_wrong);
|
||||
|
||||
/* special case : empty frame */
|
||||
if (zc->stage==1) {
|
||||
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0, 0);
|
||||
if (cctx->stage==1) {
|
||||
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
|
||||
if (ZSTD_isError(fhSize)) return fhSize;
|
||||
dstCapacity -= fhSize;
|
||||
op += fhSize;
|
||||
zc->stage = 2;
|
||||
cctx->stage = 2;
|
||||
}
|
||||
|
||||
/* frame epilogue */
|
||||
if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
|
||||
op[0] = (BYTE)(bt_end << 6);
|
||||
op[1] = 0;
|
||||
op[2] = 0;
|
||||
{ U32 const checksum = cctx->params.fParams.checksumFlag ?
|
||||
(U32)((XXH64_digest(&cctx->xxhState) >> 11) & ((1<<22)-1)) :
|
||||
0;
|
||||
op[0] = (BYTE)((bt_end<<6) + (checksum>>16));
|
||||
op[1] = (BYTE)(checksum>>8);
|
||||
op[2] = (BYTE)checksum;
|
||||
}
|
||||
|
||||
zc->stage = 0; /* return to "created by not init" status */
|
||||
cctx->stage = 0; /* return to "created but not init" status */
|
||||
return 3+fhSize;
|
||||
}
|
||||
|
||||
|
@ -57,6 +57,8 @@
|
||||
#include <string.h> /* memcpy, memmove */
|
||||
#include <stdio.h> /* debug only : printf */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||
#include "xxhash.h" /* XXH64_* */
|
||||
#include "zstd_internal.h"
|
||||
#include "fse_static.h"
|
||||
#include "huf_static.h"
|
||||
@ -116,6 +118,7 @@ struct ZSTD_DCtx_s
|
||||
size_t expected;
|
||||
size_t headerSize;
|
||||
ZSTD_frameParams fParams;
|
||||
XXH64_state_t xxhState;
|
||||
ZSTD_allocFunction customAlloc;
|
||||
ZSTD_freeFunction customFree;
|
||||
blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
|
||||
@ -339,6 +342,8 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
|
||||
U32 const dictIDSizeCode = checkByte&3;
|
||||
fparamsPtr->windowLog = (allocByte & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
|
||||
if ((allocByte & 0x30) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
|
||||
if ((checkByte & 0xEC) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
|
||||
fparamsPtr->checksumFlag = checkByte & 0x10;
|
||||
switch(dictIDSizeCode) /* fcsId */
|
||||
{
|
||||
default: /* impossible */
|
||||
@ -367,6 +372,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr
|
||||
size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize);
|
||||
if ((MEM_32bits()) && (dctx->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
|
||||
if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
|
||||
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1021,6 +1027,9 @@ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
|
||||
return dctx->expected;
|
||||
}
|
||||
|
||||
/** ZSTD_decompressContinue() :
|
||||
* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
|
||||
* or an error code, which can be tested using ZSTD_isError() */
|
||||
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
/* Sanity check */
|
||||
|
@ -1,3 +0,0 @@
|
||||
-I../lib/common
|
||||
-I../lib/legacy
|
||||
-I./legacy
|
1
programs/.gitignore
vendored
1
programs/.gitignore
vendored
@ -40,6 +40,7 @@ grillResults.txt
|
||||
_*
|
||||
tmp*
|
||||
*.zst
|
||||
result
|
||||
|
||||
# fuzzer
|
||||
afl
|
||||
|
@ -216,7 +216,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
|
||||
{ size_t const testSize = CNBuffSize / 3;
|
||||
{ ZSTD_compressionParameters const cPar = ZSTD_getCParams(2, testSize, dictSize);
|
||||
ZSTD_frameParameters const fPar = { 1 , 0 };
|
||||
ZSTD_frameParameters const fPar = { 1 , 0 , 0 };
|
||||
ZSTD_parameters p;
|
||||
p.cParams = cPar; p.fParams = fPar;
|
||||
CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
|
||||
@ -276,7 +276,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : compress without dictID : ", testNb++);
|
||||
{ ZSTD_frameParameters const fParams = { 0, 1 /*NoDictID*/ };
|
||||
{ ZSTD_frameParameters const fParams = { 0 /*contentSize*/, 0 /*checksum*/, 1 /*NoDictID*/ };
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, CNBuffSize, dictSize);
|
||||
ZSTD_parameters p;
|
||||
p.cParams = cParams; p.fParams = fParams;
|
||||
@ -639,12 +639,14 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
|
||||
dictSize = FUZ_randomLength(&lseed, maxSampleLog); /* needed also for decompression */
|
||||
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
|
||||
|
||||
if (FUZ_rand(&lseed) & 15) {
|
||||
if (FUZ_rand(&lseed) & 0xF) {
|
||||
size_t const errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel);
|
||||
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode));
|
||||
} else {
|
||||
ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
|
||||
ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1, FUZ_rand(&lseed)&1 }; /* note : since dictionary is fake, dictIDflag has no impact */
|
||||
ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */,
|
||||
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
|
||||
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
|
||||
ZSTD_parameters p;
|
||||
size_t errorCode;
|
||||
p.cParams = cPar; p.fParams = fpar;
|
||||
|
Loading…
Reference in New Issue
Block a user