commit
2b61f74b1e
3
Makefile
3
Makefile
@ -168,6 +168,9 @@ bmix32test: clean
|
||||
|
||||
bmi32test: clean
|
||||
CFLAGS="-O3 -mbmi -m32 -Werror" $(MAKE) -C $(PRGDIR) test
|
||||
|
||||
staticAnalyze: clean
|
||||
CPPFLAGS=-g scan-build --status-bugs -v $(MAKE) all
|
||||
endif
|
||||
|
||||
|
||||
|
5
NEWS
5
NEWS
@ -1,3 +1,8 @@
|
||||
v0.7.2
|
||||
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski
|
||||
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
|
||||
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
|
||||
|
||||
v0.7.1
|
||||
fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
|
||||
fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
|
||||
|
@ -44,10 +44,8 @@ extern "C" {
|
||||
/* ***************************************************************
|
||||
* Compiler specifics
|
||||
*****************************************************************/
|
||||
/*!
|
||||
* ZSTD_DLL_EXPORT :
|
||||
* Enable exporting of functions when building a Windows DLL
|
||||
*/
|
||||
/* ZSTD_DLL_EXPORT :
|
||||
* Enable exporting of functions when building a Windows DLL */
|
||||
#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
||||
# define ZSTDLIB_API __declspec(dllexport)
|
||||
#else
|
||||
@ -103,8 +101,8 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCap
|
||||
* @return : nb of bytes still present into internal buffer (0 if it's empty)
|
||||
* or an error code, which can be tested using ZBUFF_isError().
|
||||
*
|
||||
* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
|
||||
* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering).
|
||||
* Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize()
|
||||
* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency)
|
||||
* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
|
||||
* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
|
||||
* **************************************************/
|
||||
|
@ -61,7 +61,7 @@ extern "C" {
|
||||
***************************************/
|
||||
#define ZSTD_VERSION_MAJOR 0
|
||||
#define ZSTD_VERSION_MINOR 7
|
||||
#define ZSTD_VERSION_RELEASE 1
|
||||
#define ZSTD_VERSION_RELEASE 2
|
||||
|
||||
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
|
||||
#define ZSTD_QUOTE(str) #str
|
||||
@ -200,7 +200,6 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
|
||||
/*--- Dependency ---*/
|
||||
#include "mem.h" /* U32 */
|
||||
|
||||
|
||||
/*--- Constants ---*/
|
||||
#define ZSTD_MAGICNUMBER 0xFD2FB527 /* v0.7 */
|
||||
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
|
||||
@ -270,16 +269,21 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
|
||||
|
||||
ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
|
||||
|
||||
/*! ZSTD_getParams() :
|
||||
* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
|
||||
* All fields of `ZSTD_frameParameters` are set to default (0) */
|
||||
ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize, size_t dictSize);
|
||||
|
||||
/*! ZSTD_getCParams() :
|
||||
* @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
|
||||
* `srcSize` value is optional, select 0 if not known */
|
||||
ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize);
|
||||
|
||||
/*! ZSTD_checkParams() :
|
||||
/*! ZSTD_checkCParams() :
|
||||
* Ensure param values remain within authorized range */
|
||||
ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
|
||||
|
||||
/*! ZSTD_adjustParams() :
|
||||
/*! ZSTD_adjustCParams() :
|
||||
* optimize params for a given `srcSize` and `dictSize`.
|
||||
* both values are optional, select `0` if unknown. */
|
||||
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U64 srcSize, size_t dictSize);
|
||||
@ -408,6 +412,7 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
|
||||
|
||||
A few rules to respect :
|
||||
- Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
|
||||
+ If you need to compress more, it's recommended to use ZSTD_compress() instead, since frame metadata costs become negligible.
|
||||
- Compressing or decompressing requires a context structure
|
||||
+ Use ZSTD_createCCtx() and ZSTD_createDCtx()
|
||||
- It is necessary to init context before starting
|
||||
|
@ -51,7 +51,7 @@
|
||||
/*-*************************************
|
||||
* Common constants
|
||||
***************************************/
|
||||
#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs
|
||||
#define ZSTD_OPT_DEBUG 0 /* 3 = compression stats; 5 = check encoded sequences; 9 = full logs */
|
||||
#include <stdio.h>
|
||||
#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
|
||||
#define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
|
||||
@ -233,6 +233,6 @@ int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
|
||||
/* custom memory allocation functions */
|
||||
void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
|
||||
void ZSTD_defaultFreeFunction(void* opaque, void* address);
|
||||
static ZSTD_customMem const defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
|
||||
static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
|
||||
|
||||
#endif /* ZSTD_CCOMMON_H_MODULE */
|
||||
|
@ -170,9 +170,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
||||
|
||||
size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_parameters params;
|
||||
memset(&params, 0, sizeof(params));
|
||||
params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
|
||||
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
|
||||
return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
|
||||
}
|
||||
|
||||
|
@ -427,21 +427,8 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
|
||||
*/
|
||||
|
||||
|
||||
/* Frame descriptor
|
||||
/* Frame header :
|
||||
|
||||
// old
|
||||
1 byte - Alloc :
|
||||
bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h)
|
||||
bit 4 : reserved for windowLog (must be zero)
|
||||
bit 5 : reserved (must be zero)
|
||||
bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
|
||||
|
||||
1 byte - checker :
|
||||
bit 0-1 : dictID (0, 1, 2 or 4 bytes)
|
||||
bit 2-7 : reserved (must be zero)
|
||||
|
||||
|
||||
// new
|
||||
1 byte - FrameHeaderDescription :
|
||||
bit 0-1 : dictID (0, 1, 2 or 4 bytes)
|
||||
bit 2-4 : reserved (must be zero)
|
||||
@ -453,24 +440,24 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
|
||||
bit 0-2 : octal Fractional (1/8th)
|
||||
bit 3-7 : Power of 2, with 0 = 1 KB (up to 2 TB)
|
||||
|
||||
Optional : content size (0, 1, 2, 4 or 8 bytes)
|
||||
0 : unknown
|
||||
1 : 0-255 bytes
|
||||
2 : 256 - 65535+256
|
||||
8 : up to 16 exa
|
||||
|
||||
Optional : dictID (0, 1, 2 or 4 bytes)
|
||||
Automatic adaptation
|
||||
0 : no dictID
|
||||
1 : 1 - 255
|
||||
2 : 256 - 65535
|
||||
4 : all other values
|
||||
|
||||
Optional : content size (0, 1, 2, 4 or 8 bytes)
|
||||
0 : unknown
|
||||
1 : 0-255 bytes
|
||||
2 : 256 - 65535+256
|
||||
8 : up to 16 exa
|
||||
*/
|
||||
|
||||
|
||||
/* Block format description
|
||||
|
||||
Block = Literal Section - Sequences Section
|
||||
Block = Literals Section - Sequences Section
|
||||
Prerequisite : size of (compressed) block, maximum size of regenerated data
|
||||
|
||||
1) Literal Section
|
||||
@ -478,7 +465,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
|
||||
1.1) Header : 1-5 bytes
|
||||
flags: 2 bits
|
||||
00 compressed by Huff0
|
||||
01 unused
|
||||
01 repeat
|
||||
10 is Raw (uncompressed)
|
||||
11 is Rle
|
||||
Note : using 01 => Huff0 with precomputed table ?
|
||||
@ -514,7 +501,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
|
||||
else => 5 bytes (2-2-18-18)
|
||||
big endian convention
|
||||
|
||||
1- CTable available (stored into workspace ?)
|
||||
1- CTable available (stored into workspace)
|
||||
2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
|
||||
|
||||
|
||||
@ -936,7 +923,7 @@ _check_compressibility:
|
||||
`offsetCode` : distance to match, or 0 == repCode.
|
||||
`matchCode` : matchLength - MINMATCH
|
||||
*/
|
||||
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, size_t offsetCode, size_t matchCode)
|
||||
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
|
||||
{
|
||||
#if 0 /* for debug */
|
||||
static const BYTE* g_start = NULL;
|
||||
@ -957,7 +944,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
|
||||
*seqStorePtr->litLength++ = (U16)litLength;
|
||||
|
||||
/* match offset */
|
||||
*(seqStorePtr->offset++) = (U32)offsetCode + 1;
|
||||
*(seqStorePtr->offset++) = offsetCode + 1;
|
||||
|
||||
/* match Length */
|
||||
if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); }
|
||||
@ -1063,7 +1050,7 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
|
||||
***************************************/
|
||||
static const U32 prime3bytes = 506832829U;
|
||||
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
|
||||
static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }
|
||||
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
|
||||
|
||||
static const U32 prime4bytes = 2654435761U;
|
||||
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
|
||||
@ -1129,13 +1116,14 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
|
||||
const BYTE* const lowest = base + lowestIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - 8;
|
||||
size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1];
|
||||
U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
/* init */
|
||||
ip += (ip==lowest);
|
||||
{ U32 const maxRep = (U32)(ip-lowest);
|
||||
if (offset_1 > maxRep) offset_1 = 0;
|
||||
if (offset_2 > maxRep) offset_2 = 0;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Main Search Loop */
|
||||
@ -1152,13 +1140,13 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||
} else {
|
||||
size_t offset;
|
||||
U32 offset;
|
||||
if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
|
||||
ip += ((ip-anchor) >> g_searchStrength) + 1;
|
||||
continue;
|
||||
}
|
||||
mLength = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
|
||||
offset = ip-match;
|
||||
offset = (U32)(ip-match);
|
||||
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
@ -1180,7 +1168,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
|
||||
{ size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
||||
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
||||
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
|
||||
ip += rLength;
|
||||
@ -1189,8 +1177,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
|
||||
} } }
|
||||
|
||||
/* save reps for next block */
|
||||
cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend - base) + 1;
|
||||
cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend - base) + 1;
|
||||
cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
|
||||
/* Last Literals */
|
||||
{ size_t const lastLLSize = iend - anchor;
|
||||
@ -1364,17 +1352,19 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
|
||||
const U32 windowLow = zc->lowLimit;
|
||||
U32 matchEndIdx = current+8;
|
||||
size_t bestLength = 8;
|
||||
#ifdef ZSTD_C_PREDICT
|
||||
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
|
||||
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
|
||||
predictedSmall += (predictedSmall>0);
|
||||
predictedLarge += (predictedLarge>0);
|
||||
#endif /* ZSTD_C_PREDICT */
|
||||
|
||||
hashTable[h] = current; /* Update Hash Table */
|
||||
|
||||
while (nbCompares-- && (matchIndex > windowLow)) {
|
||||
U32* nextPtr = bt + 2*(matchIndex & btMask);
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
#if 0 /* note : can create issues when hlog small <= 11 */
|
||||
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
|
||||
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
|
||||
if (matchIndex == predictedSmall) {
|
||||
/* no need to check length, result known */
|
||||
@ -1731,17 +1721,15 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
|
||||
size_t* offsetPtr,
|
||||
U32 maxNbAttempts, U32 matchLengthSearch);
|
||||
searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
|
||||
U32 rep[ZSTD_REP_INIT];
|
||||
U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
|
||||
|
||||
/* init */
|
||||
ip += (ip==base);
|
||||
ctx->nextToUpdate3 = ctx->nextToUpdate;
|
||||
{ U32 i;
|
||||
U32 const maxRep = (U32)(ip-base);
|
||||
for (i=0; i<ZSTD_REP_INIT; i++) {
|
||||
rep[i]=ctx->rep[i];
|
||||
if (rep[i]>maxRep) rep[i]=0;
|
||||
} }
|
||||
{ U32 const maxRep = (U32)(ip-base);
|
||||
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Match Loop */
|
||||
while (ip < ilimit) {
|
||||
@ -1750,9 +1738,9 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
|
||||
const BYTE* start=ip+1;
|
||||
|
||||
/* check repCode */
|
||||
if ((rep[0]>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0]))) {
|
||||
if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
|
||||
/* repcode : we take it */
|
||||
matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
|
||||
matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
|
||||
if (depth==0) goto _storeSequence;
|
||||
}
|
||||
|
||||
@ -1772,8 +1760,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
|
||||
if (depth>=1)
|
||||
while (ip<ilimit) {
|
||||
ip ++;
|
||||
if ((offset) && ((rep[0]>0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) {
|
||||
size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
|
||||
if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
|
||||
size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
|
||||
int const gain2 = (int)(mlRep * 3);
|
||||
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
|
||||
if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
|
||||
@ -1791,8 +1779,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
|
||||
/* let's find an even better one */
|
||||
if ((depth==2) && (ip<ilimit)) {
|
||||
ip ++;
|
||||
if ((offset) && ((rep[0]>0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) {
|
||||
size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
|
||||
if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
|
||||
size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
|
||||
int const gain2 = (int)(ml2 * 4);
|
||||
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
|
||||
if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
|
||||
@ -1813,23 +1801,23 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
|
||||
if (offset) {
|
||||
while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
|
||||
{ start--; matchLength++; }
|
||||
rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
|
||||
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
|
||||
}
|
||||
|
||||
/* store sequence */
|
||||
_storeSequence:
|
||||
{ size_t const litLength = start - anchor;
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
|
||||
anchor = ip = start + matchLength;
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
while ( (ip <= ilimit)
|
||||
&& ((rep[1]>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - rep[1])) )) {
|
||||
&& ((offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32;
|
||||
offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */
|
||||
matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
|
||||
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
|
||||
ip += matchLength;
|
||||
anchor = ip;
|
||||
@ -1837,11 +1825,8 @@ _storeSequence:
|
||||
} }
|
||||
|
||||
/* Save reps for next block */
|
||||
{ int i;
|
||||
for (i=0; i<ZSTD_REP_NUM; i++) {
|
||||
if (!rep[i]) rep[i] = (U32)(iend - ctx->base) + 1; /* in case some zero are left */
|
||||
ctx->savedRep[i] = rep[i];
|
||||
} }
|
||||
ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
|
||||
ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
|
||||
|
||||
/* Last Literals */
|
||||
{ size_t const lastLLSize = iend - anchor;
|
||||
@ -1900,10 +1885,9 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
|
||||
U32 maxNbAttempts, U32 matchLengthSearch);
|
||||
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
|
||||
|
||||
/* init */
|
||||
U32 rep[ZSTD_REP_INIT];
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
|
||||
U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
|
||||
|
||||
/* init */
|
||||
ctx->nextToUpdate3 = ctx->nextToUpdate;
|
||||
ip += (ip == prefixStart);
|
||||
|
||||
@ -1915,7 +1899,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
|
||||
U32 current = (U32)(ip-base);
|
||||
|
||||
/* check repCode */
|
||||
{ const U32 repIndex = (U32)(current+1 - rep[0]);
|
||||
{ const U32 repIndex = (U32)(current+1 - offset_1);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
||||
@ -1945,7 +1929,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
|
||||
current++;
|
||||
/* check repCode */
|
||||
if (offset) {
|
||||
const U32 repIndex = (U32)(current - rep[0]);
|
||||
const U32 repIndex = (U32)(current - offset_1);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
||||
@ -1975,7 +1959,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
|
||||
current++;
|
||||
/* check repCode */
|
||||
if (offset) {
|
||||
const U32 repIndex = (U32)(current - rep[0]);
|
||||
const U32 repIndex = (U32)(current - offset_1);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
||||
@ -2007,19 +1991,19 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
|
||||
const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
|
||||
const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
|
||||
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
|
||||
rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
|
||||
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
|
||||
}
|
||||
|
||||
/* store sequence */
|
||||
_storeSequence:
|
||||
{ size_t const litLength = start - anchor;
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
|
||||
anchor = ip = start + matchLength;
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
while (ip <= ilimit) {
|
||||
const U32 repIndex = (U32)((ip-base) - rep[1]);
|
||||
const U32 repIndex = (U32)((ip-base) - offset_2);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
||||
@ -2027,7 +2011,7 @@ _storeSequence:
|
||||
/* repcode detected we should take it */
|
||||
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
||||
matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
|
||||
offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */
|
||||
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
|
||||
ip += matchLength;
|
||||
anchor = ip;
|
||||
@ -2037,7 +2021,7 @@ _storeSequence:
|
||||
} }
|
||||
|
||||
/* Save reps for next block */
|
||||
ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2];
|
||||
ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
|
||||
|
||||
/* Last Literals */
|
||||
{ size_t const lastLLSize = iend - anchor;
|
||||
@ -2068,18 +2052,27 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* The optimal parser */
|
||||
#include "zstd_opt.h"
|
||||
|
||||
static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
|
||||
{
|
||||
#ifdef ZSTD_OPT_H_91842398743
|
||||
ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
|
||||
#else
|
||||
(void)ctx; (void)src; (void)srcSize;
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
|
||||
{
|
||||
#ifdef ZSTD_OPT_H_91842398743
|
||||
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
|
||||
#else
|
||||
(void)ctx; (void)src; (void)srcSize;
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -2426,9 +2419,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
||||
|
||||
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_parameters params;
|
||||
memset(&params, 0, sizeof(params));
|
||||
params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
|
||||
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
|
||||
ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", cctx->base, compressionLevel);
|
||||
return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
|
||||
}
|
||||
@ -2538,11 +2529,9 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
|
||||
|
||||
size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_parameters params;
|
||||
memset(&params, 0, sizeof(params));
|
||||
ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
|
||||
params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
|
||||
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
|
||||
return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
|
||||
}
|
||||
|
||||
@ -2577,7 +2566,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_pa
|
||||
if (!customMem.customAlloc && !customMem.customFree)
|
||||
customMem = defaultCustomMem;
|
||||
|
||||
if (!customMem.customAlloc || !customMem.customFree)
|
||||
if (!customMem.customAlloc || !customMem.customFree) /* can't have 1/2 custom alloc/free as NULL */
|
||||
return NULL;
|
||||
|
||||
{ ZSTD_CDict* const cdict = (ZSTD_CDict*) customMem.customAlloc(customMem.opaque, sizeof(*cdict));
|
||||
@ -2772,3 +2761,14 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, si
|
||||
cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
|
||||
return cp;
|
||||
}
|
||||
|
||||
/*! ZSTD_getParams() :
|
||||
* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
|
||||
* All fields of `ZSTD_frameParameters` are set to default (0) */
|
||||
ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize, size_t dictSize) {
|
||||
ZSTD_parameters params;
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
|
||||
memset(&params, 0, sizeof(params));
|
||||
params.cParams = cParams;
|
||||
return params;
|
||||
}
|
||||
|
@ -34,6 +34,10 @@
|
||||
/* Note : this file is intended to be included within zstd_compress.c */
|
||||
|
||||
|
||||
#ifndef ZSTD_OPT_H_91842398743
|
||||
#define ZSTD_OPT_H_91842398743
|
||||
|
||||
|
||||
#define ZSTD_FREQ_DIV 5
|
||||
|
||||
/*-*************************************
|
||||
@ -110,7 +114,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
|
||||
|
||||
/* literals */
|
||||
if (ssPtr->cachedLiterals == literals) {
|
||||
U32 additional = litLength - ssPtr->cachedLitLength;
|
||||
U32 const additional = litLength - ssPtr->cachedLitLength;
|
||||
const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
|
||||
price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
|
||||
for (u=0; u < additional; u++)
|
||||
@ -150,7 +154,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
|
||||
FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
|
||||
{
|
||||
/* offset */
|
||||
BYTE offCode = (BYTE)ZSTD_highbit32(offset+1);
|
||||
BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
|
||||
U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
|
||||
|
||||
/* match Length */
|
||||
@ -196,7 +200,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
|
||||
}
|
||||
|
||||
/* match offset */
|
||||
{ BYTE offCode = (BYTE)ZSTD_highbit32(offset+1);
|
||||
{ BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
|
||||
seqStorePtr->offCodeSum++;
|
||||
seqStorePtr->offCodeFreq[offCode]++;
|
||||
}
|
||||
@ -232,7 +236,6 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
|
||||
|
||||
|
||||
|
||||
|
||||
/* Update hashTable3 up to ip (excluded)
|
||||
Assumption : always within prefix (ie. not within extDict) */
|
||||
FORCE_INLINE
|
||||
@ -1039,3 +1042,5 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
|
||||
seqStorePtr->lit += lastLLSize;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ZSTD_OPT_H_91842398743 */
|
||||
|
@ -173,7 +173,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
|
||||
if (ZSTD_isError(hSize)) return hSize;
|
||||
if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */
|
||||
memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
|
||||
zbd->lhSize += iend-ip; ip = iend; notDone = 0;
|
||||
zbd->lhSize += iend-ip;
|
||||
*dstCapacityPtr = 0;
|
||||
return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
|
||||
}
|
||||
|
@ -207,20 +207,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
||||
*/
|
||||
|
||||
|
||||
/* Frame descriptor
|
||||
/* Frame Header :
|
||||
|
||||
// old
|
||||
1 byte - Alloc :
|
||||
bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h)
|
||||
bit 4 : reserved for windowLog (must be zero)
|
||||
bit 5 : reserved (must be zero)
|
||||
bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
|
||||
|
||||
1 byte - checker :
|
||||
bit 0-1 : dictID (0, 1, 2 or 4 bytes)
|
||||
bit 2-7 : reserved (must be zero)
|
||||
|
||||
// new
|
||||
1 byte - FrameHeaderDescription :
|
||||
bit 0-1 : dictID (0, 1, 2 or 4 bytes)
|
||||
bit 2 : checksumFlag
|
||||
@ -454,16 +442,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) src;
|
||||
litBlockType_t lbt;
|
||||
|
||||
if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
|
||||
lbt = (litBlockType_t)(istart[0]>> 6);
|
||||
|
||||
switch(lbt)
|
||||
switch((litBlockType_t)(istart[0]>> 6))
|
||||
{
|
||||
case lbt_huffman:
|
||||
{ size_t litSize, litCSize, singleStream=0;
|
||||
U32 lhSize = ((istart[0]) >> 4) & 3;
|
||||
U32 lhSize = (istart[0] >> 4) & 3;
|
||||
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
|
||||
switch(lhSize)
|
||||
{
|
||||
@ -930,8 +916,11 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
size_t dSize;
|
||||
ZSTD_checkContinuity(dctx, dst);
|
||||
return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
|
||||
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
|
||||
dctx->previousDstEnd = (char*)dst + dSize;
|
||||
return dSize;
|
||||
}
|
||||
|
||||
|
||||
|
@ -826,7 +826,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
||||
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
||||
#endif
|
||||
dstPtr += 12;
|
||||
eSize += 12;
|
||||
|
||||
_cleanup:
|
||||
@ -906,6 +905,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
||||
}
|
||||
|
||||
#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
|
||||
#define EXIT(e) { dictSize = ERROR(e); goto _cleanup; }
|
||||
/*! ZDICT_trainFromBuffer_unsafe() :
|
||||
* `samplesBuffer` must be followed by noisy guard band.
|
||||
* @return : size of dictionary.
|
||||
@ -923,12 +923,12 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
||||
size_t dictSize = 0;
|
||||
|
||||
/* checks */
|
||||
if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
|
||||
if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) EXIT(dstSize_tooSmall);
|
||||
if (!dictList) return ERROR(memory_allocation);
|
||||
|
||||
/* init */
|
||||
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
|
||||
if (sBuffSize < DIB_MINSAMPLESSIZE) return 0; /* not enough source to create dictionary */
|
||||
if (sBuffSize < DIB_MINSAMPLESSIZE) EXIT(no_error); /* not enough source to create dictionary */
|
||||
ZDICT_initDictItem(dictList);
|
||||
g_displayLevel = params.notificationLevel;
|
||||
if (selectivity==0) selectivity = g_selectivity_default;
|
||||
@ -948,9 +948,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
||||
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
||||
DISPLAYLEVEL(3, "list %u best segments \n", nb);
|
||||
for (u=1; u<=nb; u++) {
|
||||
U32 p = dictList[u].pos;
|
||||
U32 l = dictList[u].length;
|
||||
U32 d = MIN(40, l);
|
||||
U32 const p = dictList[u].pos;
|
||||
U32 const l = dictList[u].length;
|
||||
U32 const d = MIN(40, l);
|
||||
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
|
||||
u, l, p, dictList[u].savings);
|
||||
ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
|
||||
@ -966,7 +966,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
||||
for (u=1; u<dictList->pos; u++) {
|
||||
U32 l = dictList[u].length;
|
||||
ptr -= l;
|
||||
if (ptr<(BYTE*)dictBuffer) return ERROR(GENERIC); /* should not happen */
|
||||
if (ptr<(BYTE*)dictBuffer) EXIT(GENERIC); /* should not happen */
|
||||
memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
|
||||
} }
|
||||
|
||||
@ -983,7 +983,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
||||
params);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_cleanup :
|
||||
free(dictList);
|
||||
return dictSize;
|
||||
}
|
||||
|
1
programs/.gitignore
vendored
1
programs/.gitignore
vendored
@ -43,6 +43,7 @@ _*
|
||||
tmp*
|
||||
*.zst
|
||||
result
|
||||
out
|
||||
|
||||
# fuzzer
|
||||
afl
|
||||
|
@ -148,16 +148,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
|
||||
void* const compressedBuffer = malloc(maxCompressedSize);
|
||||
void* const resultBuffer = malloc(srcSize);
|
||||
ZSTD_CCtx* refCtx = ZSTD_createCCtx();
|
||||
ZSTD_CCtx* ctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* refDCtx = ZSTD_createDCtx();
|
||||
ZSTD_DCtx* dctx = ZSTD_createDCtx();
|
||||
U32 nbBlocks;
|
||||
UTIL_time_t ticksPerSecond;
|
||||
|
||||
/* checks */
|
||||
if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx)
|
||||
EXM_THROW(31, "not enough memory");
|
||||
if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
|
||||
EXM_THROW(31, "allocation error : not enough memory");
|
||||
|
||||
/* init */
|
||||
if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */
|
||||
@ -217,8 +215,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
UTIL_waitForNextTick(ticksPerSecond);
|
||||
UTIL_getTime(&clockStart);
|
||||
|
||||
{ U32 nbLoops = 0;
|
||||
ZSTD_CDict* cdict = ZSTD_createCDict(dictBuffer, dictBufferSize, cLevel);
|
||||
{ size_t const refSrcSize = (nbBlocks == 1) ? srcSize : 0;
|
||||
ZSTD_parameters const zparams = ZSTD_getParams(cLevel, refSrcSize, dictBufferSize);
|
||||
ZSTD_customMem const cmem = { NULL, NULL, NULL };
|
||||
U32 nbLoops = 0;
|
||||
ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, zparams, cmem);
|
||||
if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict() allocation failure");
|
||||
do {
|
||||
U32 blockNb;
|
||||
@ -227,7 +228,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
blockTable[blockNb].cPtr, blockTable[blockNb].cRoom,
|
||||
blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize,
|
||||
cdict);
|
||||
if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingPreparedCCtx() failed : %s", ZSTD_getErrorName(rSize));
|
||||
if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingCDict() failed : %s", ZSTD_getErrorName(rSize));
|
||||
blockTable[blockNb].cSize = rSize;
|
||||
}
|
||||
nbLoops++;
|
||||
@ -264,7 +265,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
|
||||
ddict);
|
||||
if (ZSTD_isError(regenSize)) {
|
||||
DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s \n",
|
||||
DISPLAY("ZSTD_decompress_usingDDict() failed on block %u : %s \n",
|
||||
blockNb, ZSTD_getErrorName(regenSize));
|
||||
clockLoop = 0; /* force immediate test end */
|
||||
break;
|
||||
@ -321,9 +322,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
||||
free(blockTable);
|
||||
free(compressedBuffer);
|
||||
free(resultBuffer);
|
||||
ZSTD_freeCCtx(refCtx);
|
||||
ZSTD_freeCCtx(ctx);
|
||||
ZSTD_freeDCtx(refDCtx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
return 0;
|
||||
}
|
||||
|
@ -23,12 +23,19 @@
|
||||
- source repository : https://github.com/Cyan4973/zstd
|
||||
*/
|
||||
|
||||
/* *************************************
|
||||
* Compiler Options
|
||||
***************************************/
|
||||
#define _CRT_SECURE_NO_WARNINGS /* removes Visual warning on strerror() */
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Includes
|
||||
**************************************/
|
||||
#include <stdlib.h> /* malloc */
|
||||
#include <stdio.h> /* FILE, fwrite, fprintf */
|
||||
#include <string.h> /* memcpy */
|
||||
#include <errno.h> /* errno */
|
||||
#include "mem.h" /* U32 */
|
||||
|
||||
|
||||
@ -104,7 +111,7 @@ static BYTE RDG_genChar(U32* seed, const BYTE* ldt)
|
||||
U32 const id = RDG_rand(seed) & LTMASK;
|
||||
//TRACE(" %u : \n", id);
|
||||
//TRACE(" %4u [%4u] ; val : %4u \n", id, id&255, ldt[id]);
|
||||
return (ldt[id]); /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with 0.0. Checked : table is fully initialized */
|
||||
return ldt[id]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
|
||||
}
|
||||
|
||||
|
||||
@ -115,8 +122,7 @@ static U32 RDG_rand15Bits (unsigned* seedPtr)
|
||||
|
||||
static U32 RDG_randLength(unsigned* seedPtr)
|
||||
{
|
||||
if (RDG_rand(seedPtr) & 7)
|
||||
return (RDG_rand(seedPtr) & 0xF);
|
||||
if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF); /* small length */
|
||||
return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
|
||||
}
|
||||
|
||||
@ -185,10 +191,10 @@ void RDG_genStdout(unsigned long long size, double matchProba, double litProba,
|
||||
size_t const stdDictSize = 32 KB;
|
||||
BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize);
|
||||
U64 total = 0;
|
||||
BYTE ldt[LTSIZE];
|
||||
BYTE ldt[LTSIZE]; /* literals distribution table */
|
||||
|
||||
/* init */
|
||||
if (buff==NULL) { fprintf(stdout, "not enough memory\n"); exit(1); }
|
||||
if (buff==NULL) { fprintf(stderr, "datagen: error: %s \n", strerror(errno)); exit(1); }
|
||||
if (litProba<=0.0) litProba = matchProba / 4.5;
|
||||
memset(ldt, '0', sizeof(ldt));
|
||||
RDG_fillLiteralDistrib(ldt, litProba);
|
||||
|
@ -42,12 +42,13 @@
|
||||
* Compiler Options
|
||||
***************************************/
|
||||
#define _POSIX_SOURCE 1 /* enable %llu on Windows */
|
||||
#define _CRT_SECURE_NO_WARNINGS /* removes Visual warning on strerror() */
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Includes
|
||||
***************************************/
|
||||
#include "util.h" /* Compiler options, UTIL_GetFileSize */
|
||||
#include "util.h" /* Compiler options, UTIL_GetFileSize, _LARGEFILE64_SOURCE */
|
||||
#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
@ -58,7 +59,6 @@
|
||||
#include "fileio.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
|
||||
#include "zstd.h"
|
||||
#include "zstd_internal.h" /* MIN, KB, MB */
|
||||
#define ZBUFF_STATIC_LINKING_ONLY
|
||||
#include "zbuff.h"
|
||||
|
||||
@ -84,6 +84,10 @@
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define KB *(1<<10)
|
||||
#define MB *(1<<20)
|
||||
#define GB *(1U<<30)
|
||||
|
||||
#define _1BIT 0x01
|
||||
#define _2BITS 0x03
|
||||
#define _3BITS 0x07
|
||||
@ -113,21 +117,17 @@ static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result
|
||||
void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
|
||||
if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
|
||||
if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
|
||||
{ g_time = clock(); DISPLAY(__VA_ARGS__); \
|
||||
if (g_displayLevel>=4) fflush(stdout); } }
|
||||
static const unsigned refreshRate = 150;
|
||||
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
static clock_t g_time = 0;
|
||||
|
||||
static unsigned FIO_GetMilliSpan(clock_t nPrevious)
|
||||
{
|
||||
clock_t const nCurrent = clock();
|
||||
return (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
|
||||
}
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Local Parameters
|
||||
* Local Parameters - Not thread safe
|
||||
***************************************/
|
||||
static U32 g_overwrite = 0;
|
||||
void FIO_overwriteMode(void) { g_overwrite=1; }
|
||||
@ -175,7 +175,7 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
|
||||
f = fopen(srcFileName, "rb");
|
||||
}
|
||||
|
||||
if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: No such file\n", srcFileName);
|
||||
if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
|
||||
|
||||
return f;
|
||||
}
|
||||
@ -201,18 +201,20 @@ static FILE* FIO_openDstFile(const char* dstFileName)
|
||||
if (g_displayLevel <= 1) {
|
||||
/* No interaction possible */
|
||||
DISPLAY("zstd: %s already exists; not overwritten \n", dstFileName);
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
|
||||
{ int ch = getchar();
|
||||
if ((ch!='Y') && (ch!='y')) {
|
||||
DISPLAY(" not overwritten \n");
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */
|
||||
} } }
|
||||
f = fopen( dstFileName, "wb" );
|
||||
}
|
||||
|
||||
if (f==NULL) DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
|
||||
return f;
|
||||
}
|
||||
|
||||
@ -233,18 +235,18 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName)
|
||||
|
||||
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
|
||||
fileHandle = fopen(fileName, "rb");
|
||||
if (fileHandle==0) EXM_THROW(31, "Error opening file %s", fileName);
|
||||
if (fileHandle==0) EXM_THROW(31, "zstd: %s: %s", fileName, strerror(errno));
|
||||
fileSize = UTIL_getFileSize(fileName);
|
||||
if (fileSize > MAX_DICT_SIZE) {
|
||||
int seekResult;
|
||||
if (fileSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", fileName); /* avoid extreme cases */
|
||||
DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", fileName, MAX_DICT_SIZE);
|
||||
seekResult = fseek(fileHandle, (long int)(fileSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */
|
||||
if (seekResult != 0) EXM_THROW(33, "Error seeking into file %s", fileName);
|
||||
if (seekResult != 0) EXM_THROW(33, "zstd: %s: %s", fileName, strerror(errno));
|
||||
fileSize = MAX_DICT_SIZE;
|
||||
}
|
||||
*bufferPtr = (BYTE*)malloc((size_t)fileSize);
|
||||
if (*bufferPtr==NULL) EXM_THROW(34, "Allocation error : not enough memory for dictBuffer");
|
||||
*bufferPtr = malloc((size_t)fileSize);
|
||||
if (*bufferPtr==NULL) EXM_THROW(34, "zstd: %s", strerror(errno));
|
||||
{ size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
|
||||
if (readSize!=fileSize) EXM_THROW(35, "Error reading dictionary file %s", fileName); }
|
||||
fclose(fileHandle);
|
||||
@ -273,14 +275,14 @@ static cRess_t FIO_createCResources(const char* dictFileName)
|
||||
cRess_t ress;
|
||||
|
||||
ress.ctx = ZBUFF_createCCtx();
|
||||
if (ress.ctx == NULL) EXM_THROW(30, "Allocation error : can't create ZBUFF context");
|
||||
if (ress.ctx == NULL) EXM_THROW(30, "zstd: allocation error : can't create ZBUFF context");
|
||||
|
||||
/* Allocate Memory */
|
||||
ress.srcBufferSize = ZBUFF_recommendedCInSize();
|
||||
ress.srcBuffer = malloc(ress.srcBufferSize);
|
||||
ress.dstBufferSize = ZBUFF_recommendedCOutSize();
|
||||
ress.dstBuffer = malloc(ress.dstBufferSize);
|
||||
if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");
|
||||
if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "zstd: allocation error : not enough memory");
|
||||
|
||||
/* dictionary */
|
||||
ress.dictBufferSize = FIO_loadFile(&(ress.dictBuffer), dictFileName);
|
||||
@ -295,7 +297,7 @@ static void FIO_freeCResources(cRess_t ress)
|
||||
free(ress.dstBuffer);
|
||||
free(ress.dictBuffer);
|
||||
errorCode = ZBUFF_freeCCtx(ress.ctx);
|
||||
if (ZBUFF_isError(errorCode)) EXM_THROW(38, "Error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode));
|
||||
if (ZBUFF_isError(errorCode)) EXM_THROW(38, "zstd: error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode));
|
||||
}
|
||||
|
||||
|
||||
@ -315,9 +317,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
|
||||
U64 const fileSize = UTIL_getFileSize(srcFileName);
|
||||
|
||||
/* init */
|
||||
{ ZSTD_parameters params;
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.cParams = ZSTD_getCParams(cLevel, fileSize, ress.dictBufferSize);
|
||||
{ ZSTD_parameters params = ZSTD_getParams(cLevel, fileSize, ress.dictBufferSize);
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
params.fParams.checksumFlag = g_checksumFlag;
|
||||
params.fParams.noDictIDFlag = !g_dictIDFlag;
|
||||
@ -375,8 +375,8 @@ static int FIO_compressFilename_internal(cRess_t ress,
|
||||
}
|
||||
|
||||
|
||||
/*! FIO_compressFilename_internal() :
|
||||
* same as FIO_compressFilename_extRess(), with ress.destFile already opened (typically stdout)
|
||||
/*! FIO_compressFilename_srcFile() :
|
||||
* note : ress.destFile already opened
|
||||
* @return : 0 : compression completed correctly,
|
||||
* 1 : srcFileName is missing or could not be opened
|
||||
*/
|
||||
@ -417,7 +417,7 @@ static int FIO_compressFilename_dstFile(cRess_t ress,
|
||||
|
||||
result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, cLevel);
|
||||
|
||||
if (fclose(ress.dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
|
||||
if (fclose(ress.dstFile)) { DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; }
|
||||
if (result!=0) remove(dstFileName); /* remove operation artefact */
|
||||
return result;
|
||||
}
|
||||
@ -429,13 +429,13 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
|
||||
clock_t const start = clock();
|
||||
|
||||
cRess_t const ress = FIO_createCResources(dictFileName);
|
||||
int const issueWithSrcFile = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
|
||||
FIO_freeCResources(ress);
|
||||
int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
|
||||
|
||||
{ double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
|
||||
double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
|
||||
DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
|
||||
}
|
||||
return issueWithSrcFile;
|
||||
|
||||
FIO_freeCResources(ress);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
||||
#include <sys/timeb.h> /* timeb */
|
||||
#include <string.h> /* strcmp */
|
||||
#include <time.h> /* clock_t */
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue */
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
|
||||
#include "zstd.h" /* ZSTD_VERSION_STRING, ZSTD_getErrorCode */
|
||||
#include "zdict.h" /* ZDICT_trainFromBuffer */
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
@ -109,9 +109,9 @@ static unsigned FUZ_highbit32(U32 v32)
|
||||
}
|
||||
|
||||
|
||||
#define CHECKTEST(var, fn) size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
|
||||
#define CHECK(fn) { CHECKTEST(err, fn); }
|
||||
#define CHECKPLUS(var, fn, more) { CHECKTEST(var, fn); more; }
|
||||
#define CHECK_V(var, fn) size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
|
||||
#define CHECK(fn) { CHECK_V(err, fn); }
|
||||
#define CHECKPLUS(var, fn, more) { CHECK_V(var, fn); more; }
|
||||
static int basicUnitTests(U32 seed, double compressibility)
|
||||
{
|
||||
size_t const CNBuffSize = 5 MB;
|
||||
@ -216,10 +216,8 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
|
||||
{ size_t const testSize = CNBuffSize / 3;
|
||||
{ ZSTD_compressionParameters const cPar = ZSTD_getCParams(2, testSize, dictSize);
|
||||
ZSTD_frameParameters const fPar = { 1 , 0 , 0 };
|
||||
ZSTD_parameters p;
|
||||
p.cParams = cPar; p.fParams = fPar;
|
||||
{ ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);
|
||||
p.fParams.contentSizeFlag = 1;
|
||||
CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
|
||||
}
|
||||
CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig) );
|
||||
@ -277,10 +275,8 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : compress without dictID : ", testNb++);
|
||||
{ ZSTD_frameParameters const fParams = { 0 /*contentSize*/, 0 /*checksum*/, 1 /*NoDictID*/ };
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, CNBuffSize, dictSize);
|
||||
ZSTD_parameters p;
|
||||
p.cParams = cParams; p.fParams = fParams;
|
||||
{ ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize);
|
||||
p.fParams.noDictIDFlag = 1;
|
||||
cSize = ZSTD_compress_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
|
||||
CNBuffer, CNBuffSize,
|
||||
dictBuffer, dictSize, p);
|
||||
@ -320,6 +316,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
static const size_t blockSize = 100 KB;
|
||||
static const size_t dictSize = 16 KB;
|
||||
size_t cSize2;
|
||||
|
||||
/* basic block compression */
|
||||
DISPLAYLEVEL(4, "test%3i : Block compression test : ", testNb++);
|
||||
@ -330,7 +327,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : Block decompression test : ", testNb++);
|
||||
CHECK( ZSTD_decompressBegin(dctx) );
|
||||
{ CHECKTEST(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
|
||||
{ CHECK_V(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
|
||||
if (r != blockSize) goto _output_error; }
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
@ -339,11 +336,15 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
|
||||
cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize);
|
||||
if (ZSTD_isError(cSize)) goto _output_error;
|
||||
cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize);
|
||||
if (ZSTD_isError(cSize2)) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : Dictionary Block decompression test : ", testNb++);
|
||||
CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
|
||||
{ CHECKTEST( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
|
||||
{ CHECK_V( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
|
||||
if (r != blockSize) goto _output_error; }
|
||||
{ CHECK_V( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+blockSize, CNBuffSize, (char*)compressedBuffer+cSize, cSize2) );
|
||||
if (r != blockSize) goto _output_error; }
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
@ -361,7 +362,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
sampleSize += 96 KB;
|
||||
cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
|
||||
if (ZSTD_isError(cSize)) goto _output_error;
|
||||
{ CHECKTEST(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
|
||||
{ CHECK_V(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
|
||||
if (regenSize!=sampleSize) goto _output_error; }
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
}
|
||||
@ -370,12 +371,12 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
#define ZEROESLENGTH 100
|
||||
DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
|
||||
memset(CNBuffer, 0, ZEROESLENGTH);
|
||||
{ CHECKTEST(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
|
||||
{ CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
|
||||
cSize = r; }
|
||||
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
|
||||
{ CHECKTEST(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
|
||||
{ CHECK_V(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
|
||||
if (r != ZEROESLENGTH) goto _output_error; }
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
@ -403,13 +404,13 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
|
||||
} }}
|
||||
DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++);
|
||||
{ CHECKTEST(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
|
||||
{ CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
|
||||
CNBuffer, _3BYTESTESTLENGTH, 19) );
|
||||
cSize = r; }
|
||||
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++);
|
||||
{ CHECKTEST(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
|
||||
{ CHECK_V(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
|
||||
if (r != _3BYTESTESTLENGTH) goto _output_error; }
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
|
@ -381,13 +381,9 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
|
||||
{ size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
|
||||
dict = srcBuffer + dictStart;
|
||||
}
|
||||
{ ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
|
||||
U32 const checksum = FUZ_rand(&lseed) & 1;
|
||||
U32 const noDictIDFlag = FUZ_rand(&lseed) & 1;
|
||||
ZSTD_frameParameters const fPar = { 0, checksum, noDictIDFlag };
|
||||
ZSTD_parameters params;
|
||||
params.cParams = cPar;
|
||||
params.fParams = fPar;
|
||||
{ ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize);
|
||||
params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
|
||||
params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
|
||||
{ size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, 0);
|
||||
CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError));
|
||||
} } }
|
||||
|
@ -33,7 +33,8 @@ It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages.
|
||||
It also features a very fast decoder, with speed > 500 MB/s per core.
|
||||
|
||||
\fBzstd\fR command line is generally similar to gzip, but features the following differences :
|
||||
- Original files are preserved
|
||||
- Source files are preserved by default
|
||||
It's possible to remove them automatically by using the \fB--rm\fR option
|
||||
- By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary.
|
||||
Use \fB-q\fR to turn them off
|
||||
|
||||
@ -57,6 +58,19 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
|
||||
.BR \-f ", " --force
|
||||
overwrite output without prompting
|
||||
.TP
|
||||
.BR \-c ", " --stdout
|
||||
force write to standard output, even if it is the console
|
||||
.TP
|
||||
.BR \--rm
|
||||
remove source file(s) after successful compression or decompression
|
||||
.TP
|
||||
.BR \-k ", " --keep
|
||||
keep source file(s) after successful compression or decompression.
|
||||
This is the default behavior.
|
||||
.TP
|
||||
.BR \-r
|
||||
operate recursively on directories
|
||||
.TP
|
||||
.BR \-h/\-H ", " --help
|
||||
display help/long help and exit
|
||||
.TP
|
||||
@ -69,9 +83,6 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
|
||||
.BR \-q ", " --quiet
|
||||
suppress warnings and notifications; specify twice to suppress errors too
|
||||
.TP
|
||||
.BR \-c ", " --stdout
|
||||
force write to standard output, even if it is the console
|
||||
.TP
|
||||
.BR \-C ", " --check
|
||||
add integrity check computed from uncompressed data
|
||||
.TP
|
||||
|
@ -115,6 +115,7 @@ static int usage(const char* programName)
|
||||
DISPLAY( " -D file: use `file` as Dictionary \n");
|
||||
DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
|
||||
DISPLAY( " -f : overwrite output without prompting \n");
|
||||
DISPLAY( "--rm : remove source file(s) after successful de/compression \n");
|
||||
DISPLAY( " -h/-H : display help/long help and exit\n");
|
||||
return 0;
|
||||
}
|
||||
@ -132,7 +133,6 @@ static int usage_advanced(const char* programName)
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
DISPLAY( " -r : operate recursively on directories\n");
|
||||
#endif
|
||||
DISPLAY( "--rm : remove source files after successful de/compression \n");
|
||||
#ifndef ZSTD_NOCOMPRESS
|
||||
DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n");
|
||||
DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
|
||||
@ -181,7 +181,7 @@ static void waitEnter(void)
|
||||
/*! readU32FromChar() :
|
||||
@return : unsigned integer value read from input in `char` format
|
||||
Will also modify `*stringPtr`, advancing it to position where it stopped reading.
|
||||
Note : this function can overflow if result > MAX_UNIT */
|
||||
Note : this function can overflow if result > MAX_UINT */
|
||||
static unsigned readU32FromChar(const char** stringPtr)
|
||||
{
|
||||
unsigned result = 0;
|
||||
@ -254,7 +254,7 @@ int main(int argCount, const char** argv)
|
||||
if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
|
||||
if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
|
||||
if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
|
||||
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
|
||||
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); continue; }
|
||||
if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; }
|
||||
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
|
||||
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
|
||||
@ -265,13 +265,17 @@ int main(int argCount, const char** argv)
|
||||
if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
|
||||
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
|
||||
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; continue; }
|
||||
if (!strcmp(argument, "--keep")) { continue; } /* does nothing, since preserving input is default; for gzip/xz compatibility */
|
||||
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
|
||||
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
|
||||
|
||||
/* '-' means stdin/stdout */
|
||||
if (!strcmp(argument, "-")){
|
||||
if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; outFileName=stdoutmark; continue; }
|
||||
}
|
||||
if (!filenameIdx) {
|
||||
filenameIdx=1, filenameTable[0]=stdinmark;
|
||||
outFileName=stdoutmark;
|
||||
displayLevel-=(displayLevel==2);
|
||||
continue;
|
||||
} }
|
||||
|
||||
/* Decode commands (note : aggregated commands are allowed) */
|
||||
if (argument[0]=='-') {
|
||||
@ -300,7 +304,7 @@ int main(int argCount, const char** argv)
|
||||
case 'd': decode=1; argument++; break;
|
||||
|
||||
/* Force stdout, even if stdout==console */
|
||||
case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
|
||||
case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); argument++; break;
|
||||
|
||||
/* Use file content as dictionary */
|
||||
case 'D': nextEntryIsDictionary = 1; argument++; break;
|
||||
@ -314,8 +318,8 @@ int main(int argCount, const char** argv)
|
||||
/* Quiet mode */
|
||||
case 'q': displayLevel--; argument++; break;
|
||||
|
||||
/* keep source file (default anyway, so useless; for gzip/xz compatibility) */
|
||||
case 'k': argument++; break;
|
||||
/* keep source file (default); for gzip/xz compatibility */
|
||||
case 'k': FIO_setRemoveSrcFile(0); argument++; break;
|
||||
|
||||
/* Checksum */
|
||||
case 'C': argument++; FIO_setChecksumFlag(2); break;
|
||||
|
@ -7,3 +7,4 @@ The following projects are included with the zstd distribution:
|
||||
- cmake - CMake project contributed by Artyom Dymchenko
|
||||
- VS2008 - Visual Studio 2008 project
|
||||
- VS2010 - Visual Studio 2010 project (which also works well with Visual Studio 2012, 2013, 2015)
|
||||
- build - command line scripts prepared for Visual Studio compilation without IDE
|
||||
|
591
zstd_compression_format.md
Normal file
591
zstd_compression_format.md
Normal file
@ -0,0 +1,591 @@
|
||||
Zstandard Compression Format
|
||||
============================
|
||||
|
||||
### Notices
|
||||
|
||||
Copyright (c) 2016 Yann Collet
|
||||
|
||||
Permission is granted to copy and distribute this document
|
||||
for any purpose and without charge,
|
||||
including translations into other languages
|
||||
and incorporation into compilations,
|
||||
provided that the copyright notice and this notice are preserved,
|
||||
and that any substantive changes or deletions from the original
|
||||
are clearly marked.
|
||||
Distribution of this document is unlimited.
|
||||
|
||||
### Version
|
||||
|
||||
0.0.1 (30/06/2016 - Work in progress - unfinished)
|
||||
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The purpose of this document is to define a lossless compressed data format,
|
||||
that is independent of CPU type, operating system,
|
||||
file system and character set, suitable for
|
||||
File compression, Pipe and streaming compression
|
||||
using the [Zstandard algorithm](http://www.zstandard.org).
|
||||
|
||||
The data can be produced or consumed,
|
||||
even for an arbitrarily long sequentially presented input data stream,
|
||||
using only an a priori bounded amount of intermediate storage,
|
||||
and hence can be used in data communications.
|
||||
The format uses the Zstandard compression method,
|
||||
and optional [xxHash-64 checksum method](http://www.xxhash.org),
|
||||
for detection of data corruption.
|
||||
|
||||
The data format defined by this specification
|
||||
does not attempt to allow random access to compressed data.
|
||||
|
||||
This specification is intended for use by implementers of software
|
||||
to compress data into Zstandard format and/or decompress data from Zstandard format.
|
||||
The text of the specification assumes a basic background in programming
|
||||
at the level of bits and other primitive data representations.
|
||||
|
||||
Unless otherwise indicated below,
|
||||
a compliant compressor must produce data sets
|
||||
that conform to the specifications presented here.
|
||||
It doesn’t need to support all options though.
|
||||
|
||||
A compliant decompressor must be able to decompress
|
||||
at least one working set of parameters
|
||||
that conforms to the specifications presented here.
|
||||
It may also ignore informative fields, such as checksum.
|
||||
Whenever it does not support a parameter defined in the compressed stream,
|
||||
it must produce a non-ambiguous error code and associated error message
|
||||
explaining which parameter is unsupported.
|
||||
|
||||
|
||||
Definitions
|
||||
-----------
|
||||
A content compressed by Zstandard is transformed into a Zstandard __frame__.
|
||||
Multiple frames can be appended into a single file or stream.
|
||||
A frame is totally independent, has a defined beginning and end,
|
||||
and a set of parameters which tells the decoder how to decompress it.
|
||||
|
||||
A frame encapsulates one or multiple __blocks__.
|
||||
Each block can be compressed or not,
|
||||
and has a guaranteed maximum content size, which depends on frame parameters.
|
||||
Unlike frames, each block depends on previous blocks for proper decoding.
|
||||
However, each block can be decompressed without waiting for its successor,
|
||||
allowing streaming operations.
|
||||
|
||||
|
||||
General Structure of Zstandard Frame format
|
||||
-------------------------------------------
|
||||
|
||||
| MagicNb | F. Header | Block | (More blocks) | EndMark |
|
||||
|:-------:|:----------:| ----- | ------------- | ------- |
|
||||
| 4 bytes | 2-14 bytes | | | 3 bytes |
|
||||
|
||||
__Magic Number__
|
||||
|
||||
4 Bytes, Little endian format.
|
||||
Value : 0xFD2FB527
|
||||
|
||||
__Frame Header__
|
||||
|
||||
2 to 14 Bytes, to be detailed in the next part.
|
||||
|
||||
__Data Blocks__
|
||||
|
||||
To be detailed later on.
|
||||
That’s where compressed data is stored.
|
||||
|
||||
__EndMark__
|
||||
|
||||
The flow of blocks ends when the last block header brings an _end signal_ .
|
||||
This last block header may optionally host a __Content Checksum__ .
|
||||
|
||||
__Content Checksum__
|
||||
|
||||
Content Checksum verifies that the frame content has been regenerated correctly.
|
||||
The content checksum is the result
|
||||
of [xxh64() hash function](https://www.xxHash.com)
|
||||
digesting the original (decoded) data as input, and a seed of zero.
|
||||
Bits from 11 to 32 (included) are extracted to form a 22 bits checksum
|
||||
stored into the last block header.
|
||||
```
|
||||
contentChecksum = (XXH64(content, size, 0) >> 11) & ((1<<22)-1);
|
||||
```
|
||||
Content checksum is only present when its associated flag
|
||||
is set in the frame descriptor.
|
||||
Its usage is optional.
|
||||
|
||||
__Frame Concatenation__
|
||||
|
||||
In some circumstances, it may be required to append multiple frames,
|
||||
for example in order to add new data to an existing compressed file
|
||||
without re-framing it.
|
||||
|
||||
In such case, each frame brings its own set of descriptor flags.
|
||||
Each frame is considered independent.
|
||||
The only relation between frames is their sequential order.
|
||||
|
||||
The ability to decode multiple concatenated frames
|
||||
within a single stream or file is left outside of this specification.
|
||||
As an example, the reference `zstd` command line utility is able
|
||||
to decode all concatenated frames in their sequential order,
|
||||
delivering the final decompressed result as if it was a single content.
|
||||
|
||||
|
||||
Frame Header
|
||||
-------------
|
||||
|
||||
| FHD | (WD) | (Content Size) | (dictID) |
|
||||
| ------- | --------- |:--------------:| --------- |
|
||||
| 1 byte | 0-1 byte | 0 - 8 bytes | 0-4 bytes |
|
||||
|
||||
Frame header has a variable size, which uses a minimum of 2 bytes,
|
||||
and up to 14 bytes depending on optional parameters.
|
||||
|
||||
__FHD byte__ (Frame Header Descriptor)
|
||||
|
||||
The first Header's byte is called the Frame Header Descriptor.
|
||||
It tells which other fields are present.
|
||||
Decoding this byte is enough to get the full size of the Frame Header.
|
||||
|
||||
| BitNb | 7-6 | 5 | 4 | 3 | 2 | 1-0 |
|
||||
| ------- | ------ | ------- | ------ | -------- | -------- | -------- |
|
||||
|FieldName| FCSize | Segment | Unused | Reserved | Checksum | dictID |
|
||||
|
||||
In this table, bit 7 is highest bit, while bit 0 is lowest.
|
||||
|
||||
__Frame Content Size flag__
|
||||
|
||||
This is a 2-bits flag (`= FHD >> 6`),
|
||||
specifying if decompressed data size is provided within the header.
|
||||
|
||||
| Value | 0 | 1 | 2 | 3 |
|
||||
| ------- | --- | --- | --- | --- |
|
||||
|FieldSize| 0-1 | 2 | 4 | 8 |
|
||||
|
||||
Value 0 has a double meaning :
|
||||
it either means `0` (size not provided) _if_ the `WD` byte is present,
|
||||
or it means `1` byte (size <= 255 bytes).
|
||||
|
||||
__Single Segment__
|
||||
|
||||
If this flag is set,
|
||||
data shall be regenerated within a single continuous memory segment.
|
||||
In which case, `WD` byte __is not present__,
|
||||
but `Frame Content Size` field necessarily is.
|
||||
|
||||
As a consequence, the decoder must allocate a memory segment
|
||||
of size `>= Frame Content Size`.
|
||||
|
||||
In order to preserve the decoder from unreasonable memory requirement,
|
||||
a decoder can refuse a compressed frame
|
||||
which requests a memory size beyond decoder's authorized range.
|
||||
|
||||
For broader compatibility, decoders are recommended to support
|
||||
memory sizes of 8 MB at least.
|
||||
However, this is merely a recommendation,
|
||||
and each decoder is free to support higher or lower limits,
|
||||
depending on local limitations.
|
||||
|
||||
__Unused bit__
|
||||
|
||||
The value of this bit is unimportant
|
||||
and not interpreted by a decoder compliant with this specification version.
|
||||
It may be used in a future revision,
|
||||
to signal a property which is not required to properly decode the frame.
|
||||
|
||||
__Reserved bit__
|
||||
|
||||
This bit is reserved for some future feature.
|
||||
Its value _must be zero_.
|
||||
A decoder compliant with this specification version must ensure it is not set.
|
||||
This bit may be used in a future revision,
|
||||
to signal a feature that must be interpreted in order to decode the frame.
|
||||
|
||||
__Content checksum flag__
|
||||
|
||||
If this flag is set, a content checksum will be present in the EndMark.
|
||||
The checksum is a 22 bits value extracted from the XXH64() of data.
|
||||
See __Content Checksum__ .
|
||||
|
||||
__Dictionary ID flag__
|
||||
|
||||
This is a 2-bits flag (`= FHD & 3`),
|
||||
telling if a dictionary ID is provided within the header
|
||||
|
||||
| Value | 0 | 1 | 2 | 3 |
|
||||
| ------- | --- | --- | --- | --- |
|
||||
|FieldSize| 0 | 1 | 2 | 4 |
|
||||
|
||||
__WD byte__ (Window Descriptor)
|
||||
|
||||
Provides guarantees on maximum back-reference distance
|
||||
that will be present within compressed data.
|
||||
This information is useful for decoders to allocate enough memory.
|
||||
|
||||
| BitNb | 7-3 | 0-2 |
|
||||
| --------- | -------- | -------- |
|
||||
| FieldName | Exponent | Mantissa |
|
||||
|
||||
Maximum distance is given by the following formulae :
|
||||
```
|
||||
windowLog = 10 + Exponent;
|
||||
windowBase = 1 << windowLog;
|
||||
windowAdd = (windowBase / 8) * Mantissa;
|
||||
windowSize = windowBase + windowAdd;
|
||||
```
|
||||
The minimum window size is 1 KB.
|
||||
The maximum size is 15*(2^38) bytes (Exponent = 31, Mantissa = 7), which is 3.75 TB.
|
||||
|
||||
To properly decode compressed data,
|
||||
a decoder will need to allocate a buffer of at least `windowSize` bytes.
|
||||
|
||||
Note that `WD` byte is optional. It's not present in `single segment` mode.
|
||||
In which case, the maximum back-reference distance is the content size itself,
|
||||
which can be any value from 1 to 2^64-1 bytes (16 EB).
|
||||
|
||||
In order to preserve decoder from unreasonable memory requirements,
|
||||
a decoder can refuse a compressed frame
|
||||
which requests a memory size beyond decoder's authorized range.
|
||||
|
||||
For better interoperability,
|
||||
decoders are recommended to be compatible with window sizes of 8 MB.
|
||||
Encoders are recommended to not request more than 8 MB.
|
||||
It's merely a recommendation though,
|
||||
decoders are free to support larger or lower limits,
|
||||
depending on local limitations.
|
||||
|
||||
__Frame Content Size__
|
||||
|
||||
This is the original (uncompressed) size.
|
||||
This information is optional, and only present if associated flag is set.
|
||||
Content size is provided using 1, 2, 4 or 8 Bytes.
|
||||
Format is Little endian.
|
||||
|
||||
| Field Size | Range |
|
||||
| ---------- | ---------- |
|
||||
| 0 | 0 |
|
||||
| 1 | 0 - 255 |
|
||||
| 2 | 256 - 65791|
|
||||
| 4 | 0 - 2^32-1 |
|
||||
| 8 | 0 - 2^64-1 |
|
||||
|
||||
When field size is 1, 4 or 8 bytes, the value is read directly.
|
||||
When field size is 2, _an offset of 256 is added_.
|
||||
It's allowed to represent a small size (ex: `18`) using the 8-bytes variant.
|
||||
A size of `0` means `content size is unknown`.
|
||||
In which case, the `WD` byte will necessarily be present,
|
||||
and becomes the only hint to determine memory allocation.
|
||||
|
||||
In order to preserve decoder from unreasonable memory requirement,
|
||||
a decoder can refuse a compressed frame
|
||||
which requests a memory size beyond decoder's authorized range.
|
||||
|
||||
__Dictionary ID__
|
||||
|
||||
This is a variable size field, which contains a single ID.
|
||||
It checks if the correct dictionary is used for decoding.
|
||||
Note that this field is optional. If it's not present,
|
||||
it's up to the caller to make sure it uses the correct dictionary.
|
||||
|
||||
Field size depends on __Dictionary ID flag__.
|
||||
1 byte can represent an ID 0-255.
|
||||
2 bytes can represent an ID 0-65535.
|
||||
4 bytes can represent an ID 0-(2^32-1).
|
||||
|
||||
It's allowed to represent a small ID (for example `13`)
|
||||
with a large 4-bytes dictionary ID, losing some efficiency in the process.
|
||||
|
||||
|
||||
Data Blocks
|
||||
-----------
|
||||
|
||||
| B. Header | data |
|
||||
|:---------:| ------ |
|
||||
| 3 bytes | |
|
||||
|
||||
|
||||
__Block Header__
|
||||
|
||||
This field uses 3-bytes, format is __big-endian__.
|
||||
|
||||
The 2 highest bits represent the `block type`,
|
||||
while the remaining 22 bits represent the (compressed) block size.
|
||||
|
||||
There are 4 block types :
|
||||
|
||||
| Value | 0 | 1 | 2 | 3 |
|
||||
| ---------- | ---------- | --- | --- | ------- |
|
||||
| Block Type | Compressed | Raw | RLE | EndMark |
|
||||
|
||||
- Compressed : this is a Zstandard compressed block,
|
||||
detailed in a later part of this specification.
|
||||
"block size" is the compressed size.
|
||||
Decompressed size is unknown,
|
||||
but its maximum possible value is guaranteed (see below)
|
||||
- Raw : this is an uncompressed block.
|
||||
"block size" is the number of bytes to read and copy.
|
||||
- RLE : this is a single byte, repeated N times.
|
||||
In which case, "block size" is the size to regenerate,
|
||||
while the "compressed" block is just 1 byte (the byte to repeat).
|
||||
- EndMark : this is not a block. Signal the end of the frame.
|
||||
The rest of the field may be optionally filled by a checksum
|
||||
(see __Content Checksum__).
|
||||
|
||||
Block sizes must respect a few rules :
|
||||
- In compressed mode, compressed size is always strictly `< contentSize`.
|
||||
- Block decompressed size is necessarily <= maximum back-reference distance .
|
||||
- Block decompressed size is necessarily <= 128 KB
|
||||
|
||||
|
||||
__Data__
|
||||
|
||||
Where the actual data to decode stands.
|
||||
It might be compressed or not, depending on previous field indications.
|
||||
A data block is not necessarily "full" :
|
||||
since an arbitrary “flush” may happen anytime,
|
||||
block content can be any size, up to Block Maximum Size.
|
||||
Block Maximum Size is the smallest of :
|
||||
- Max back-reference distance
|
||||
- 128 KB
|
||||
|
||||
|
||||
Skippable Frames
|
||||
----------------
|
||||
|
||||
| Magic Number | Frame Size | User Data |
|
||||
|:------------:|:----------:| --------- |
|
||||
| 4 bytes | 4 bytes | |
|
||||
|
||||
Skippable frames allow the insertion of user-defined data
|
||||
into a flow of concatenated frames.
|
||||
Its design is pretty straightforward,
|
||||
with the sole objective to allow the decoder to quickly skip
|
||||
over user-defined data and continue decoding.
|
||||
|
||||
Skippable frames defined in this specification are compatible with LZ4 ones.
|
||||
|
||||
|
||||
__Magic Number__ :
|
||||
|
||||
4 Bytes, Little endian format.
|
||||
Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
|
||||
All 16 values are valid to identify a skippable frame.
|
||||
|
||||
__Frame Size__ :
|
||||
|
||||
This is the size, in bytes, of the following User Data
|
||||
(without including the magic number nor the size field itself).
|
||||
4 Bytes, Little endian format, unsigned 32-bits.
|
||||
This means User Data can’t be bigger than (2^32-1) Bytes.
|
||||
|
||||
__User Data__ :
|
||||
|
||||
User Data can be anything. Data will just be skipped by the decoder.
|
||||
|
||||
|
||||
Compressed block format
|
||||
-----------------------
|
||||
This specification details the content of a _compressed block_.
|
||||
A compressed block has a size, which must be known in order to decode it.
|
||||
It also has a guaranteed maximum regenerated size,
|
||||
in order to properly allocate destination buffer.
|
||||
See "Frame format" for more details.
|
||||
|
||||
A compressed block consists of 2 sections :
|
||||
- Literals section
|
||||
- Sequences section
|
||||
|
||||
### Prerequisite
|
||||
For proper decoding, a compressed block requires access to following elements :
|
||||
- Previous decoded blocks, up to a distance of `windowSize`,
|
||||
or all previous blocks of the same frame in "single segment" mode.
|
||||
- List of "recent offsets" from previous compressed block.
|
||||
|
||||
|
||||
### Compressed Literals
|
||||
|
||||
Literals are compressed using order-0 huffman compression.
|
||||
During sequence phase, literals will be entangled with match copy operations.
|
||||
All literals are regrouped in the first part of the block.
|
||||
They can be decoded first, and then copied during sequence operations,
|
||||
or they can be decoded on the fly, as needed by sequences.
|
||||
|
||||
| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) |
|
||||
| ------ | ------------------ | ------- | --------- | --------- | --------- |
|
||||
|
||||
Literals can be compressed, or uncompressed.
|
||||
When compressed, an optional tree description can be present,
|
||||
followed by 1 or 4 streams.
|
||||
|
||||
#### Block Literal Header
|
||||
|
||||
Header is in charge of describing precisely how literals are packed.
|
||||
It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
|
||||
using big-endian convention.
|
||||
|
||||
| BlockType | sizes format | (compressed size) | regenerated size |
|
||||
| --------- | ------------ | ----------------- | ---------------- |
|
||||
| 2 bits | 1 - 2 bits | 0 - 18 bits | 5 - 20 bits |
|
||||
|
||||
__Block Type__ :
|
||||
|
||||
This is a 2-bits field, describing 4 different block types :
|
||||
|
||||
| Value | 0 | 1 | 2 | 3 |
|
||||
| ---------- | ---------- | ------ | --- | ------- |
|
||||
| Block Type | Compressed | Repeat | Raw | RLE |
|
||||
|
||||
- Compressed : This is a standard huffman-compressed block,
|
||||
starting with a huffman tree description.
|
||||
See details below.
|
||||
- Repeat Stats : This is a huffman-compressed block,
|
||||
using huffman tree from previous huffman-compressed block.
|
||||
Huffman tree description will be skipped.
|
||||
Compressed stream is equivalent to "compressed" block type.
|
||||
- Raw : Literals are stored uncompressed.
|
||||
- RLE : Literals consist of a single byte value repeated N times.
|
||||
|
||||
__Sizes format__ :
|
||||
|
||||
Sizes formats are divided into 2 families :
|
||||
|
||||
- For compressed blocks, it is required to decode both the compressed size
|
||||
and the decompressed size. It will also decode the number of streams.
|
||||
- For Raw or RLE blocks, it's enough to decode the size to regenerate.
|
||||
|
||||
For values spanning several bytes, convention is Big-endian.
|
||||
|
||||
__Sizes format for Raw or RLE block__ :
|
||||
|
||||
- Value : 0x : Regenerated size uses 5 bits (0-31).
|
||||
Total literal header size is 1 byte.
|
||||
`size = h[0] & 31;`
|
||||
- Value : 10 : Regenerated size uses 12 bits (0-4095).
|
||||
Total literal header size is 2 bytes.
|
||||
`size = ((h[0] & 15) << 8) + h[1];`
|
||||
- Value : 11 : Regenerated size uses 20 bits (0-1048575).
|
||||
Total literal header size is 3 bytes.
|
||||
`size = ((h[0] & 15) << 16) + (h[1]<<8) + h[2];`
|
||||
|
||||
Note : it's allowed to represent a short value (ex : `13`)
|
||||
using a long format, accepting the reduced compactness.
|
||||
|
||||
__Sizes format for Compressed Block__ :
|
||||
|
||||
Note : also applicable to "repeat-stats" blocks.
|
||||
- Value : 00 : 4 streams
|
||||
Compressed and regenerated sizes use 10 bits (0-1023)
|
||||
Total literal header size is 3 bytes
|
||||
- Value : 01 : _Single stream_
|
||||
Compressed and regenerated sizes use 10 bits (0-1023)
|
||||
Total literal header size is 3 bytes
|
||||
- Value : 10 : 4 streams
|
||||
Compressed and regenerated sizes use 14 bits (0-16383)
|
||||
Total literal header size is 4 bytes
|
||||
- Value : 11 : 4 streams
|
||||
Compressed and regenerated sizes use 18 bits (0-262143)
|
||||
Total literal header size is 5 bytes
|
||||
|
||||
Compressed and regenerated size fields follow big endian convention.
|
||||
|
||||
#### Huffman Tree description
|
||||
|
||||
This section is only present when block type is _compressed_ (`0`).
|
||||
It describes the different leaf nodes of the huffman tree,
|
||||
and their relative weights.
|
||||
|
||||
##### Representation
|
||||
|
||||
All byte values from zero (included) to last present one (excluded)
|
||||
are represented by `weight` values, from 0 to `maxBits`.
|
||||
Transformation from `weight` to `nbBits` follows this formula :
|
||||
`nbBits = weight ? maxBits + 1 - weight : 0;` .
|
||||
The last symbol's weight is deduced from previously decoded ones,
|
||||
by completing to the nearest power of 2.
|
||||
This power of 2 gives `maxBits`, the depth of the current tree.
|
||||
|
||||
__Example__ :
|
||||
Let's presume the following huffman tree must be described :
|
||||
|
||||
| Value | 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
| ------ | - | - | - | - | - | - |
|
||||
| nbBits | 1 | 2 | 3 | 0 | 4 | 4 |
|
||||
|
||||
The tree depth is 4, since its longest code uses 4 bits.
|
||||
Value `5` will not be listed, nor will values above `5`.
|
||||
Values from `0` to `4` will be listed using `weight` instead of `nbBits`.
|
||||
Weight formula is : `weight = nbBits ? maxBits + 1 - nbBits : 0;`
|
||||
It gives the following series of weights :
|
||||
|
||||
| weight | 4 | 3 | 2 | 0 | 1 |
|
||||
| ------ | - | - | - | - | - |
|
||||
| Value | 0 | 1 | 2 | 3 | 4 |
|
||||
|
||||
The decoder will do the inverse operation :
|
||||
having collected weights of symbols from `0` to `4`,
|
||||
it knows the last symbol, `5`, is present with a non-zero weight.
|
||||
The weight of `5` can be deduced by joining to the nearest power of 2.
|
||||
Sum of 2^(weight-1) (excluding 0) is :
|
||||
8 + 4 + 2 + 0 + 1 = 15
|
||||
Nearest power of 2 is 16.
|
||||
Therefore, `maxBits = 4` and `weight[5] = 1`.
|
||||
It can then proceed to transform back weights into nbBits :
|
||||
`nbBits = weight ? maxBits + 1 - weight : 0;` .
|
||||
|
||||
##### Huffman Tree header
|
||||
|
||||
This is a single byte value (0-255), which tells how to decode the tree.
|
||||
|
||||
- if headerByte >= 242 : this is one of 14 pre-defined weight distributions :
|
||||
+ 242 : 1x1 (+ 1x1)
|
||||
+ 243 : 2x1 (+ 1x2)
|
||||
+ 244 : 3x1 (+ 1x1)
|
||||
+ 245 : 4x1 (+ 1x4)
|
||||
+ 246 : 7x1 (+ 1x1)
|
||||
+ 247 : 8x1 (+ 1x8)
|
||||
+ 248 : 15x1 (+ 1x1)
|
||||
+ 249 : 16x1 (+ 1x16)
|
||||
+ 250 : 31x1 (+ 1x1)
|
||||
+ 251 : 32x1 (+ 1x32)
|
||||
+ 252 : 63x1 (+ 1x1)
|
||||
+ 253 : 64x1 (+ 1x64)
|
||||
+ 254 :127x1 (+ 1x1)
|
||||
+ 255 :128x1 (+ 1x128)
|
||||
|
||||
- if headerByte >= 128 : this is a direct representation,
|
||||
where each weight is written directly as a 4 bits field (0-15).
|
||||
The full representation occupies ((nbSymbols+1)/2) bytes,
|
||||
meaning it uses a last full byte even if nbSymbols is odd.
|
||||
`nbSymbols = headerByte - 127;`
|
||||
|
||||
- if headerByte < 128 :
|
||||
the series of weights is compressed by FSE.
|
||||
The length of the compressed series is `headerByte` (0-127).
|
||||
|
||||
##### FSE (Finite State Entropy) compression of huffman weights
|
||||
|
||||
The series of weights is compressed using standard FSE compression.
|
||||
It's a single bitstream with 2 interleaved states,
|
||||
using a single distribution table.
|
||||
|
||||
To decode an FSE bitstream, it is necessary to know its compressed size.
|
||||
Compressed size is provided by `headerByte`.
|
||||
It's also necessary to know its maximum decompressed size.
|
||||
In this case, it's `255`, since literal values range from `0` to `255`,
|
||||
and the last symbol value is not represented.
|
||||
|
||||
An FSE bitstream starts with a header, describing the probability distribution.
|
||||
Result will create a Decoding Table.
|
||||
It is necessary to know the maximum accuracy of distribution
|
||||
to properly allocate space for the Table.
|
||||
For a list of huffman weights, this maximum is 8 bits.
|
||||
|
||||
FSE header and bitstreams are described in a separate chapter.
|
||||
|
||||
##### Conversion from weights to huffman prefix codes
|
||||
|
||||
|
||||
|
||||
|
||||
Version changes
|
||||
---------------
|
Loading…
Reference in New Issue
Block a user