Merge pull request #216 from Cyan4973/dev

v0.7.1
This commit is contained in:
Yann Collet 2016-06-21 08:43:04 +02:00 committed by GitHub
commit d000042108
11 changed files with 206 additions and 116 deletions

1
.gitignore vendored
View File

@ -13,6 +13,7 @@
*.dylib
# Executables
zstd
*.exe
*.out
*.app

View File

@ -51,6 +51,7 @@ all:
zstdprogram:
$(MAKE) -C $(PRGDIR)
mv $(PRGDIR)/zstd .
zlibwrapper:
$(MAKE) -C $(ZSTDDIR) all
@ -63,6 +64,7 @@ clean:
@$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
@$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID)
@rm -f zstd
@echo Cleaning completed

4
NEWS
View File

@ -1,3 +1,7 @@
v0.7.1
fixed : corruption issue, reported by cj
modified : checksum enabled by default in command line mode
v0.7.0
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
New : Command `--rm`, to remove source file after successful de/compression

View File

@ -246,7 +246,7 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U
size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface, for paramagrill */
{
ZSTD_CCtx* zc = ZSTD_createCCtx();
ZSTD_CCtx* const zc = ZSTD_createCCtx();
ZSTD_parameters params;
memset(&params, 0, sizeof(params));
params.cParams = cParams;
@ -1024,11 +1024,12 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
}
static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
const BYTE* const pStart = pIn;
const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
while ((pIn<pInLimit-(sizeof(size_t)-1))) {
while (pIn < pInLoopLimit) {
size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
pIn += ZSTD_NbCommonBytes(diff);
@ -1128,7 +1129,6 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1];
/* init */
ZSTD_resetSeqStore(seqStorePtr);
ip += (ip==lowest);
{ U32 const maxRep = (U32)(ip-lowest);
if (offset_1 > maxRep) offset_1 = 0;
@ -1137,35 +1137,34 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mlCode;
size_t offset;
size_t mLength;
size_t const h = ZSTD_hashPtr(ip, hBits, mls);
U32 const current = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
hashTable[h] = current; /* update hash table */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
mLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
} else {
if ( (matchIndex <= lowestIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
size_t offset;
if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
ip += ((ip-anchor) >> g_searchStrength) + 1;
continue;
}
mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
mLength = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
offset = ip-match;
while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
ip += mlCode;
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
@ -1177,18 +1176,18 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
size_t const rLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
{ size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH);
ip += rlCode;
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* save reps for next block */
cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend-base);
cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend-base);
cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend - base) + 1;
cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend - base) + 1;
/* Last Literals */
{ size_t const lastLLSize = iend - anchor;
@ -1238,32 +1237,25 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* const ilimit = iend - 8;
U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
/* init */
ZSTD_resetSeqStore(seqStorePtr);
/* skip first position to avoid read overflow during repcode match check */
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
ip++;
/* Main Search Loop */
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hBits, mls);
const U32 matchIndex = hashTable[h];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1;
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* repMatch = repBase + repIndex;
size_t mlCode;
U32 offset;
size_t mLength;
hashTable[h] = current; /* update hash table */
if ( (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < lowestIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@ -1272,16 +1264,17 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
}
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */
U32 offset;
mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
ip += mlCode;
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
@ -1437,7 +1430,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
} }
*smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
return 1;
}
@ -1571,7 +1564,6 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B
}
/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
ZSTD_CCtx* zc,
@ -1743,7 +1735,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* init */
ip += (ip==base);
ctx->nextToUpdate3 = ctx->nextToUpdate;
ZSTD_resetSeqStore(seqStorePtr);
{ U32 i;
U32 const maxRep = (U32)(ip-base);
for (i=0; i<ZSTD_REP_INIT; i++) {
@ -1847,7 +1838,7 @@ _storeSequence:
/* Save reps for next block */
{ int i;
for (i=0; i<ZSTD_REP_NUM; i++) {
if (!rep[i]) rep[i] = (U32)(iend - ctx->base); /* in case some zero are left */
if (!rep[i]) rep[i] = (U32)(iend - ctx->base) + 1; /* in case some zero are left */
ctx->savedRep[i] = rep[i];
} }
@ -1913,7 +1904,6 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
{ U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
ctx->nextToUpdate3 = ctx->nextToUpdate;
ZSTD_resetSeqStore(seqStorePtr);
ip += (ip == prefixStart);
/* Match Loop */
@ -2097,11 +2087,7 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr
static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
{
static const ZSTD_blockCompressor blockCompressor[2][6] = {
#if 1
{ ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
#else
{ ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict },
#endif
{ ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
};
@ -2111,8 +2097,9 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
ZSTD_resetSeqStore(&(zc->seqStore));
blockCompressor(zc, src, srcSize);
return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
}
@ -2245,7 +2232,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
/* preemptive overflow correction */
if (zc->lowLimit > (1<<30)) {
U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt);
U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) | (zc->params.cParams.strategy == ZSTD_btopt);
U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1;
U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */
U32 const correction = zc->lowLimit - newLowLimit;

View File

@ -465,7 +465,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
/* init */
ctx->nextToUpdate3 = ctx->nextToUpdate;
ZSTD_resetSeqStore(seqStorePtr);
ZSTD_rescaleFreqs(seqStorePtr);
ip += (ip==prefixStart);
{ U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
@ -757,7 +756,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
{ U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
ctx->nextToUpdate3 = ctx->nextToUpdate;
ZSTD_resetSeqStore(seqStorePtr);
ZSTD_rescaleFreqs(seqStorePtr);
ip += (ip==prefixStart);

View File

@ -749,7 +749,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream));
seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */
if (MEM_32bits() ||
if (MEM_32bits() |
(totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream));
/* ANS state update */
@ -765,23 +765,22 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
FORCE_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit_8,
const BYTE** litPtr, const BYTE* const litLimit_w,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
BYTE* const oend_8 = oend-8;
BYTE* const oend_w = oend-WILDCOPY_OVERLENGTH;
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
/* check */
if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */
if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */
/* copy Literals */
ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
@ -821,10 +820,10 @@ size_t ZSTD_execSequence(BYTE* op,
op += 8; match += 8;
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_8) {
ZSTD_wildcopy(op, match, oend_8 - op);
match += oend_8 - op;
op = oend_8;
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
@ -845,7 +844,7 @@ static size_t ZSTD_decompressSequences(
BYTE* const oend = ostart + maxDstSize;
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH;
const BYTE* const litEnd = litPtr + dctx->litSize;
FSE_DTable* DTableLL = dctx->LLTable;
FSE_DTable* DTableML = dctx->MLTable;
@ -875,7 +874,7 @@ static size_t ZSTD_decompressSequences(
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
{ seq_t const sequence = ZSTD_decodeSequence(&seqState);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
} }
@ -888,8 +887,8 @@ static size_t ZSTD_decompressSequences(
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
//if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
@ -1180,12 +1179,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
}
static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
dctx->dictEnd = dctx->previousDstEnd;
dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
dctx->base = dict;
dctx->previousDstEnd = (const char*)dict + dictSize;
return 0;
}
static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize)
@ -1237,29 +1237,24 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t c
static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
if (dictSize < 8) return ERROR(dictionary_corrupted);
if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
{ U32 const magic = MEM_readLE32(dict);
if (magic != ZSTD_DICT_MAGIC) {
/* pure content mode */
ZSTD_refDictContent(dctx, dict, dictSize);
return 0;
}
dctx->dictID = MEM_readLE32((const char*)dict + 4);
return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
} }
dctx->dictID = MEM_readLE32((const char*)dict + 4);
/* load entropy tables */
dict = (const char*)dict + 8;
dictSize -= 8;
{ size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
dict = (const char*)dict + eSize;
dictSize -= eSize;
}
/* reference dictionary content */
ZSTD_refDictContent(dctx, dict, dictSize);
return 0;
/* load entropy tables */
dict = (const char*)dict + 8;
dictSize -= 8;
{ size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
dict = (const char*)dict + eSize;
dictSize -= eSize;
}
/* reference dictionary content */
return ZSTD_refDictContent(dctx, dict, dictSize);
}
@ -1319,7 +1314,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu
}
/*! ZSTD_createDDict() :
* Create a digested dictionary, ready to start decompression operation without startup delay.
* Create a digested dictionary, ready to start decompression without startup delay.
* `dict` can be released after `ZSTD_DDict` creation */
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
{

1
programs/.gitignore vendored
View File

@ -50,3 +50,4 @@ afl
# Misc files
*.bat
fileTests.sh
dirTest*

View File

@ -137,7 +137,7 @@ static U32 g_sparseFileSupport = 1; /* 0 : no sparse allowed; 1: auto (file ye
void FIO_setSparseWrite(unsigned sparse) { g_sparseFileSupport=sparse; }
static U32 g_dictIDFlag = 1;
void FIO_setDictIDFlag(unsigned dictIDFlag) { g_dictIDFlag = dictIDFlag; }
static U32 g_checksumFlag = 0;
static U32 g_checksumFlag = 1;
void FIO_setChecksumFlag(unsigned checksumFlag) { g_checksumFlag = checksumFlag; }
static U32 g_removeSrcFile = 0;
void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }

View File

@ -36,8 +36,14 @@
#include <stdio.h> /* fprintf */
#include <sys/types.h> /* stat */
#include <sys/stat.h> /* stat */
#include "xxhash.h"
#include "zstd.h"
/*===========================================
* Macros
*==========================================*/
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
/** roundTripTest() :
* Compresses `srcBuff` into `compressedBuff`,
* then decompresses `compressedBuff` into `resultBuff`.
@ -51,7 +57,9 @@ static size_t roundTripTest(void* resultBuff, size_t resultBuffCapacity,
const void* srcBuff, size_t srcBuffSize)
{
static const int maxClevel = 19;
int const cLevel = (!srcBuffSize) ? 1 : (*(const unsigned char*)srcBuff) % maxClevel;
size_t const hashLength = MIN(128, srcBuffSize);
unsigned const h32 = XXH32(srcBuff, hashLength, 0);
int const cLevel = h32 % maxClevel;
size_t const cSize = ZSTD_compress(compressedBuff, compressedBuffCapacity, srcBuff, srcBuffSize, cLevel);
if (ZSTD_isError(cSize)) {
fprintf(stderr, "Compression error : %s \n", ZSTD_getErrorName(cSize));

View File

@ -136,7 +136,7 @@ static int usage_advanced(const char* programName)
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n");
DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
DISPLAY( "--check : enable integrity check\n");
DISPLAY( "--[no-]check : integrity check (default:enabled)\n");
#endif
#ifndef ZSTD_NODECOMPRESS
DISPLAY( "--test : test compressed file integrity \n");
@ -257,6 +257,7 @@ int main(int argCount, const char** argv)
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; }
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }

View File

@ -7,8 +7,9 @@ import glob
import hashlib
import os
import shutil
import subprocess
import sys
import subprocess
from subprocess import Popen, PIPE
repo_url = 'https://github.com/Cyan4973/zstd.git'
tmp_dir_name = 'tests/versionsTest'
@ -17,17 +18,32 @@ git_cmd = 'git'
test_dat_src = 'README.md'
test_dat = 'test_dat'
head = 'vdevel'
dict_source = 'dict_source'
dict_files = './zstd/programs/*.c ./zstd/lib/common/*.c ./zstd/lib/compress/*.c ./zstd/lib/decompress/*.c ./zstd/lib/dictBuilder/*.c ./zstd/lib/legacy/*.c '
dict_files += './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h ./zstd/lib/dictBuilder/*.h ./zstd/lib/legacy/*.h'
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command` to completion and return its exit status.

    command      -- argument list, or a single command string when
                    `param_shell` is true
    print_output -- when true, print the captured stdout, then the captured
                    stderr, once the process has finished
    print_error  -- when true and the exit status is non-zero, print the
                    captured stderr (skipped if `print_output` already did)
    param_shell  -- forwarded to Popen as its `shell=` argument
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    stdout_bytes, stderr_bytes = popen.communicate()
    # errors="replace": the child may emit bytes that are not valid UTF-8;
    # a diagnostic helper must report the exit status rather than raise.
    stdout_lines = stdout_bytes.decode("utf-8", errors="replace")
    stderr_lines = stderr_bytes.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_lines)
        print(stderr_lines)
    elif print_error and popen.returncode != 0:
        # communicate() has returned, so returncode is always set here
        print(stderr_lines)
    return popen.returncode
def proc(cmd_args, pipe=True, dummy=False):
if dummy:
return
if pipe:
subproc = subprocess.Popen(cmd_args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE)
else:
subproc = subprocess.Popen(cmd_args)
subproc = Popen(cmd_args)
return subproc.communicate()
@ -45,21 +61,53 @@ def get_git_tags():
return tags
def create_dict(tag, dict_source_path):
    """Train `dict.<tag>` from the *.c and *.h files in `dict_source_path`.

    Nothing is done when `dict.<tag>` already exists in the current
    directory. Tag 'v0.5.0' runs `./dictBuilder.<tag>`; every other tag runs
    `./zstd.<tag> -f --train`. The outcome is reported on stdout; the
    function returns nothing.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    c_files = glob.glob(dict_source_path + "/*.c")
    h_files = glob.glob(dict_source_path + "/*.h")
    if tag == 'v0.5.0':
        trainer = ['./dictBuilder.' + tag]
    else:
        trainer = ['./zstd.' + tag, '-f', '--train']

    # Pass an argument list (no shell): sample paths containing spaces or
    # shell metacharacters are handed to the trainer untouched.
    result = execute(trainer + c_files + h_files + ['-o', dict_name],
                     print_output=False)
    if result == 0:
        print(dict_name + ' created')
    else:
        print('ERROR: creating of ' + dict_name + ' failed')
def dict_compress_sample(tag, sample):
    """Compress `sample` with `./zstd.<tag>` and dictionary `dict.<tag>`.

    One run is made per entry in the table below. Each successful run
    renames `<sample>.zst` to `<sample>_<NN>_64_<tag>_dictio.zst`, where
    <NN> is the label paired with the flag. A failing run is skipped
    silently; the compressor's stderr is discarded.
    """
    dict_name = 'dict.' + tag
    # (command-line flag, label embedded in the output file name)
    runs = (('-f', '01'), ('-5f', '05'), ('-9f', '09'), ('-15f', '15'), ('-18f', '18'))
    # Context manager so the devnull handle is closed when we are done.
    with open(os.devnull, 'wb') as devnull:
        for flag, label in runs:
            cmd = ['./zstd.' + tag, '-D', dict_name, flag, sample]
            if subprocess.call(cmd, stderr=devnull) == 0:
                os.rename(sample + '.zst',
                          sample + '_' + label + '_64_' + tag + '_dictio.zst')
    print(tag + " : dict compression completed")
def compress_sample(tag, sample):
try:
from subprocess import DEVNULL # py3k
except ImportError:
DEVNULL = open(os.devnull, 'wb')
DEVNULL = open(os.devnull, 'wb')
if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0:
os.rename(sample + '.zst', sample + '_01_64_' + tag + '.zst')
os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst')
if subprocess.call(['./zstd.' + tag, '-5f', sample], stderr=DEVNULL) == 0:
os.rename(sample + '.zst', sample + '_05_64_' + tag + '.zst')
os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodict.zst')
if subprocess.call(['./zstd.' + tag, '-9f', sample], stderr=DEVNULL) == 0:
os.rename(sample + '.zst', sample + '_09_64_' + tag + '.zst')
os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodict.zst')
if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL) == 0:
os.rename(sample + '.zst', sample + '_15_64_' + tag + '.zst')
os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodict.zst')
if subprocess.call(['./zstd.' + tag, '-18f', sample], stderr=DEVNULL) == 0:
os.rename(sample + '.zst', sample + '_18_64_' + tag + '.zst')
os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodict.zst')
# zstdFiles = glob.glob("*.zst*")
# print(zstdFiles)
print(tag + " : compression completed")
@ -87,16 +135,16 @@ def remove_duplicates():
def decompress_zst(tag):
dec_error = 0
list_zst = sorted(glob.glob('*.zst'))
try:
from subprocess import DEVNULL # py3k
except ImportError:
DEVNULL = open(os.devnull, 'wb')
list_zst = sorted(glob.glob('*_nodict.zst'))
for file_zst in list_zst:
print(file_zst, end=' ')
print(tag, end=' ')
file_dec = file_zst + '_d64_' + tag + '.dec'
if subprocess.call(['./zstd.' + tag, '-df', file_zst, '-o', file_dec], stderr=DEVNULL) == 0:
if tag <= 'v0.5.0':
params = ['./zstd.' + tag, '-df', file_zst, file_dec]
else:
params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec]
if execute(params) == 0:
if not filecmp.cmp(file_dec, test_dat):
print('ERR !! ')
dec_error = 1
@ -107,12 +155,43 @@ def decompress_zst(tag):
return dec_error
def decompress_dict(tag):
    """Decompress every `*_dictio.zst` file in the current directory with
    `./zstd.<tag>` and compare each result against `test_dat`.

    The dictionary for each file is `dict.<dict_tag>`. `dict_tag` is `head`
    when the file name contains it; otherwise it is the part of the name
    (suffix removed) starting at its last 'v'. When `tag` is 'v0.6.0', files
    whose dict_tag sorts below 'v0.6.0' are skipped.

    Returns 0 when every attempted file decompressed and matched, 1 otherwise.
    """
    suffix = '_dictio.zst'
    dec_error = 0
    for file_zst in sorted(glob.glob('*' + suffix)):
        stem = file_zst[:-len(suffix)]  # file name without "_dictio.zst"
        if head in stem:
            dict_tag = head
        else:
            dict_tag = stem[stem.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue

        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ')
        file_dec = file_zst + '_d64_' + tag + '.dec'
        # Tags up to v0.5.0 get the output file as a positional argument;
        # later tags get it through `-o`.
        if tag <= 'v0.5.0':
            params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec]
        else:
            params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec]

        if execute(params) != 0:
            print('command does not work')
            dec_error = 1
        elif not filecmp.cmp(file_dec, test_dat):
            print('ERR !! ')
            dec_error = 1
        else:
            print('OK ')
    return dec_error
if __name__ == '__main__':
error_code = 0
base_dir = os.getcwd() + '/..' # /path/to/zstd
tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/tests/versionsTest
clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/tests/versionsTest/zstd
programs_dir = base_dir + '/programs' # /path/to/zstd/programs
base_dir = os.getcwd() + '/..' # /path/to/zstd
tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/tests/versionsTest
clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/tests/versionsTest/zstd
dict_source_path = tmp_dir + '/' + dict_source # /path/to/zstd/tests/versionsTest/dict_source
programs_dir = base_dir + '/programs' # /path/to/zstd/programs
os.makedirs(tmp_dir, exist_ok=True)
# since Travis clones limited depth, we should clone full repository
@ -137,6 +216,10 @@ if __name__ == '__main__':
os.makedirs(r_dir, exist_ok=True)
os.chdir(clone_dir)
git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
if tag == 'v0.5.0':
os.chdir(r_dir + '/dictBuilder') # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
make(['clean', 'dictBuilder'], False)
shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
os.chdir(r_dir + '/programs') # /path/to/zstd/tests/versionsTest/<TAG>/programs
make(['clean', 'zstd'], False)
else:
@ -151,15 +234,25 @@ if __name__ == '__main__':
for dec in glob.glob("*.dec"):
os.remove(dec)
# copy *.c and *.h to a temporary directory ("dict_source")
if not os.path.isdir(dict_source_path):
os.mkdir(dict_source_path)
print('cp ' + dict_files + ' ' + dict_source_path)
execute('cp ' + dict_files + ' ' + dict_source_path, param_shell=True)
print('Compress test.dat by all released zstd')
error_code = 0
for tag in tags:
print(tag)
if tag >= 'v0.5.0':
create_dict(tag, dict_source_path)
dict_compress_sample(tag, test_dat)
remove_duplicates()
error_code += decompress_dict(tag)
compress_sample(tag, test_dat)
remove_duplicates()
if tag >= 'v0.5.1':
error_code += decompress_zst(tag)
error_code += decompress_zst(tag)
print('')
print('Enumerate different compressed files')