From c46fb924dfc433d448a1c1aea2d49c2c2195f692 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 29 May 2016 05:01:04 +0200 Subject: [PATCH] added dictionary ID (incomplete) --- lib/common/error_private.h | 1 + lib/common/error_public.h | 1 + lib/common/zstd_internal.h | 3 +- lib/common/zstd_static.h | 17 +- lib/compress/zstd_compress.c | 53 ++-- lib/decompress/zstd_decompress.c | 91 ++++-- lib/dictBuilder/zdict.c | 45 ++- programs/fileio.c | 1 + programs/tests/playTests.sh | 4 +- programs/zstdcli.c | 487 ------------------------------- 10 files changed, 136 insertions(+), 567 deletions(-) diff --git a/lib/common/error_private.h b/lib/common/error_private.h index 3f039ae6..6b243c07 100644 --- a/lib/common/error_private.h +++ b/lib/common/error_private.h @@ -106,6 +106,7 @@ ERR_STATIC const char* ERR_getErrorString(ERR_enum code) case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/lib/common/error_public.h b/lib/common/error_public.h index 6fcf802e..660b2d3f 100644 --- a/lib/common/error_public.h +++ b/lib/common/error_public.h @@ -58,6 +58,7 @@ typedef enum { ZSTD_error_maxSymbolValue_tooLarge, ZSTD_error_maxSymbolValue_tooSmall, ZSTD_error_dictionary_corrupted, + ZSTD_error_dictionary_wrong, ZSTD_error_maxCode } ZSTD_ErrorCode; diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 2eea5fee..e0a8a82d 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -63,7 +63,7 @@ #endif #define ZSTD_OPT_NUM (1<<12) -#define ZSTD_DICT_MAGIC 0xEC30A436 +#define ZSTD_DICT_MAGIC 0xEC30A437 #define ZSTD_REP_NUM 3 #define ZSTD_REP_INIT ZSTD_REP_NUM @@ -82,6 +82,7 @@ #define ZSTD_WINDOWLOG_ABSOLUTEMIN 12 static const 
size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 }; +static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; #define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... :( */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; diff --git a/lib/common/zstd_static.h b/lib/common/zstd_static.h index e0f50cf4..d9f619f2 100644 --- a/lib/common/zstd_static.h +++ b/lib/common/zstd_static.h @@ -51,7 +51,7 @@ extern "C" { /*-************************************* * Constants ***************************************/ -#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */ +#define ZSTD_MAGICNUMBER 0xFD2FB527 /* v0.7 */ /*-************************************* @@ -87,6 +87,7 @@ typedef struct { typedef struct { U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */ + U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */ } ZSTD_frameParameters; typedef struct { @@ -103,11 +104,11 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; } * Advanced functions ***************************************/ /*! ZSTD_createCCtx_advanced() : - * Create a ZSTD compression context using external alloc and free functions */ + * Create a ZSTD compression context using external alloc and free functions */ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); /*! ZSTD_createDCtx_advanced() : - * Create a ZSTD decompression context using external alloc and free functions */ + * Create a ZSTD decompression context using external alloc and free functions */ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_API unsigned ZSTD_maxCLevel (void); @@ -191,10 +192,14 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci You can then reuse ZSTD_CCtx to compress some new frame. 
*/ -typedef struct { U64 frameContentSize; U32 windowLog; } ZSTD_frameParams; +typedef struct { + U64 frameContentSize; + U32 windowLog; + U32 dictID; +} ZSTD_frameParams; -#define ZSTD_FRAMEHEADERSIZE_MAX 13 /* for static allocation */ -static const size_t ZSTD_frameHeaderSize_min = 5; +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ +static const size_t ZSTD_frameHeaderSize_min = 6; static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 91f6bb79..c7ba0305 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -99,6 +99,7 @@ struct ZSTD_CCtx_s U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32 loadedDictEnd; U32 stage; /* 0: created; 1: init,dictLoad; 2:started */ + U32 dictID; ZSTD_parameters params; void* workSpace; size_t workSpaceSize; @@ -144,7 +145,7 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) if (!ctx) return NULL; memset(ctx, 0, sizeof(ZSTD_CCtx)); - ctx->customAlloc = customMem.customAlloc; + ctx->customAlloc = customMem.customAlloc; ctx->customFree = customMem.customFree; return ctx; } @@ -229,6 +230,7 @@ size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface { ZSTD_CCtx* zc = ZSTD_createCCtx(); ZSTD_parameters params; + memset(&params, 0, sizeof(params)); params.cParams = cParams; params.fParams.contentSizeFlag = 1; ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0); @@ -300,6 +302,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; zc->stage = 1; + zc->dictID = 0; zc->loadedDictEnd = 0; return 0; @@ -328,7 +331,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); } - /* copy 
dictionary pointers */ + /* copy dictionary offsets */ dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; dstCCtx->nextSrc = srcCCtx->nextSrc; @@ -337,6 +340,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) dstCCtx->dictLimit = srcCCtx->dictLimit; dstCCtx->lowLimit = srcCCtx->lowLimit; dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; + dstCCtx->dictID = srcCCtx->dictID; /* copy entropy tables */ dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; @@ -2092,27 +2096,40 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - ZSTD_parameters params, U64 pledgedSrcSize) + ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID) { BYTE* const op = (BYTE*)dst; U32 const fcsId = params.fParams.contentSizeFlag ? (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) : /* 0-3 */ 0; - BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */ + BYTE const fAllocByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */ | (fcsId << 6) ); - size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; - if (hSize > dstCapacity) return ERROR(dstSize_tooSmall); + U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + BYTE const fCheckByte = (BYTE)(dictIDSizeCode&3); + size_t pos; + + if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - op[4] = fdescriptor; + op[4] = fAllocByte; + op[5] = fCheckByte; + pos = 6; + switch(dictIDSizeCode) + { + default: /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)(dictID)); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } switch(fcsId) { default: /* impossible */ case 0 : break; - case 1 
: op[5] = (BYTE)(pledgedSrcSize); break; - case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break; - case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break; + case 1 : op[pos] = (BYTE)(pledgedSrcSize); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; } - return hSize; + return pos; } @@ -2126,7 +2143,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, if (zc->stage==0) return ERROR(stage_wrong); if (frame && (zc->stage==1)) { /* copy saved header */ - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize); + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize, zc->dictID); if (ZSTD_isError(fhSize)) return fhSize; dstCapacity -= fhSize; dst = (char*)dst + fhSize; @@ -2284,13 +2301,14 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t * @return : 0, or an error code */ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) { - if ((dict==NULL) || (dictSize<=4)) return 0; + if ((dict==NULL) || (dictSize<=8)) return 0; /* default : dict is pure content */ if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); + zc->dictID = zc->params.fParams.noDictIDFlag ? 
0 : MEM_readLE32((const char*)dict+4); /* known magic number : dict is parsed for entropy stats and content */ - { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; + { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8) + 8; if (ZSTD_isError(eSize)) return eSize; return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); } @@ -2303,7 +2321,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize) { - { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN); + { U32 const hashLog3 = (!pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN); zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0; } { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1); @@ -2330,8 +2348,8 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters params; + memset(&params, 0, sizeof(params)); params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - params.fParams.contentSizeFlag = 0; ZSTD_adjustCParams(&params.cParams, 0, dictSize); ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel); return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0); @@ -2358,7 +2376,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity) /* special case : empty frame */ if (zc->stage==1) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0); + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0, 0); if (ZSTD_isError(fhSize)) return fhSize; dstCapacity -= fhSize; op += fhSize; @@ 
-2434,6 +2452,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters params; + memset(&params, 0, sizeof(params)); ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); params.fParams.contentSizeFlag = 1; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index b670c548..dfcc6e2d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -120,6 +120,7 @@ struct ZSTD_DCtx_s ZSTD_freeFunction customFree; blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ ZSTD_dStage stage; + U32 dictID; U32 flagRepeatTable; const BYTE* litPtr; size_t litBufSize; @@ -140,6 +141,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->dictEnd = NULL; dctx->hufTableX4[0] = HufLog; dctx->flagRepeatTable = 0; + dctx->dictID = 0; return 0; } @@ -153,8 +155,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) { ZSTD_DCtx* dctx; - if (!customMem.customAlloc && !customMem.customFree) - { + if (!customMem.customAlloc && !customMem.customFree) { dctx = (ZSTD_DCtx*) malloc(sizeof(ZSTD_DCtx)); if (!dctx) return NULL; dctx->customAlloc = malloc; @@ -169,7 +170,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) dctx = (ZSTD_DCtx*) customMem.customAlloc(sizeof(ZSTD_DCtx)); if (!dctx) return NULL; - dctx->customAlloc = customMem.customAlloc; + dctx->customAlloc = customMem.customAlloc; dctx->customFree = customMem.customFree; ZSTD_decompressBegin(dctx); @@ -211,12 +212,23 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) /* Frame descriptor - 1 byte, using : + 1 byte - Alloc : 
bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h) - bit 4 : minmatch 4(0) or 3(1) + bit 4 : reserved for windowLog (must be zero) bit 5 : reserved (must be zero) bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes + 1 byte - checker : + bit 0-1 : dictID (0, 1, 2 or 4 bytes) + bit 2-7 : reserved (must be zero) + + Optional : dictID (0, 1, 2 or 4 bytes) + Automatic adaptation + 0 : no dictID + 1 : 1 - 255 + 2 : 256 - 65535 + 4 : all other values + Optional : content size (0, 1, 2 or 8 bytes) 0 : unknown 1 : 0-255 bytes @@ -297,8 +309,10 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) { if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); - { U32 const fcsId = (((const BYTE*)src)[4]) >> 6; - return ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; } + { U32 const fcsId = (((const BYTE*)src)[4]) >> 6; + U32 const dictID =(((const BYTE*)src)[5]) & 3; + return ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId] + ZSTD_did_fieldSize[dictID]; + } } @@ -319,16 +333,27 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t if (srcSize < fhsize) return fhsize; } memset(fparamsPtr, 0, sizeof(*fparamsPtr)); - { BYTE const frameDesc = ip[4]; - fparamsPtr->windowLog = (frameDesc & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if ((frameDesc & 0x20) != 0) return ERROR(frameParameter_unsupported); /* reserved 1 bit */ - switch(frameDesc >> 6) /* fcsId */ + { BYTE const allocByte = ip[4]; + BYTE const checkByte = ip[5]; + size_t pos = ZSTD_frameHeaderSize_min; + U32 const dictIDSizeCode = checkByte&3; + fparamsPtr->windowLog = (allocByte & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + if ((allocByte & 0x30) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */ + switch(dictIDSizeCode) /* fcsId */ + { + default: /* impossible */ + case 0 : fparamsPtr->dictID = 0; break; + case 1 : fparamsPtr->dictID = ip[pos]; pos++; 
break; + case 2 : fparamsPtr->dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : fparamsPtr->dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(allocByte >> 6) /* fcsId */ { default: /* impossible */ case 0 : fparamsPtr->frameContentSize = 0; break; - case 1 : fparamsPtr->frameContentSize = ip[5]; break; - case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+5)+256; break; - case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+5); break; + case 1 : fparamsPtr->frameContentSize = ip[pos]; break; + case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+pos); break; } } return 0; } @@ -1021,6 +1046,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize); if (ZSTD_isError(result)) return result; + if (dctx->dictID != dctx->fParams.dictID) return ERROR(dictionary_wrong); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -1120,25 +1146,29 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - size_t eSize; - U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_DICT_MAGIC) { - /* pure content mode */ + if (dictSize < 8) return ERROR(dictionary_corrupted); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_DICT_MAGIC) { + /* pure content mode */ + ZSTD_refDictContent(dctx, dict, dictSize); + return 0; + } + dctx->dictID = MEM_readLE32((const char*)dict + 4); + + /* load entropy tables */ + dict = (const char*)dict + 8; + dictSize -= 8; + { size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + eSize; + dictSize -= 
eSize; + } + + /* reference dictionary content */ ZSTD_refDictContent(dctx, dict, dictSize); + return 0; } - /* load entropy tables */ - dict = (const char*)dict + 4; - dictSize -= 4; - eSize = ZSTD_loadEntropy(dctx, dict, dictSize); - if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); - - /* reference dictionary content */ - dict = (const char*)dict + eSize; - dictSize -= eSize; - ZSTD_refDictContent(dctx, dict, dictSize); - - return 0; } @@ -1154,4 +1184,3 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t return 0; } - diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 95d291f4..1d373225 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -61,11 +61,11 @@ #include "fse.h" #include "huf_static.h" #include "zstd_internal.h" +#include "xxhash.h" #include "divsufsort.h" #include "zdict_static.h" - /*-************************************* * Constants ***************************************/ @@ -104,8 +104,7 @@ static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length) { const BYTE* const b = (const BYTE*)ptr; size_t u; - for (u=0; u126) c = '.'; /* non-printable char */ DISPLAYLEVEL(dlevel, "%c", c); @@ -198,8 +197,12 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch) { const char* const pStart = (const char*)pIn; for (;;) { - size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; } + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); return (size_t)((const char*)pIn - pStart); } @@ -346,9 +349,8 @@ static dictItem ZDICT_analyzePos( maxLength = i; /* reduce maxLength in case of final into repetitive data */ - { - U32 l = (U32)maxLength; - BYTE c = b[pos + maxLength-1]; + { U32 l = 
(U32)maxLength; + BYTE const c = b[pos + maxLength-1]; while (b[pos+l-2]==c) l--; maxLength = l; } @@ -367,12 +369,10 @@ static dictItem ZDICT_analyzePos( solution.savings = savings[maxLength]; /* mark positions done */ - { - U32 id; - U32 testedPos; + { U32 id; for (id=start; idpos; + U32 const max = table->pos; U32 u; if (!id) return; /* protection, should never happen */ for (u=id; upos; if (nextElt >= maxSize) nextElt = maxSize-1; current = nextElt-1; @@ -530,8 +529,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize, DISPLAYLEVEL(2, "finding patterns ... \n"); DISPLAYLEVEL(3, "minimum ratio : %u \n", minRatio); - { - U32 cursor; for (cursor=0; cursor < bufferSize; ) { + { U32 cursor; for (cursor=0; cursor < bufferSize; ) { dictItem solution; if (doneMarks[cursor]) { cursor++; continue; } solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio); @@ -542,8 +540,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize, } } /* limit dictionary size */ - { - U32 max = dictList->pos; /* convention : nb of useful elts within dictList */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ U32 currentSize = 0; U32 n; for (n=1; n>11)); + } + hSize = 8; /* entropic tables */ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ DISPLAYLEVEL(2, "statistics ... 
\n"); - hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4, + hSize += ZDICT_analyzeEntropy((char*)dictBuffer+hSize, maxDictSize-hSize, compressionLevel, samplesBuffer, sampleSizes, nbSamples, (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize); @@ -946,4 +946,3 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, samplesBuffer, samplesSizes, nbSamples, params); } - diff --git a/programs/fileio.c b/programs/fileio.c index eba90d01..d436401d 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -308,6 +308,7 @@ static int FIO_compressFilename_internal(cRess_t ress, /* init */ { ZSTD_parameters params; + memset(&params, 0, sizeof(params)); params.cParams = ZSTD_getCParams(cLevel, fileSize, ress.dictBufferSize); params.fParams.contentSizeFlag = 1; if (g_maxWLog) if (params.cParams.windowLog > g_maxWLog) params.cParams.windowLog = g_maxWLog; diff --git a/programs/tests/playTests.sh b/programs/tests/playTests.sh index 479ffeca..0c165dba 100755 --- a/programs/tests/playTests.sh +++ b/programs/tests/playTests.sh @@ -124,9 +124,9 @@ $ECHO "\n**** dictionary tests **** " ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2 diff -q tmp1 tmp2 $ZSTD --train *.c *.h -o tmpDict -$ZSTD zstd_compress.c -D tmpDict -of tmp +$ZSTD zstdcli.c -D tmpDict -of tmp $ZSTD -d tmp -D tmpDict -of result -diff zstd_compress.c result +diff zstdcli.c result $ECHO "\n**** multiple files tests **** " diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 68dd98cb..e69de29b 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1,487 +0,0 @@ -/* - zstdcli - Command Line Interface (cli) for zstd - Copyright (C) Yann Collet 2014-2016 - - GPL v2 License - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - You can contact the author at : - - zstd homepage : http://www.zstd.net/ -*/ -/* - Note : this is a user program, not part of libzstd. - The license of libzstd is BSD. - The license of this command line program is GPLv2. -*/ - - -/*-************************************ -* Includes -**************************************/ -#include "util.h" /* Compiler options, UTIL_HAS_CREATEFILELIST */ -#include <string.h> /* strcmp, strlen */ -#include <ctype.h> /* toupper */ -#include "fileio.h" -#ifndef ZSTD_NOBENCH -# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */ -#endif -#include "zstd_static.h" /* ZSTD_maxCLevel, ZSTD version numbers */ -#ifndef ZSTD_NODICT -# include "dibio.h" -#endif - - - -/*-************************************ -* OS-specific Includes -**************************************/ -#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) -# include <io.h> /* _isatty */ -# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) -#else -#if _POSIX_C_SOURCE >= 1 || _XOPEN_SOURCE || _POSIX_SOURCE -# include <unistd.h> /* isatty */ -# define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) -#else -# define IS_CONSOLE(stdStream) 0 -#endif -#endif - - -/*-************************************ -* Constants -**************************************/ -#define COMPRESSOR_NAME "zstd command line interface" -#ifndef ZSTD_VERSION -# define ZSTD_VERSION "v" ZSTD_VERSION_STRING -#endif -#define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), 
ZSTD_VERSION, AUTHOR - -#define ZSTD_EXTENSION ".zst" -#define ZSTD_CAT "zstdcat" -#define ZSTD_UNZSTD "unzstd" - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -static const char* g_defaultDictName = "dictionary"; -static const unsigned g_defaultMaxDictSize = 110 KB; -static const unsigned g_defaultDictCLevel = 5; -static const unsigned g_defaultSelectivityLevel = 9; - - -/*-************************************ -* Display Macros -**************************************/ -#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } -static FILE* displayOut; -static unsigned displayLevel = 2; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ - - -/*-************************************ -* Command Line -**************************************/ -static int usage(const char* programName) -{ - DISPLAY( "Usage :\n"); - DISPLAY( " %s [args] [FILE(s)] [-o file]\n", programName); - DISPLAY( "\n"); - DISPLAY( "FILE : a filename\n"); - DISPLAY( " with no FILE, or when FILE is - , read standard input\n"); - DISPLAY( "Arguments :\n"); -#ifndef ZSTD_NOCOMPRESS - DISPLAY( " -# : # compression level (1-%u, default:1) \n", ZSTD_maxCLevel()); -#endif -#ifndef ZSTD_NODECOMPRESS - DISPLAY( " -d : decompression \n"); -#endif - DISPLAY( " -D file: use `file` as Dictionary \n"); - DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n"); - DISPLAY( " -f : overwrite output without prompting \n"); - DISPLAY( " -h/-H : display help/long help and exit\n"); - return 0; -} - -static int usage_advanced(const char* programName) -{ - DISPLAY(WELCOME_MESSAGE); - usage(programName); - DISPLAY( "\n"); - DISPLAY( "Advanced arguments :\n"); - DISPLAY( " -V : display Version number and exit\n"); - DISPLAY( " -t : test compressed file integrity \n"); - DISPLAY( " -v : verbose mode\n"); - DISPLAY( " -q : suppress warnings; specify twice to 
suppress errors too\n"); - DISPLAY( " -c : force write to standard output, even if it is the console\n"); -#ifdef UTIL_HAS_CREATEFILELIST - DISPLAY( " -r : operate recursively on directories\n"); -#endif -#ifndef ZSTD_NOCOMPRESS - DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n"); -#endif - DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n"); -#ifndef ZSTD_NODICT - DISPLAY( "\n"); - DISPLAY( "Dictionary builder :\n"); - DISPLAY( "--train : create a dictionary from a training set of files \n"); - DISPLAY( " -o file: `file` is dictionary name (default: %s) \n", g_defaultDictName); - DISPLAY( "--maxdict:limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize); - DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel); -#endif -#ifndef ZSTD_NOBENCH - DISPLAY( "\n"); - DISPLAY( "Benchmark arguments :\n"); - DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); - DISPLAY( " -e# : test all compression levels from -bX to # (default: 1)\n"); - DISPLAY( " -i# : iteration loops [1-9](default : 3)\n"); - DISPLAY( " -B# : cut file into independent blocks of size # (default: no block)\n"); -#endif - return 0; -} - -static int badusage(const char* programName) -{ - DISPLAYLEVEL(1, "Incorrect parameters\n"); - if (displayLevel >= 1) usage(programName); - return 1; -} - - -static void waitEnter(void) -{ - int unused; - DISPLAY("Press enter to continue...\n"); - unused = getchar(); - (void)unused; -} - - -#define CLEAN_RETURN(i) { operationResult = (i); goto _end; } - -int main(int argCount, const char** argv) -{ - int argNb, - bench=0, - decode=0, - forceStdout=0, - main_pause=0, - nextEntryIsDictionary=0, - operationResult=0, - dictBuild=0, - nextArgumentIsOutFileName=0, - nextArgumentIsMaxDict=0; - unsigned cLevel = 1; - unsigned cLevelLast = 1; - unsigned recursive = 0; - const char** filenameTable = (const char**)malloc(argCount * 
sizeof(const char*)); /* argCount >= 1 */ - unsigned filenameIdx = 0; - const char* programName = argv[0]; - const char* outFileName = NULL; - const char* dictFileName = NULL; - char* dynNameSpace = NULL; - unsigned maxDictSize = g_defaultMaxDictSize; - unsigned dictCLevel = g_defaultDictCLevel; - unsigned dictSelect = g_defaultSelectivityLevel; -#ifdef UTIL_HAS_CREATEFILELIST - const char** fileNamesTable = NULL; - char* fileNamesBuf = NULL; - unsigned fileNamesNb; -#endif - - /* init */ - (void)recursive; (void)cLevelLast; (void)dictCLevel; /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */ - (void)decode; (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */ - if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); } - filenameTable[0] = stdinmark; - displayOut = stderr; - /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */ - { size_t pos; - for (pos = (int)strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } } - programName += pos; - } - - /* preset behaviors */ - if (!strcmp(programName, ZSTD_UNZSTD)) decode=1; - if (!strcmp(programName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; } - - /* command switches */ - for(argNb=1; argNb='0') && (*argument<='9')) { - cLevel = 0; - while ((*argument >= '0') && (*argument <= '9')) { - cLevel *= 10; - cLevel += *argument - '0'; - argument++; - } - dictCLevel = cLevel; - if (dictCLevel > ZSTD_maxCLevel()) - CLEAN_RETURN(badusage(programName)); - continue; - } -#endif - - switch(argument[0]) - { - /* Display help */ - case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); /* Version Only */ - case 'H': - case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); - - /* Decoding */ - case 'd': decode=1; argument++; break; - - /* Force stdout, even if stdout==console */ - case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break; - - /* Use file content 
as dictionary */ - case 'D': nextEntryIsDictionary = 1; argument++; break; - - /* Overwrite */ - case 'f': FIO_overwriteMode(); forceStdout=1; argument++; break; - - /* Verbose mode */ - case 'v': displayLevel=4; argument++; break; - - /* Quiet mode */ - case 'q': displayLevel--; argument++; break; - - /* keep source file (default anyway, so useless; for gzip/xz compatibility) */ - case 'k': argument++; break; - - /* test compressed file */ - case 't': decode=1; outFileName=nulmark; FIO_overwriteMode(); argument++; break; - - /* dictionary name */ - case 'o': nextArgumentIsOutFileName=1; argument++; break; - - /* recursive */ - case 'r': recursive=1; argument++; break; - -#ifndef ZSTD_NOBENCH - /* Benchmark */ - case 'b': bench=1; argument++; break; - - /* range bench (benchmark only) */ - case 'e': - /* compression Level */ - argument++; - if ((*argument>='0') && (*argument<='9')) { - cLevelLast = 0; - while ((*argument >= '0') && (*argument <= '9')) - cLevelLast *= 10, cLevelLast += *argument++ - '0'; - } - break; - - /* Modify Nb Iterations (benchmark only) */ - case 'i': - { U32 iters= 0; - argument++; - while ((*argument >='0') && (*argument <='9')) - iters *= 10, iters += *argument++ - '0'; - BMK_setNotificationLevel(displayLevel); - BMK_SetNbIterations(iters); - } - break; - - /* cut input into blocks (benchmark only) */ - case 'B': - { size_t bSize = 0; - argument++; - while ((*argument >='0') && (*argument <='9')) - bSize *= 10, bSize += *argument++ - '0'; - if (toupper(*argument)=='K') bSize<<=10, argument++; /* allows using KB notation */ - if (toupper(*argument)=='M') bSize<<=20, argument++; - if (toupper(*argument)=='B') argument++; - BMK_setNotificationLevel(displayLevel); - BMK_SetBlockSize(bSize); - } - break; -#endif /* ZSTD_NOBENCH */ - - /* Selection level */ - case 's': argument++; - dictSelect = 0; - while ((*argument >= '0') && (*argument <= '9')) - dictSelect *= 10, dictSelect += *argument++ - '0'; - break; - - /* Pause at the end (-p) or set 
an additional param (-p#) (hidden option) */ - case 'p': argument++; -#ifndef ZSTD_NOBENCH - if ((*argument>='0') && (*argument<='9')) { - int additionalParam = 0; - while ((*argument >= '0') && (*argument <= '9')) - additionalParam *= 10, additionalParam += *argument++ - '0'; - BMK_setAdditionalParam(additionalParam); - } else -#endif - main_pause=1; - break; - /* unknown command */ - default : CLEAN_RETURN(badusage(programName)); - } - } - continue; - } /* if (argument[0]=='-') */ - - if (nextEntryIsDictionary) { - nextEntryIsDictionary = 0; - dictFileName = argument; - continue; - } - - if (nextArgumentIsOutFileName) { - nextArgumentIsOutFileName = 0; - outFileName = argument; - if (!strcmp(outFileName, "-")) outFileName = stdoutmark; - continue; - } - - if (nextArgumentIsMaxDict) { - nextArgumentIsMaxDict = 0; - maxDictSize = 0; - while ((*argument>='0') && (*argument<='9')) - maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++; - if (toupper(*argument)=='K') maxDictSize <<= 10; - continue; - } - - /* add filename to list */ - filenameTable[filenameIdx++] = argument; - } - - /* Welcome message (if verbose) */ - DISPLAYLEVEL(3, WELCOME_MESSAGE); - -#ifdef UTIL_HAS_CREATEFILELIST - if (recursive) { - fileNamesTable = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb); - if (fileNamesTable) { - unsigned i; - for (i=0; i use stdin and stdout */ - filenameIdx += !filenameIdx; /*< default input is stdin */ - if (!strcmp(filenameTable[0], stdinmark) && !outFileName ) outFileName = stdoutmark; /*< when input is stdin, default output is stdout */ - - /* Check if input/output defined as console; trigger an error in this case */ - if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName)); - if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !(forceStdout && decode)) - CLEAN_RETURN(badusage(programName)); - - /* user-selected output filename, only possible with a single 
file */ - if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) { - DISPLAY("Too many files (%u) on the command line. \n", filenameIdx); - CLEAN_RETURN(filenameIdx); - } - - /* No warning message in pipe mode (stdin + stdout) or multiple mode */ - if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; - if ((filenameIdx>1) && (displayLevel==2)) displayLevel=1; - - /* IO Stream/File */ - FIO_setNotificationLevel(displayLevel); -#ifndef ZSTD_NOCOMPRESS - if (!decode) { - if (filenameIdx==1 && outFileName) - operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel); - else - operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName, cLevel); - } else -#endif - { /* decompression */ -#ifndef ZSTD_NODECOMPRESS - if (filenameIdx==1 && outFileName) - operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName); - else - operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName); -#else - DISPLAY("Decompression not supported\n"); -#endif - } - -_end: - if (main_pause) waitEnter(); - free(dynNameSpace); -#ifdef UTIL_HAS_CREATEFILELIST - if (fileNamesTable) - UTIL_freeFileList(fileNamesTable, fileNamesBuf); - else -#endif - free((void*)filenameTable); - return operationResult; -}