From 7d3816183f5332eb2de75ef14ac9371173311bb7 Mon Sep 17 00:00:00 2001
From: Yann Collet `src` should point to the start of a ZSTD encoded frame or skippable frame
`srcSize` must be at least as large as the frame
- @return : the compressed size of the frame pointed to by `src`,
+ @return : the compressed size of the frame pointed to by `src`,
suitable to pass to `ZSTD_decompress` or similar,
or an error code if given invalid input.
size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
@@ -482,7 +482,10 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict
typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e; +typedef enum { ZSTD_dm_auto=0, /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, rawContent otherwise */ + ZSTD_dm_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dm_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */ +} ZSTD_dictMode_e;
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_dictMode_e dictMode, @@ -886,7 +889,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! <===================================== */ -Reference a prefix (content-only dictionary) to bootstrap next compression job. +
Reference a prefix (raw-content dictionary) for next compression job. Decompression will have to use same prefix. Prefix is only used once. Tables are discarded at end of compression job. If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict. diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 4e216d72..bcf93e58 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -98,7 +98,6 @@ * Common constants ***************************************/ #define ZSTD_OPT_NUM (1<<12) -#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ #define ZSTD_REP_NUM 3 /* number of repcodes */ #define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b95b4770..b6b92f32 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3181,7 +3181,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, if (dictMode==ZSTD_dm_rawContent) return ZSTD_loadDictionaryContent(cctx, dict, dictSize); - if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) { + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictMode == ZSTD_dm_auto) { DEBUGLOG(5, "raw content dictionary detected"); return ZSTD_loadDictionaryContent(cctx, dict, dictSize); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index e823c3db..61f766e2 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1884,7 +1884,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict { if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); { U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_DICT_MAGIC) { + if (magic != ZSTD_MAGIC_DICTIONARY) { return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ } } dctx->dictID = MEM_readLE32((const char*)dict + 4); @@ -1964,7 +1964,7 @@ static size_t 
ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict) ddict->entropyPresent = 0; if (ddict->dictSize < 8) return 0; { U32 const magic = MEM_readLE32(ddict->dictContent); - if (magic != ZSTD_DICT_MAGIC) return 0; /* pure content mode */ + if (magic != ZSTD_MAGIC_DICTIONARY) return 0; /* pure content mode */ } ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4); @@ -2083,7 +2083,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) { if (dictSize < 8) return 0; - if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; return MEM_readLE32((const char*)dict + 4); } diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 943ddde0..04bfe661 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -94,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) { if (dictSize < 8) return 0; - if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0; + if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; return MEM_readLE32((const char*)dictBuffer + 4); } @@ -865,7 +865,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); /* dictionary header */ - MEM_writeLE32(header, ZSTD_DICT_MAGIC); + MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY); { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; U32 const dictID = params.dictID ? 
params.dictID : compliantID; @@ -917,7 +917,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo } /* add dictionary header (after entropy tables) */ - MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC); + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; U32 const dictID = params.dictID ? params.dictID : compliantID; diff --git a/lib/zstd.h b/lib/zstd.h index 49a5b791..2b901870 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -360,6 +360,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output /* --- Constants ---*/ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */ #define ZSTD_WINDOWLOG_MAX_32 27 #define ZSTD_WINDOWLOG_MAX_64 27 @@ -379,7 +380,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ #define ZSTD_FRAMEHEADERSIZE_MIN 6 -static const size_t ZSTD_frameHeaderSize_prefix = 5; +static const size_t ZSTD_frameHeaderSize_prefix = 5; /* minimum input size to know frame header size */ static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ @@ -573,7 +574,10 @@ ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter par ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); -typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e; +typedef enum { ZSTD_dm_auto=0, /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, rawContent otherwise */ +
ZSTD_dm_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dm_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */ +} ZSTD_dictMode_e; /*! ZSTD_createCDict_advanced() : * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, @@ -1027,7 +1031,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*! ZSTD_CCtx_refPrefix() : - * Reference a prefix (content-only dictionary) to bootstrap next compression job. + * Reference a prefix (raw-content dictionary) for next compression job. * Decompression will have to use same prefix. * Prefix is only used once. Tables are discarded at end of compression job. * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict.