exposed ZSTD_MAGIC_DICTIONARY in zstd.h

makes it easier to explain ZSTD_dictMode
2017-06-27 13:50:34 -07:00 · 2017-06-27 13:50:34 -07:00 · 7d3816183f
commit 7d3816183f
parent fecc721fd9
6 changed files with 20 additions and 14 deletions
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@ -348,7 +348,7 @@ static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL };
 <pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
 </b><p>  `src` should point to the start of a ZSTD encoded frame or skippable frame
  `srcSize` must be at least as large as the frame
-  @return : the compressed size of the frame pointed to by `src`,
+  @return : the compressed size of the frame pointed to by `src`,FUZ_rand(&lseed) & 1)
            suitable to pass to `ZSTD_decompress` or similar,
            or an error code if given invalid input. 
 </p></pre><BR>
@ -482,7 +482,10 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
  It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict 
 </p></pre><BR>

-<pre><b>typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e;
+<pre><b>typedef enum { ZSTD_dm_auto=0,        </b>/* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, rawContent otherwize */<b>
+               ZSTD_dm_rawContent,    </b>/* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */<b>
+               ZSTD_dm_fullDict       </b>/* refuses to load a dictionary if it does not respect Zstandard's specification */<b>
+} ZSTD_dictMode_e;
 </b></pre><BR>
 <pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
                                      unsigned byReference, ZSTD_dictMode_e dictMode,
@ -886,7 +889,7 @@ void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 </p></pre><BR>

 <pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);   </b>/* Not ready yet ! <===================================== */<b>
-</b><p>  Reference a prefix (content-only dictionary) to bootstrap next compression job.
+</b><p>  Reference a prefix (raw-content dictionary) for next compression job.
  Decompression will have to use same prefix.
  Prefix is only used once. Tables are discarded at end of compression job.
  If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict.
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@ -98,7 +98,6 @@
 *  Common constants
 ***************************************/
 #define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7+ */

 #define ZSTD_REP_NUM      3                 /* number of repcodes */
 #define ZSTD_REP_CHECK    (ZSTD_REP_NUM)    /* number of repcodes to check by the optimal parser */
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@ -3181,7 +3181,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
    if (dictMode==ZSTD_dm_rawContent)
        return ZSTD_loadDictionaryContent(cctx, dict, dictSize);

-    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) {
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictMode == ZSTD_dm_auto) {
            DEBUGLOG(5, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@ -1884,7 +1884,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
 {
    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
    {   U32 const magic = MEM_readLE32(dict);
-        if (magic != ZSTD_DICT_MAGIC) {
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
    }   }
    dctx->dictID = MEM_readLE32((const char*)dict + 4);
@ -1964,7 +1964,7 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
    ddict->entropyPresent = 0;
    if (ddict->dictSize < 8) return 0;
    {   U32 const magic = MEM_readLE32(ddict->dictContent);
-        if (magic != ZSTD_DICT_MAGIC) return 0;   /* pure content mode */
+        if (magic != ZSTD_MAGIC_DICTIONARY) return 0;   /* pure content mode */
    }
    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4);

@ -2083,7 +2083,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
 unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
 {
    if (dictSize < 8) return 0;
-    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return 0;
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
    return MEM_readLE32((const char*)dict + 4);
 }

--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@ -94,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
 unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
 {
    if (dictSize < 8) return 0;
-    if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
    return MEM_readLE32((const char*)dictBuffer + 4);
 }

@ -865,7 +865,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);

    /* dictionary header */
-    MEM_writeLE32(header, ZSTD_DICT_MAGIC);
+    MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
    {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
        U32 const dictID = params.dictID ? params.dictID : compliantID;
@ -917,7 +917,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
    }

    /* add dictionary header (after entropy tables) */
-    MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
+    MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
        U32 const dictID = params.dictID ? params.dictID : compliantID;
--- a/lib/zstd.h
+++ b/lib/zstd.h
@ -360,6 +360,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output
 /* --- Constants ---*/
 #define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437   /* v0.7+ */

 #define ZSTD_WINDOWLOG_MAX_32  27
 #define ZSTD_WINDOWLOG_MAX_64  27
@ -379,7 +380,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output

 #define ZSTD_FRAMEHEADERSIZE_MAX 18    /* for static allocation */
 #define ZSTD_FRAMEHEADERSIZE_MIN  6
-static const size_t ZSTD_frameHeaderSize_prefix = 5;
+static const size_t ZSTD_frameHeaderSize_prefix = 5;  /* minimum input size to know frame header size */
 static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
 static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable frame length */
@ -573,7 +574,10 @@ ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter par
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);


-typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e;
+typedef enum { ZSTD_dm_auto=0,        /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, rawContent otherwize */
+               ZSTD_dm_rawContent,    /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+               ZSTD_dm_fullDict       /* refuses to load a dictionary if it does not respect Zstandard's specification */
+} ZSTD_dictMode_e;
 /*! ZSTD_createCDict_advanced() :
 *  Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
@ -1027,7 +1031,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s
 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);

 /*! ZSTD_CCtx_refPrefix() :
- *  Reference a prefix (content-only dictionary) to bootstrap next compression job.
+ *  Reference a prefix (raw-content dictionary) for next compression job.
 *  Decompression will have to use same prefix.
 *  Prefix is only used once. Tables are discarded at end of compression job.
 *  If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict.