From 831b4890ce7db8c3771e2ef9bfc2b5c929b5ac2b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 23 Feb 2017 23:09:10 -0800 Subject: [PATCH 1/3] minor tests/Makefile refactoring and update of zstd_manual,html --- doc/zstd_manual.html | 13 ++++++++----- lib/compress/zstdmt_compress.c | 10 +++++----- lib/zstd.h | 2 +- tests/.gitignore | 2 ++ tests/Makefile | 34 ++++++++++++++++++++++++++-------- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 23224d77..02656c23 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -34,7 +34,7 @@ zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22. - Levels >= 20, labelled `--ultra`, should be used with caution, as they require more memory. + Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. Compression can be done in: - a single step (described as Simple API) - a single step, reusing a context (described as Explicit memory management) @@ -306,8 +306,8 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v

Compressed size functions


 
-
size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize);
-

`src` should point to the start of a ZSTD encoded frame +

size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+

`src` should point to the start of a ZSTD encoded frame or skippable frame `srcSize` must be at least as large as the frame @return : the compressed size of the frame pointed to by `src`, suitable to pass to `ZSTD_decompress` or similar, or an error code if given invalid input. @@ -321,7 +321,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases. @return : decompressed size of the frame pointed to be `src` if known, otherwise - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - - ZSTD_CONTENTSIZE_ERROR if an error occured (e.g. invalid magic number, srcSize too small) + - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)


unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
@@ -365,7 +365,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
 


typedef enum {
-    ZSTD_p_forceWindow   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
+    ZSTD_p_forceWindow   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
 } ZSTD_CCtxParameter;
 

size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
@@ -585,6 +585,9 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo
   c) Frame Content - any content (User Data) of length equal to Frame Size
   For skippable frames ZSTD_decompressContinue() always returns 0.
   For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+    Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content.
+           For purposes of decompression, it is valid in both cases to skip the frame using
+           ZSTD_findFrameCompressedSize to find its size in bytes.
   It also returns Frame Size as fparamsPtr->frameContentSize.
 
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 97d2b38e..f32c334a 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -231,16 +231,16 @@ void ZSTDMT_compressChunk(void* jobDescription) const void* const src = (const char*)job->srcStart + job->dictSize; buffer_t const dstBuff = job->dstBuff; DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); - if (job->cdict) { + if (job->cdict) { /* should only happen for first segment */ size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize); if (job->cdict) DEBUGLOG(3, "using CDict "); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } - } else { - size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); + } else { /* srcStart points at reloaded section */ + size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1); } - if (!job->firstChunk) { /* flush frame header */ + if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */ size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } ZSTD_invalidateRepCodes(job->cctx); @@ -248,7 +248,7 @@ void ZSTDMT_compressChunk(void* jobDescription) DEBUGLOG(4, "Compressing : "); DEBUG_PRINTHEX(4, job->srcStart, 12); - job->cSize = (job->lastChunk) ? /* last chunk signal */ + job->cSize = (job->lastChunk) ? 
ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize); DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); diff --git a/lib/zstd.h b/lib/zstd.h index e597c5db..f3580544 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -462,7 +462,7 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); typedef enum { - ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/ + ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */ } ZSTD_CCtxParameter; /*! ZSTD_setCCtxParameter() : * Set advanced parameters, selected through enum ZSTD_CCtxParameter diff --git a/tests/.gitignore b/tests/.gitignore index dc468dee..f408a749 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -9,6 +9,8 @@ zbufftest32 zbufftest-dll zstreamtest zstreamtest32 +zstreamtest_asan +zstreamtest_tsan zstreamtest-dll datagen paramgrill diff --git a/tests/Makefile b/tests/Makefile index 5b0e29c6..4fae769d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -35,22 +35,28 @@ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c -ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c +ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c -ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) +ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) ZBUFF_FILES := $(ZSTDDIR)/deprecated/*.c ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c +ZSTD_OBJ := $(patsubst %.c,%.o, $(wildcard $(ZSTD_FILES)) ) +ZBUFF_OBJ := $(patsubst %.c,%.o, $(wildcard $(ZBUFF_FILES)) ) +ZDICT_OBJ := $(patsubst %.c,%.o, $(wildcard $(ZDICT_FILES)) ) # Define 
*.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) EXT =.exe -MULTITHREAD = -DZSTD_MULTITHREAD +MULTITHREAD_CPP = -DZSTD_MULTITHREAD +MULTITHREAD_LD = else EXT = -MULTITHREAD = -pthread -DZSTD_MULTITHREAD +MULTITHREAD_CPP = -DZSTD_MULTITHREAD +MULTITHREAD_LD = -lpthread endif +MULTITHREAD = $(MULTITHREAD_CPP) $(MULTITHREAD_LD) VOID = /dev/null ZSTREAM_TESTTIME = -T2mn @@ -124,11 +130,23 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c $(MAKE) -C $(ZSTDDIR) libzstd $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) -zstreamtest : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c - $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) +ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c +zstreamtest : CPPFLAGS += $(MULTITHREAD_CPP) +zstreamtest : LDFLAGS += $(MULTITHREAD_LD) +zstreamtest : $(ZSTREAMFILES) + $(CC) $(FLAGS) $^ -o $@$(EXT) -zstreamtest32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c - $(CC) -m32 $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) +zstreamtest32 : CFLAGS += -m32 +zstreamtest32 : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +zstreamtest_asan : CFLAGS += -fsanitize=address +zstreamtest_asan : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) + +zstreamtest_tsan : CFLAGS += -fsanitize=thread +zstreamtest_tsan : $(ZSTREAMFILES) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zstreamtest.c From 14312d833e1249de7bb0caa916d83993d2fa33be Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 23 Feb 2017 23:42:12 -0800 Subject: [PATCH 2/3] zstdmt : fix : loading prefix from previous segments There used to be a (very small) chance that loading prefix from previous segment would be confused with a real zstd dictionary. For that to happen, the prefix needs to start with the same value as dictionary magic. 
That's 1 chance in 4 billion if all values have equal probability. But in fact, since some values are more common (0x00000000 for example) and others are less common, and dictionary magic was selected to be one of the latter, probabilities are likely even lower. Anyway, this risk is now down to zero by adding a new CCtx parameter : ZSTD_p_forceRawDict Current parameter policy : the parameter "sticks" to its CCtx, so any dictionary loading after ZSTD_p_forceRawDict is set will be loaded in "raw" ("content only") mode, even if CCtx is re-used multiple times with multiple different dictionaries. It's up to the user to reset this value differently if needed. --- doc/zstd_manual.html | 3 ++- lib/compress/zstd_compress.c | 7 +++++-- lib/compress/zstdmt_compress.c | 3 ++- lib/zstd.h | 3 ++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 02656c23..77e8974d 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -365,7 +365,8 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v


typedef enum {
-    ZSTD_p_forceWindow   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
+    ZSTD_p_forceWindow,   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
+    ZSTD_p_forceRawDict   /* Force loading dictionary in "content-only" mode (no header analysis) */
 } ZSTD_CCtxParameter;
 

size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 0e0f9d37..d684e6a0 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -62,6 +62,7 @@ struct ZSTD_CCtx_s {
     U32   hashLog3;         /* dispatch table : larger == faster, more memory */
     U32   loadedDictEnd;    /* index of end of dictionary */
     U32   forceWindow;      /* force back-references to respect limit of 1<forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
+    case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
     default: return ERROR(parameter_unknown);
     }
 }
@@ -2613,8 +2615,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
 {
     if ((dict==NULL) || (dictSize<=8)) return 0;
 
-    /* default : dict is pure content */
-    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
+    /* dict as pure content */
+    if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
+        return ZSTD_loadDictionaryContent(zc, dict, dictSize);
     zc->dictID = zc->params.fParams.noDictIDFlag ? 0 :  MEM_readLE32((const char*)dict+4);
 
     /* known magic number : dict is parsed for entropy stats and content */
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index f32c334a..483ea157 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -236,8 +236,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
         if (job->cdict) DEBUGLOG(3, "using CDict ");
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {  /* srcStart points at reloaded section */
+        size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1);  /* Force loading dictionary in "content-only" mode (no header analysis) */
         size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
-        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+        if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
         ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
     }
     if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first segment */
diff --git a/lib/zstd.h b/lib/zstd.h
index f3580544..7cca3690 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -462,7 +462,8 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
 
 typedef enum {
-    ZSTD_p_forceWindow   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
+    ZSTD_p_forceWindow,   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
+    ZSTD_p_forceRawDict   /* Force loading dictionary in "content-only" mode (no header analysis) */
 } ZSTD_CCtxParameter;
 /*! ZSTD_setCCtxParameter() :
  *  Set advanced parameters, selected through enum ZSTD_CCtxParameter

From df9f9296e3b1eaed41cc0bbc4cf0e5b935d321ed Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 24 Feb 2017 00:16:05 -0800
Subject: [PATCH 3/3] attempt to fix pthread linking error

replacing -lpthread by -pthread
---
 tests/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/Makefile b/tests/Makefile
index 4fae769d..30b2a04a 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -54,7 +54,7 @@ MULTITHREAD_LD  =
 else
 EXT =
 MULTITHREAD_CPP = -DZSTD_MULTITHREAD
-MULTITHREAD_LD  = -lpthread
+MULTITHREAD_LD  = -pthread
 endif
 MULTITHREAD = $(MULTITHREAD_CPP) $(MULTITHREAD_LD)