Merge remote-tracking branch 'upstream/dev' into dev

This commit is contained in:
Paul Cruz 2017-06-28 09:45:54 -07:00
commit d67212ab97
8 changed files with 449 additions and 49 deletions

View File

@ -224,10 +224,10 @@ asan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address" $(MAKE) -C $(TESTDIR) $*
msan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer" # datagen.c fails this test for no obvious reason
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason
msan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) $*
LDFLAGS=-fuse-ld=gold MOREFLAGS="-fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $*
asan32: clean
$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address"

View File

@ -139,16 +139,13 @@ all: zstd
$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
zstd xzstd zstd4 xzstd4 : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP)
zstd xzstd zstd4 xzstd4 : LDFLAGS += $(THREAD_LD) $(ZLIBLD)
xzstd xzstd4 : CPPFLAGS += $(LZMACPP)
xzstd xzstd4 : LDFLAGS += $(LZMALD)
zstd4 xzstd4 : CPPFLAGS += $(LZ4CPP)
zstd4 xzstd4 : LDFLAGS += $(LZ4LD)
zstd zstd4 : LZMA_MSG := - xz/lzma support is disabled
zstd xzstd : LZ4_MSG := - lz4 support is disabled
zstd xzstd zstd4 xzstd4 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
zstd xzstd zstd4 xzstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
zstd zstd4 : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP)
zstd zstd4 : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD)
zstd4 : CPPFLAGS += $(LZ4CPP)
zstd4 : LDFLAGS += $(LZ4LD)
zstd : LZ4_MSG := - lz4 support is disabled
zstd zstd4 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
zstd zstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
@echo "$(THREAD_MSG)"
@echo "$(ZLIB_MSG)"
@echo "$(LZMA_MSG)"
@ -181,6 +178,11 @@ zstd-nogz : ZLIBLD :=
zstd-nogz : ZLIB_MSG := - gzip support is disabled
zstd-nogz : zstd
# zstd-noxz : builds the regular `zstd` target with the lzma compile/link flags
# cleared, and sets the message reporting that xz/lzma support is disabled
zstd-noxz : LZMACPP :=
zstd-noxz : LZMALD :=
zstd-noxz : LZMA_MSG := - xz/lzma support is disabled
zstd-noxz : zstd
zstd-pgo : MOREFLAGS = -fprofile-generate
zstd-pgo : clean zstd

View File

@ -24,13 +24,23 @@ There are however other Makefile targets that create different variations of CLI
- __HAVE_ZLIB__ : `zstd` can compress and decompress files in `.gz` format.
This is done through command `--format=gzip`.
Alternatively, symlinks named `gzip` or `gunzip` will mimic intended behavior.
.gz support is automatically enabled when `zlib` library is detected at build time.
It's possible to disable .gz support, by either compiling `zstd-nogz` target or using HAVE_ZLIB=0 variable.
`.gz` support is automatically enabled when `zlib` library is detected at build time.
It's possible to disable `.gz` support, by either compiling `zstd-nogz` target or using HAVE_ZLIB=0 variable.
Example : make zstd HAVE_ZLIB=0
It's also possible to force compilation with zlib support, using HAVE_ZLIB=1.
In that case, the linking stage will fail if the `zlib` library cannot be found.
This can be useful to prevent the feature from being silently disabled.
- __HAVE_LZMA__ : `zstd` can compress and decompress files in `.xz` and `.lzma` formats.
This is done through commands `--format=xz` and `--format=lzma` respectively.
Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior.
`.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time.
It's possible to disable `.xz` and `.lzma` support, by either compiling `zstd-noxz` target or using HAVE_LZMA=0 variable.
Example : make zstd HAVE_LZMA=0
It's also possible to force compilation with lzma support, using HAVE_LZMA=1.
In that case, the linking stage will fail if the `lzma` library cannot be found.
This can be useful to prevent the feature from being silently disabled.
#### Aggregation of parameters
CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.

View File

@ -559,7 +559,7 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress,
strm.next_in = 0;
strm.avail_in = 0;
strm.next_out = ress->dstBuffer;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
while (1) {
@ -567,7 +567,7 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress,
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) action = LZMA_FINISH;
inFileSize += inSize;
strm.next_in = ress->srcBuffer;
strm.next_in = (BYTE const*)ress->srcBuffer;
strm.avail_in = inSize;
}
@ -580,7 +580,7 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress,
if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
EXM_THROW(73, "Write error : cannot write to output file");
outFileSize += compBytes;
strm.next_out = ress->dstBuffer;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
} }
if (!srcFileSize)
@ -1490,16 +1490,16 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
EXM_THROW(71, "zstd: %s: lzma_alone_decoder/lzma_stream_decoder error %d",
srcFileName, ret);
strm.next_out = ress->dstBuffer;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
strm.next_in = (BYTE const*)ress->srcBuffer;
strm.avail_in = ress->srcBufferLoaded;
strm.next_in = ress->srcBuffer;
for ( ; ; ) {
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
strm.next_in = ress->srcBuffer;
strm.next_in = (BYTE const*)ress->srcBuffer;
strm.avail_in = ress->srcBufferLoaded;
}
ret = lzma_code(&strm, action);
@ -1515,7 +1515,7 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes)
EXM_THROW(31, "Write error : cannot write to output file");
outFileSize += decompBytes;
strm.next_out = ress->dstBuffer;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
} }
if (ret == LZMA_STREAM_END) break;

View File

@ -56,7 +56,9 @@
#define ZSTD_GUNZIP "gunzip"
#define ZSTD_GZCAT "gzcat"
#define ZSTD_LZMA "lzma"
#define ZSTD_UNLZMA "unlzma"
#define ZSTD_XZ "xz"
#define ZSTD_UNXZ "unxz"
#define KB *(1 <<10)
#define MB *(1 <<20)
@ -379,7 +381,9 @@ int main(int argCount, const char* argv[])
if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */
if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat */
if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */
if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like unlzma */
if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */
if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like unxz */
memset(&compressionParams, 0, sizeof(compressionParams));
/* command switches */

View File

@ -180,7 +180,7 @@ legacy : CPPFLAGS+= -I$(ZSTDDIR)/legacy
legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
decodecorpus : $(filter-out $(ZSTDDIR)/compress/zstd_compress.c, $(wildcard $(ZSTD_FILES))) decodecorpus.c
decodecorpus : $(filter-out $(ZSTDDIR)/compress/zstd_compress.c, $(wildcard $(ZSTD_FILES))) $(ZDICT_FILES) decodecorpus.c
$(CC) $(FLAGS) $^ -o $@$(EXT) -lm
symbols : symbols.c
@ -272,7 +272,7 @@ endif
test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zstream32
test-all: test test32 valgrindTest
test-all: test test32 valgrindTest test-decodecorpus-cli
test-zstd: ZSTD = $(PRGDIR)/zstd
test-zstd: zstd zstd-playTests
@ -342,6 +342,39 @@ test-legacy: legacy
test-decodecorpus: decodecorpus
$(QEMU_SYS) ./decodecorpus -t $(DECODECORPUS_TESTTIME)
# test-decodecorpus-cli : generates small corpora with decodecorpus (without,
# then with a dictionary), decompresses them with the zstd CLI and compares the
# result against the original data written alongside.
# The scratch directory is removed up-front as well as at the end : a failing
# step aborts the recipe before the final cleanup, and a leftover `testdir`
# would otherwise make `mkdir` fail on the next run.
test-decodecorpus-cli: decodecorpus
	@echo "\n ---- decodecorpus basic cli tests ----"
	@rm -rf testdir
	@mkdir testdir
	./decodecorpus -n5 -otestdir -ptestdir
	@cd testdir && \
	$(ZSTD) -d z000000.zst -o tmp0 && \
	$(ZSTD) -d z000001.zst -o tmp1 && \
	$(ZSTD) -d z000002.zst -o tmp2 && \
	$(ZSTD) -d z000003.zst -o tmp3 && \
	$(ZSTD) -d z000004.zst -o tmp4 && \
	diff z000000 tmp0 && \
	diff z000001 tmp1 && \
	diff z000002 tmp2 && \
	diff z000003 tmp3 && \
	diff z000004 tmp4 && \
	rm ./* && \
	cd ..
	@echo "\n ---- decodecorpus dictionary cli tests ----"
	./decodecorpus -n5 -otestdir -ptestdir --use-dict=1MB
	@cd testdir && \
	$(ZSTD) -d z000000.zst -D dictionary -o tmp0 && \
	$(ZSTD) -d z000001.zst -D dictionary -o tmp1 && \
	$(ZSTD) -d z000002.zst -D dictionary -o tmp2 && \
	$(ZSTD) -d z000003.zst -D dictionary -o tmp3 && \
	$(ZSTD) -d z000004.zst -D dictionary -o tmp4 && \
	diff z000000 tmp0 && \
	diff z000001 tmp1 && \
	diff z000002 tmp2 && \
	diff z000003 tmp3 && \
	diff z000004 tmp4 && \
	cd ..
	@rm -rf testdir
test-pool: pool
$(QEMU_SYS) ./pool

View File

@ -18,6 +18,8 @@
#include "zstd.h"
#include "zstd_internal.h"
#include "mem.h"
#define ZDICT_STATIC_LINKING_ONLY
#include "zdict.h"
// Direct access to internal compression functions is required
#include "zstd_compress.c"
@ -229,6 +231,12 @@ typedef struct {
cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
} frame_t;
/* Dictionary settings handed by value to the frame generation functions */
typedef struct {
int useDict; /* non-zero : frame header carries dictID, and generated offsets may reach into the dictionary */
U32 dictID; /* value written (little-endian, 4 bytes) into the frame header when useDict is set */
size_t dictContentSize; /* number of bytes available at dictContent */
BYTE* dictContent; /* dictionary content; matches reaching before the frame start are copied from its end */
} dictInfo;
/*-*******************************************************
* Generator Functions
*********************************************************/
@ -238,7 +246,7 @@ struct {
} opts; /* advanced options on generation */
/* Generate and write a random frame header */
static void writeFrameHeader(U32* seed, frame_t* frame)
static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
{
BYTE* const op = frame->data;
size_t pos = 0;
@ -304,15 +312,26 @@ static void writeFrameHeader(U32* seed, frame_t* frame)
pos += 4;
{
/*
* fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
* singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
* contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
* dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
* For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
*/
int const dictBits = info.useDict ? 3 : 0;
BYTE const frameHeaderDescriptor =
(BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2));
(BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
op[pos++] = frameHeaderDescriptor;
}
if (!singleSegment) {
op[pos++] = windowByte;
}
if (info.useDict) {
MEM_writeLE32(op + pos, (U32) info.dictID);
pos += 4;
}
if (contentSizeFlag) {
switch (fcsCode) {
default: /* Impossible */
@ -603,7 +622,7 @@ static inline void initSeqStore(seqStore_t *seqStore) {
/* Randomly generate sequence commands */
static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
size_t contentSize, size_t literalsSize)
size_t contentSize, size_t literalsSize, dictInfo info)
{
/* The total length of all the matches */
size_t const remainingMatch = contentSize - literalsSize;
@ -627,7 +646,6 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
}
DISPLAYLEVEL(5, " total match lengths: %u\n", (U32)remainingMatch);
for (i = 0; i < numSequences; i++) {
/* Generate match and literal lengths by exponential distribution to
* ensure nice numbers */
@ -652,14 +670,33 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
memcpy(srcPtr, literals, literalLen);
srcPtr += literalLen;
do {
if (RAND(seed) & 7) {
/* do a normal offset */
U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
offset = (RAND(seed) %
MIN(frame->header.windowSize,
(size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
1;
if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
/* need to occasionally generate offsets that go past the start */
/* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
if (offset > frame->header.windowSize) {
if (lenPastStart < MIN_SEQ_LEN) {
/* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
/* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
/* make sure lenPastStart does not go past dictionary start though */
lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
}
{
U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
matchLen = MIN(matchLen, matchLenBound);
}
}
}
offsetCode = offset + ZSTD_REP_MOVE;
repIndex = 2;
} else {
@ -675,11 +712,20 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
repIndex = MIN(2, offsetCode + 1);
}
}
} while (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart) || offset == 0);
} while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
{ size_t j;
{
size_t j;
BYTE* const dictEnd = info.dictContent + info.dictContentSize;
for (j = 0; j < matchLen; j++) {
*srcPtr = *(srcPtr-offset);
if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
/* copy from dictionary instead of literals */
size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
*srcPtr = *(dictEnd - dictOffset);
}
else {
*srcPtr = *(srcPtr-offset);
}
srcPtr++;
}
}
@ -929,7 +975,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
}
static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
size_t literalsSize)
size_t literalsSize, dictInfo info)
{
seqStore_t seqStore;
size_t numSequences;
@ -938,14 +984,14 @@ static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
initSeqStore(&seqStore);
/* randomly generate sequences */
numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize);
numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
/* write them out to the frame data */
CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
return numSequences;
}
static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize)
static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
{
BYTE* const blockStart = (BYTE*)frame->data;
size_t literalsSize;
@ -957,7 +1003,7 @@ static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize
DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize);
nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize);
nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq);
@ -965,7 +1011,7 @@ static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize
}
static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
int lastBlock)
int lastBlock, dictInfo info)
{
int const blockTypeDesc = RAND(seed) % 8;
size_t blockSize;
@ -1005,7 +1051,7 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
frame->oldStats = frame->stats;
frame->data = op;
compressedSize = writeCompressedBlock(seed, frame, contentSize);
compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
if (compressedSize > contentSize) {
blockType = 0;
memcpy(op, frame->src, contentSize);
@ -1031,7 +1077,7 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
frame->data = op;
}
static void writeBlocks(U32* seed, frame_t* frame)
static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
{
size_t contentLeft = frame->header.contentSize;
size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize);
@ -1054,7 +1100,7 @@ static void writeBlocks(U32* seed, frame_t* frame)
}
}
writeBlock(seed, frame, blockContentSize, lastBlock);
writeBlock(seed, frame, blockContentSize, lastBlock, info);
contentLeft -= blockContentSize;
if (lastBlock) break;
@ -1119,20 +1165,102 @@ static void initFrame(frame_t* fr)
}
/* Return the final seed */
static U32 generateFrame(U32 seed, frame_t* fr)
static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
{
/* generate a complete frame */
DISPLAYLEVEL(1, "frame seed: %u\n", seed);
initFrame(fr);
writeFrameHeader(&seed, fr);
writeBlocks(&seed, fr);
writeFrameHeader(&seed, fr, info);
writeBlocks(&seed, fr, info);
writeChecksum(fr);
return seed;
}
/*_*******************************************************
* Dictionary Helper Functions
*********************************************************/
/* genRandomDict() :
 * fills `fullDict` (capacity `dictSize`) with a dictionary whose content is
 * random and whose header is produced by ZDICT_finalizeDictionary().
 * The first quarter of the buffer is reserved for the header,
 * the remaining bytes receive the random content.
 * @return : 0 if successful, 1 upon error */
static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
{
    unsigned const numSamples = 4;
    size_t sampleSizes[4];
    int ret = 0;
    /* the 4 samples below need 1+16+256+4096 = 4369 bytes */
    BYTE* const samples = malloc(5000*sizeof(BYTE));
    if (samples == NULL) {
        DISPLAY("Error: could not allocate space for samples\n");
        return 1;
    }

    /* sample #s (s = 0..3) is 16^s bytes long, each byte holding the value s+1 */
    {   BYTE* fillPtr = samples;
        size_t sampleLen = 1;
        unsigned s;
        for (s = 0; s < 4; s++) {
            sampleSizes[s] = sampleLen;
            memset(fillPtr, (int)(s + 1), sampleLen);
            fillPtr += sampleLen;
            sampleLen *= 16;
        }
    }

    {   size_t const headerSize = dictSize/4;
        size_t const dictContentSize = dictSize - dictSize/4;
        BYTE* const dictContent = fullDict + headerSize;

        if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
            DISPLAY("Error: dictionary size is too small\n");
            ret = 1;
        } else {
            ZDICT_params_t zdictParams;
            size_t dictWriteSize;

            /* only the dictionary ID and the notification level are customized */
            memset(&zdictParams, 0, sizeof(zdictParams));
            zdictParams.dictID = dictID;
            zdictParams.notificationLevel = 1;

            /* random bytes serve as dictionary content */
            RAND_buffer(&seed, (void*)dictContent, dictContentSize);

            /* let zdict write the final dictionary, using the synthetic samples */
            dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
                                                     dictContent, dictContentSize,
                                                     samples, sampleSizes, numSamples,
                                                     zdictParams);
            if (ZDICT_isError(dictWriteSize)) {
                DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
                ret = 1;
            }
        }
    }

    free(samples);
    return ret;
}
/* initDictInfo() :
 * bundles the given dictionary settings into a dictInfo returned by value.
 * The structure is zero-filled before its fields are assigned.
 * No memory is allocated : `dictContent` is stored as-is, not copied. */
static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID)
{
    dictInfo info;
    memset(&info, 0, sizeof(info));
    info.dictID = dictID;
    info.dictContent = dictContent;
    info.dictContentSize = dictContentSize;
    info.useDict = useDict;
    return info;
}
/*-*******************************************************
* Test Mode
*********************************************************/
@ -1194,6 +1322,63 @@ cleanup:
return ret;
}
/* testDecodeWithDict() :
 * generates a random dictionary of `dictSize` bytes and a random frame
 * referencing it, decompresses the frame with ZSTD_decompress_usingDict()
 * and compares the output with the data the frame was generated from.
 * @return : 0 if successful,
 *           otherwise an error code, which can be tested using ZSTD_isError() */
static size_t testDecodeWithDict(U32 seed, size_t dictSize)
{
    U32 const dictID = RAND(&seed);
    size_t errorDetected = 0;
    ZSTD_DCtx* dctx = NULL;   /* released at dictTestCleanup, whichever path is taken */
    BYTE* const fullDict = malloc(dictSize);
    if (fullDict == NULL) {
        return ERROR(GENERIC);
    }

    /* generate random dictionary */
    if (genRandomDict(dictID, seed, dictSize, fullDict) != 0) {
        errorDetected = ERROR(GENERIC);
        goto dictTestCleanup;
    }

    {   frame_t fr;

        /* generate frame;
         * content is taken to start after the first quarter of the buffer,
         * same split as the one used by genRandomDict() */
        {   size_t const dictContentSize = dictSize-dictSize/4;
            BYTE* const dictContent = fullDict+dictSize/4;
            dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
            seed = generateFrame(seed, &fr, info);
        }

        /* manually decompress and check difference */
        dctx = ZSTD_createDCtx();
        if (dctx == NULL) {
            errorDetected = ERROR(GENERIC);
            goto dictTestCleanup;
        }
        {   size_t const returnValue = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
                                                   fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
                                                   fullDict, dictSize);
            if (ZSTD_isError(returnValue)) {
                errorDetected = returnValue;
                goto dictTestCleanup;
            }
        }

        if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
            errorDetected = ERROR(corruption_detected);
            goto dictTestCleanup;
        }
    }

dictTestCleanup:
    ZSTD_freeDCtx(dctx);   /* accepts NULL; previously skipped on error paths, leaking the context */
    free(fullDict);
    return errorDetected;
}
static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS)
{
unsigned fnum;
@ -1213,7 +1398,10 @@ static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS
else
DISPLAYUPDATE("\r%u ", fnum);
seed = generateFrame(seed, &fr);
{
dictInfo const info = initDictInfo(0, 0, NULL, 0);
seed = generateFrame(seed, &fr, info);
}
{ size_t const r = testDecodeSimple(&fr);
if (ZSTD_isError(r)) {
@ -1229,6 +1417,15 @@ static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS
return 1;
}
}
{
/* don't create a dictionary that is too big */
size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN;
size_t const r = testDecodeWithDict(seed, dictSize);
if (ZSTD_isError(r)) {
DISPLAY("Error in dictionary mode on test seed %u: %s\n", seed+fnum, ZSTD_getErrorName(r));
return 1;
}
}
}
DISPLAY("\r%u tests completed: ", fnum);
@ -1248,7 +1445,10 @@ static int generateFile(U32 seed, const char* const path,
DISPLAY("seed: %u\n", seed);
generateFrame(seed, &fr);
{
dictInfo const info = initDictInfo(0, 0, NULL, 0);
generateFrame(seed, &fr, info);
}
outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
if (origPath) {
@ -1270,7 +1470,10 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
seed = generateFrame(seed, &fr);
{
dictInfo const info = initDictInfo(0, 0, NULL, 0);
seed = generateFrame(seed, &fr, info);
}
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
DISPLAY("Error: path too long\n");
@ -1292,6 +1495,92 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
return 0;
}
/* generateCorpusWithDict() :
 * generates a random dictionary of `dictSize` bytes, writes it out, then
 * generates frames referencing that dictionary.
 * - numFiles != 0 : dictionary goes to `path`/dictionary, frames to
 *   `path`/zNNNNNN.zst, and, if `origPath` is set, the matching original data
 *   to `origPath`/zNNNNNN.
 * - numFiles == 0 : dictionary goes to "dictionary", a single frame is
 *   written to `path`, and its original data to `origPath` if set.
 * @return : 0 if successful, non-zero upon error */
static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
                                  const char* const origPath, const size_t dictSize)
{
    char outPath[MAX_PATH];
    BYTE* fullDict;
    U32 const dictID = RAND(&seed);
    int errorDetected = 0;

    if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
        DISPLAY("Error: path too long\n");
        return 1;
    }

    /* room for the whole dictionary (header + content) */
    fullDict = malloc(dictSize);
    if (fullDict == NULL) {
        DISPLAY("Error: could not allocate space for full dictionary.\n");
        return 1;
    }

    /* fill it with a random dictionary */
    errorDetected = genRandomDict(dictID, seed, dictSize, fullDict);
    if (errorDetected != 0) goto dictCleanup;

    /* save the dictionary */
    if (numFiles == 0) {
        outputBuffer(fullDict, dictSize, "dictionary");
    } else {
        if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
            DISPLAY("Error: dictionary path too long\n");
            errorDetected = 1;
            goto dictCleanup;
        }
        outputBuffer(fullDict, dictSize, outPath);
    }

    /* produce the frames, and their original content when requested */
    {   size_t const dictContentSize = dictSize-dictSize/4;
        BYTE* const dictContent = fullDict+dictSize/4;
        dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
        unsigned fnum;

        for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
            frame_t fr;
            size_t compressedSize;
            size_t originalSize;

            DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);

            seed = generateFrame(seed, &fr, info);
            compressedSize = (BYTE*)fr.data - (BYTE*)fr.dataStart;
            originalSize = (BYTE*)fr.src - (BYTE*)fr.srcStart;

            if (numFiles == 0) {
                /* single-file mode : `path` and `origPath` are file names */
                outputBuffer(fr.dataStart, compressedSize, path);
                if (origPath) {
                    outputBuffer(fr.srcStart, originalSize, origPath);
                }
                continue;
            }

            /* corpus mode : `path` and `origPath` are directories */
            if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
                DISPLAY("Error: path too long\n");
                errorDetected = 1;
                goto dictCleanup;
            }
            outputBuffer(fr.dataStart, compressedSize, outPath);

            if (origPath) {
                if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
                    DISPLAY("Error: path too long\n");
                    errorDetected = 1;
                    goto dictCleanup;
                }
                outputBuffer(fr.srcStart, originalSize, outPath);
            }
        }
    }

dictCleanup:
    free(fullDict);
    return errorDetected;
}
/*_*******************************************************
* Command line
@ -1337,6 +1626,40 @@ static void advancedUsage(const char* programName)
DISPLAY( "\n");
DISPLAY( "Advanced arguments :\n");
DISPLAY( " --content-size : always include the content size in the frame header\n");
DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n");
}
/*! readU32FromChar() :
    Parses the run of decimal digits found at `*stringPtr`, followed by an
    optional size suffix : K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
    `*stringPtr` is advanced to the first character that was not consumed.
    @return : the parsed value (0 when there is no leading digit).
    Note : the result silently wraps around if it exceeds the range of `unsigned` */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    unsigned value = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value = value * 10 + (unsigned)(*cursor - '0');
    }

    if ((*cursor == 'K') || (*cursor == 'M')) {
        value <<= (*cursor == 'M') ? 20 : 10;
        cursor++;
        /* optional 'i' and 'B', so that K, KB and KiB are all accepted */
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value;
}
/** longCommandWArg() :
 *  tests whether *stringPtr starts with longCommand.
 *  On a match, *stringPtr is moved just past longCommand and 1 is returned.
 *  Otherwise, *stringPtr is left untouched and 0 is returned.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }
    *stringPtr += prefixLen;
    return 1;
}
int main(int argc, char** argv)
@ -1348,6 +1671,8 @@ int main(int argc, char** argv)
int testMode = 0;
const char* path = NULL;
const char* origPath = NULL;
int useDict = 0;
unsigned dictSize = (10 << 10); /* 10 kB default */
int argNb;
@ -1408,6 +1733,9 @@ int main(int argc, char** argv)
argument++;
if (strcmp(argument, "content-size") == 0) {
opts.contentSize = 1;
} else if (longCommandWArg(&argument, "use-dict=")) {
dictSize = readU32FromChar(&argument);
useDict = 1;
} else {
advancedUsage(argv[0]);
return 1;
@ -1439,9 +1767,13 @@ int main(int argc, char** argv)
return 1;
}
if (numFiles == 0) {
if (numFiles == 0 && useDict == 0) {
return generateFile(seed, path, origPath);
} else {
} else if (useDict == 0){
return generateCorpus(seed, numFiles, path, origPath);
} else {
/* should generate files with a dictionary */
return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize);
}
}

View File

@ -442,6 +442,7 @@ if [ $LZMAMODE -eq 1 ]; then
XZEXE=1
xz -V && lzma -V || XZEXE=0
if [ $XZEXE -eq 1 ]; then
$ECHO "Testing zstd xz and lzma support"
./datagen > tmp
$ZSTD --format=lzma -f tmp
$ZSTD --format=xz -f tmp
@ -452,6 +453,24 @@ if [ $LZMAMODE -eq 1 ]; then
$ZSTD -d -f -v tmp.xz
$ZSTD -d -f -v tmp.lzma
rm tmp*
# zstd picks its behavior from the name it is invoked with :
# create one symlink per supported xz/lzma alias
$ECHO "Creating symlinks"
ln -s $ZSTD ./xz
ln -s $ZSTD ./unxz
ln -s $ZSTD ./lzma
ln -s $ZSTD ./unlzma
# compress through the symlinks, decompress with the reference tools
$ECHO "Testing xz and lzma symlinks"
./datagen > tmp
./xz tmp
xz -d tmp.xz
./lzma tmp
lzma -d tmp.lzma
# compress with the reference tools, decompress through the un* symlinks
# (previously `./xz -d` and `./lzma -d` were run here, leaving the
#  unxz and unlzma symlinks untested)
$ECHO "Testing unxz and unlzma symlinks"
xz tmp
./unxz tmp.xz
lzma tmp
./unlzma tmp.lzma
rm xz unxz lzma unlzma
rm tmp*
else
$ECHO "xz binary not detected"
fi