From e600b5d0dab674e480e6538ad7eaffbdbb72f029 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 16 Oct 2017 17:18:43 -0700 Subject: [PATCH] [test] Exercise all codes in dictionary tables --- tests/Makefile | 2 +- tests/seqgen.c | 260 ++++++++++++++++++++++++++++++++++++++++++++ tests/seqgen.h | 58 ++++++++++ tests/zstreamtest.c | 131 +++++++++++++++++++--- 4 files changed, 436 insertions(+), 15 deletions(-) create mode 100644 tests/seqgen.c create mode 100644 tests/seqgen.h diff --git a/tests/Makefile b/tests/Makefile index 651833bb..eb8bb3dd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -130,7 +130,7 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c $(MAKE) -C $(ZSTDDIR) libzstd $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) -ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c +ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c seqgen.c zstreamtest.c zstreamtest : CPPFLAGS += $(MULTITHREAD_CPP) zstreamtest : LDFLAGS += $(MULTITHREAD_LD) zstreamtest : $(ZSTREAMFILES) diff --git a/tests/seqgen.c b/tests/seqgen.c new file mode 100644 index 00000000..8233ecee --- /dev/null +++ b/tests/seqgen.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "seqgen.h" +#include "mem.h" +#include <string.h> + +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) + +static const size_t kMatchBytes = 128; + +#define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static BYTE SEQ_randByte(U32* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = SEQ_rotl32(rand32, 13); + *src = rand32; + return (BYTE)(rand32 >> 5); +} + +SEQ_stream SEQ_initStream(unsigned seed) +{ + SEQ_stream stream; + stream.state = 0; + XXH64_reset(&stream.xxh, 0); + stream.seed = seed; + return stream; +} + +/* Generates a single guard byte, then match length + 1 of a different byte, + * then another guard byte. + */ +static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value, + SEQ_outBuffer* out) +{ + typedef enum { + ml_first_byte = 0, + ml_match_bytes, + ml_last_byte, + } ml_state; + BYTE* const ostart = (BYTE*)out->dst; + BYTE* const oend = ostart + out->size; + BYTE* op = ostart + out->pos; + + switch ((ml_state)stream->state) { + case ml_first_byte: + /* Generate a single byte and pick a different byte for the match */ + if (op >= oend) { + stream->bytesLeft = 1; + break; + } + *op = SEQ_randByte(&stream->seed) & 0xFF; + do { + stream->saved = SEQ_randByte(&stream->seed) & 0xFF; + } while (*op == stream->saved); + ++op; + /* State transition */ + stream->state = ml_match_bytes; + stream->bytesLeft = value + 1; + /* fall-through */ + case ml_match_bytes: { + /* Copy matchLength + 1 bytes to the output buffer */ + size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); + if (setLength > 0) { + memset(op, stream->saved, setLength); + op += setLength; + stream->bytesLeft -= setLength; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = ml_last_byte; + } + /* fall-through */ + case ml_last_byte: + /* Generate a single byte and pick a different byte for the match */ + if (op >= oend) { + stream->bytesLeft = 1; + break; + } + do { + *op = SEQ_randByte(&stream->seed) & 0xFF; + } while 
(*op == stream->saved); + ++op; + /* State transition */ + /* fall-through */ + default: + stream->state = 0; + stream->bytesLeft = 0; + break; + } + XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); + out->pos = op - ostart; + return stream->bytesLeft; +} + +/* Saves the current seed then generates kMatchBytes random bytes >= 128. + * Generates literal length - kMatchBytes random bytes < 128. + * Generates another kMatchBytes using the saved seed to generate a match. + * This way the match is easy to find for the compressors. + */ +static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) +{ + typedef enum { + ll_start = 0, + ll_run_bytes, + ll_literals, + ll_run_match, + } ll_state; + BYTE* const ostart = (BYTE*)out->dst; + BYTE* const oend = ostart + out->size; + BYTE* op = ostart + out->pos; + + switch ((ll_state)stream->state) { + case ll_start: + stream->state = ll_run_bytes; + stream->saved = stream->seed; + stream->bytesLeft = MIN(kMatchBytes, value); + /* fall-through */ + case ll_run_bytes: + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->seed) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = ll_literals; + stream->bytesLeft = value - MIN(kMatchBytes, value); + /* fall-through */ + case ll_literals: + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->seed) & 0x7F; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = ll_run_match; + stream->bytesLeft = MIN(kMatchBytes, value); + /* fall-through */ + case ll_run_match: { + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->saved) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + } + /* fall-through */ + default: + stream->state = 0; + stream->bytesLeft = 0; + break; + } + XXH64_update(&stream->xxh, ostart + out->pos, (op - 
ostart) - out->pos); + out->pos = op - ostart; + return stream->bytesLeft; +} + +/* Saves the current seed then generates kMatchBytes random bytes >= 128. + * Generates offset - kMatchBytes of zeros to get a large offset without + * polluting the hash tables. + * Generates another kMatchBytes using the saved seed to generate a match with the + * required offset. + */ +static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) +{ + typedef enum { + of_start = 0, + of_run_bytes, + of_offset, + of_run_match, + } of_state; + BYTE* const ostart = (BYTE*)out->dst; + BYTE* const oend = ostart + out->size; + BYTE* op = ostart + out->pos; + + switch ((of_state)stream->state) { + case of_start: + stream->state = of_run_bytes; + stream->saved = stream->seed; + stream->bytesLeft = MIN(value, kMatchBytes); + /* fall-through */ + case of_run_bytes: { + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->seed) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = of_offset; + stream->bytesLeft = value - MIN(value, kMatchBytes); + } + /* fall-through */ + case of_offset: { + /* Copy matchLength + 1 bytes to the output buffer */ + size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); + if (setLength > 0) { + memset(op, 0, setLength); + op += setLength; + stream->bytesLeft -= setLength; + } + if (stream->bytesLeft > 0) + break; + /* State transition */ + stream->state = of_run_match; + stream->bytesLeft = MIN(value, kMatchBytes); + } + /* fall-through */ + case of_run_match: { + while (stream->bytesLeft > 0 && op < oend) { + *op++ = SEQ_randByte(&stream->saved) | 0x80; + --stream->bytesLeft; + } + if (stream->bytesLeft > 0) + break; + } + /* fall-through */ + default: + stream->state = 0; + stream->bytesLeft = 0; + break; + } + XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); + out->pos = op - ostart; + return stream->bytesLeft; +} + +/* 
Returns the number of bytes left to generate. + * Must pass the same type/value until it returns 0. + */ +size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out) +{ + switch (type) { + case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out); + case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out); + case SEQ_gen_of: return SEQ_gen_offset(stream, value, out); + case SEQ_gen_max: /* fall-through */ + default: return 0; + } +} + +/* Returns the xxhash of the data produced so far */ +XXH64_hash_t SEQ_digest(SEQ_stream const* stream) +{ + return XXH64_digest(&stream->xxh); +} diff --git a/tests/seqgen.h b/tests/seqgen.h new file mode 100644 index 00000000..72d79884 --- /dev/null +++ b/tests/seqgen.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef SEQGEN_H +#define SEQGEN_H + +#define XXH_STATIC_LINKING_ONLY + +#include "xxhash.h" +#include <stddef.h> /* size_t */ + +typedef enum { + SEQ_gen_ml = 0, + SEQ_gen_ll, + SEQ_gen_of, + SEQ_gen_max /* Must be the last value */ +} SEQ_gen_type; + +/* Internal state, do not use */ +typedef struct { + XXH64_state_t xxh; /* xxh state for all the data produced so far (seed=0) */ + unsigned seed; + int state; /* enum to control state machine (clean=0) */ + unsigned saved; + size_t bytesLeft; +} SEQ_stream; + +SEQ_stream SEQ_initStream(unsigned seed); + +typedef struct { + void* dst; + size_t size; + size_t pos; +} SEQ_outBuffer; + +/* Returns non-zero until the current type/value has been generated. + * Must pass the same type/value until it returns 0. 
+ * + * Recommended to pick a value in the middle of the range you want, since there + * may be some noise that causes actual results to be slightly different. + * We try to be more accurate for smaller values. + * + * NOTE: Very small values don't work well (< 6). + */ +size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, + SEQ_outBuffer* out); + +/* Returns the xxhash of the data produced so far */ +XXH64_hash_t SEQ_digest(SEQ_stream const* stream); + +#endif /* SEQGEN_H */ diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 8b4c8369..1b42bd5d 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -36,6 +36,7 @@ #include "datagen.h" /* RDG_genBuffer */ #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ #include "xxhash.h" /* XXH64_* */ +#include "seqgen.h" /*-************************************ @@ -96,15 +97,21 @@ unsigned int FUZ_rand(unsigned int* seedPtr) return rand32 >> 5; } -#define CHECK_Z(f) { \ - size_t const err = f; \ - if (ZSTD_isError(err)) { \ - DISPLAY("Error => %s : %s ", \ - #f, ZSTD_getErrorName(err)); \ - DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ +#define CHECK(cond, ...) 
{ \ + if (cond) { \ + DISPLAY("Error => "); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u, line %u) \n", \ + seed, testNb, __LINE__); \ goto _output_error; \ } } +#define CHECK_Z(f) { \ + size_t const err = f; \ + CHECK(ZSTD_isError(err), "%s : %s ", \ + #f, ZSTD_getErrorName(err)); \ +} + /*====================================================== * Basic Unit tests @@ -144,6 +151,63 @@ static void FUZ_freeDictionary(buffer_t dict) free(dict.start); } +/* Round trips data and updates xxh with the decompressed data produced */ +static size_t SEQ_roundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + XXH64_state_t* xxh, void* data, size_t size, + ZSTD_EndDirective endOp) +{ + static BYTE compressed[1024]; + static BYTE uncompressed[1024]; + + ZSTD_inBuffer cin = {data, size, 0}; + size_t cret; + + do { + ZSTD_outBuffer cout = {compressed, sizeof(compressed), 0}; + ZSTD_inBuffer din = {compressed, 0, 0}; + ZSTD_outBuffer dout = {uncompressed, 0, 0}; + + cret = ZSTD_compress_generic(cctx, &cout, &cin, endOp); + if (ZSTD_isError(cret)) + return cret; + + din.size = cout.pos; + while (din.pos < din.size || (endOp == ZSTD_e_end && cret == 0)) { + size_t dret; + + dout.pos = 0; + dout.size = sizeof(uncompressed); + dret = ZSTD_decompressStream(dctx, &dout, &din); + if (ZSTD_isError(dret)) + return dret; + XXH64_update(xxh, dout.dst, dout.pos); + if (dret == 0) + break; + } + } while (cin.pos < cin.size || (endOp != ZSTD_e_continue && cret != 0)); + return 0; +} + +/* Generates some data and round trips it */ +static size_t SEQ_generateRoundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + XXH64_state_t* xxh, SEQ_stream* seq, + SEQ_gen_type type, unsigned value) +{ + static BYTE data[1024]; + size_t gen; + + do { + SEQ_outBuffer sout = {data, sizeof(data), 0}; + size_t ret; + gen = SEQ_gen(seq, type, value, &sout); + + ret = SEQ_roundTrip(cctx, dctx, xxh, sout.dst, sout.pos, ZSTD_e_continue); + if (ZSTD_isError(ret)) + return ret; + } while (gen != 0); + + return 0; +} static 
int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem) { @@ -618,6 +682,53 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo if (r != 0) goto _output_error; } /* error, or some data not flushed */ DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : check dictionary FSE tables can represent every code : ", testNb++); + { unsigned const kMaxWindowLog = 24; + unsigned value; + ZSTD_compressionParameters cParams = ZSTD_getCParams(3, 1U << kMaxWindowLog, 1024); + ZSTD_CDict* cdict; + ZSTD_DDict* ddict; + SEQ_stream seq = SEQ_initStream(0x87654321); + SEQ_gen_type type; + XXH64_state_t xxh; + + XXH64_reset(&xxh, 0); + cParams.windowLog = kMaxWindowLog; + cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem); + ddict = ZSTD_createDDict(dictionary.start, dictionary.filled); + + if (!cdict || !ddict) goto _output_error; + + ZSTD_CCtx_reset(zc); + ZSTD_resetDStream(zd); + CHECK_Z(ZSTD_CCtx_refCDict(zc, cdict)); + CHECK_Z(ZSTD_initDStream_usingDDict(zd, ddict)); + CHECK_Z(ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1U << kMaxWindowLog)); + /* Test all values < 300 */ + for (value = 0; value < 300; ++value) { + for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) { + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value)); + } + } + /* Test values 2^8 to 2^17 */ + for (value = (1 << 8); value < (1 << 17); value <<= 1) { + for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) { + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value)); + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value + (value >> 2))); + } + } + /* Test offset values up to the max window log */ + for (value = 8; value <= kMaxWindowLog; ++value) { + CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, SEQ_gen_of, (1U << value) - 1)); + } + + CHECK_Z(SEQ_roundTrip(zc, zd, &xxh, NULL, 0, ZSTD_e_end)); + CHECK(SEQ_digest(&seq) != 
XXH64_digest(&xxh), "SEQ XXH64 does not match"); + + ZSTD_freeCDict(cdict); + ZSTD_freeDDict(ddict); + } + DISPLAYLEVEL(3, "OK \n"); /* Overlen overwriting window data bug */ DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++); @@ -708,14 +819,6 @@ static U32 FUZ_randomClampedLength(U32* seed, U32 minVal, U32 maxVal) return (U32)((FUZ_rand(seed) % mod) + minVal); } -#define CHECK(cond, ...) { \ - if (cond) { \ - DISPLAY("Error => "); \ - DISPLAY(__VA_ARGS__); \ - DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ - goto _output_error; \ -} } - static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests) { U32 const maxSrcLog = bigTests ? 24 : 22;