[test] Exercise all codes in dictionary tables
This commit is contained in:
parent
b7977e348d
commit
e600b5d0da
@ -130,7 +130,7 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c
|
||||
$(MAKE) -C $(ZSTDDIR) libzstd
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
|
||||
|
||||
ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
|
||||
ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c seqgen.c zstreamtest.c
|
||||
zstreamtest : CPPFLAGS += $(MULTITHREAD_CPP)
|
||||
zstreamtest : LDFLAGS += $(MULTITHREAD_LD)
|
||||
zstreamtest : $(ZSTREAMFILES)
|
||||
|
260
tests/seqgen.c
Normal file
260
tests/seqgen.c
Normal file
@ -0,0 +1,260 @@
|
||||
/*
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include "seqgen.h"
|
||||
#include "mem.h"
|
||||
#include <string.h>
|
||||
|
||||
/* Smaller of two values. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Length, in bytes, of the marker runs emitted by the literal-length and
 * offset generators. */
static const size_t kMatchBytes = 128;

/* Rotates the 32-bit unsigned value `x` left by `r` bits.
 * Both arguments are fully parenthesized so that compound expressions such as
 * `a | b` or `n + 1` expand correctly. `r` must be in the range 1..31: a
 * rotation by 0 or 32 would shift by the full width of the type, which is
 * undefined behavior in C. */
#define SEQ_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
|
||||
/* Advances the generator state stored in *src by one step (multiply, xor,
 * rotate-left by 13) and returns one byte taken from the new state after
 * discarding its 5 lowest bits. The same starting state always yields the
 * same byte sequence. */
static BYTE SEQ_randByte(U32* src)
{
    static const U32 kMultiplier = 2654435761U;
    static const U32 kXorMask = 2246822519U;
    U32 next = *src;

    next *= kMultiplier;
    next ^= kXorMask;
    next = SEQ_rotl32(next, 13);

    *src = next;
    return (BYTE)(next >> 5);
}
|
||||
|
||||
SEQ_stream SEQ_initStream(unsigned seed)
|
||||
{
|
||||
SEQ_stream stream;
|
||||
stream.state = 0;
|
||||
XXH64_reset(&stream.xxh, 0);
|
||||
stream.seed = seed;
|
||||
return stream;
|
||||
}
|
||||
|
||||
/* Generates a single guard byte, then match length + 1 of a different byte,
|
||||
* then another guard byte.
|
||||
*/
|
||||
static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
|
||||
SEQ_outBuffer* out)
|
||||
{
|
||||
typedef enum {
|
||||
ml_first_byte = 0,
|
||||
ml_match_bytes,
|
||||
ml_last_byte,
|
||||
} ml_state;
|
||||
BYTE* const ostart = (BYTE*)out->dst;
|
||||
BYTE* const oend = ostart + out->size;
|
||||
BYTE* op = ostart + out->pos;
|
||||
|
||||
switch ((ml_state)stream->state) {
|
||||
case ml_first_byte:
|
||||
/* Generate a single byte and pick a different byte for the match */
|
||||
if (op >= oend) {
|
||||
stream->bytesLeft = 1;
|
||||
break;
|
||||
}
|
||||
*op = SEQ_randByte(&stream->seed) & 0xFF;
|
||||
do {
|
||||
stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
|
||||
} while (*op == stream->saved);
|
||||
++op;
|
||||
/* State transition */
|
||||
stream->state = ml_match_bytes;
|
||||
stream->bytesLeft = value + 1;
|
||||
/* fall-through */
|
||||
case ml_match_bytes: {
|
||||
/* Copy matchLength + 1 bytes to the output buffer */
|
||||
size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
|
||||
if (setLength > 0) {
|
||||
memset(op, stream->saved, setLength);
|
||||
op += setLength;
|
||||
stream->bytesLeft -= setLength;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
/* State transition */
|
||||
stream->state = ml_last_byte;
|
||||
}
|
||||
/* fall-through */
|
||||
case ml_last_byte:
|
||||
/* Generate a single byte and pick a different byte for the match */
|
||||
if (op >= oend) {
|
||||
stream->bytesLeft = 1;
|
||||
break;
|
||||
}
|
||||
do {
|
||||
*op = SEQ_randByte(&stream->seed) & 0xFF;
|
||||
} while (*op == stream->saved);
|
||||
++op;
|
||||
/* State transition */
|
||||
/* fall-through */
|
||||
default:
|
||||
stream->state = 0;
|
||||
stream->bytesLeft = 0;
|
||||
break;
|
||||
}
|
||||
XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
|
||||
out->pos = op - ostart;
|
||||
return stream->bytesLeft;
|
||||
}
|
||||
|
||||
/* Saves the current seed then generates kMatchBytes random bytes >= 128.
|
||||
* Generates literal length - kMatchBytes random bytes < 128.
|
||||
* Generates another kMatchBytes using the saved seed to generate a match.
|
||||
* This way the match is easy to find for the compressors.
|
||||
*/
|
||||
static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
|
||||
{
|
||||
typedef enum {
|
||||
ll_start = 0,
|
||||
ll_run_bytes,
|
||||
ll_literals,
|
||||
ll_run_match,
|
||||
} ll_state;
|
||||
BYTE* const ostart = (BYTE*)out->dst;
|
||||
BYTE* const oend = ostart + out->size;
|
||||
BYTE* op = ostart + out->pos;
|
||||
|
||||
switch ((ll_state)stream->state) {
|
||||
case ll_start:
|
||||
stream->state = ll_run_bytes;
|
||||
stream->saved = stream->seed;
|
||||
stream->bytesLeft = MIN(kMatchBytes, value);
|
||||
/* fall-through */
|
||||
case ll_run_bytes:
|
||||
while (stream->bytesLeft > 0 && op < oend) {
|
||||
*op++ = SEQ_randByte(&stream->seed) | 0x80;
|
||||
--stream->bytesLeft;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
/* State transition */
|
||||
stream->state = ll_literals;
|
||||
stream->bytesLeft = value - MIN(kMatchBytes, value);
|
||||
/* fall-through */
|
||||
case ll_literals:
|
||||
while (stream->bytesLeft > 0 && op < oend) {
|
||||
*op++ = SEQ_randByte(&stream->seed) & 0x7F;
|
||||
--stream->bytesLeft;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
/* State transition */
|
||||
stream->state = ll_run_match;
|
||||
stream->bytesLeft = MIN(kMatchBytes, value);
|
||||
/* fall-through */
|
||||
case ll_run_match: {
|
||||
while (stream->bytesLeft > 0 && op < oend) {
|
||||
*op++ = SEQ_randByte(&stream->saved) | 0x80;
|
||||
--stream->bytesLeft;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
}
|
||||
/* fall-through */
|
||||
default:
|
||||
stream->state = 0;
|
||||
stream->bytesLeft = 0;
|
||||
break;
|
||||
}
|
||||
XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
|
||||
out->pos = op - ostart;
|
||||
return stream->bytesLeft;
|
||||
}
|
||||
|
||||
/* Saves the current seed then generates kMatchBytes random bytes >= 128.
|
||||
* Generates offset - kMatchBytes of zeros to get a large offset without
|
||||
* polluting the hash tables.
|
||||
* Generates another kMatchBytes using the saved seed to generate a with the
|
||||
* required offset.
|
||||
*/
|
||||
static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
|
||||
{
|
||||
typedef enum {
|
||||
of_start = 0,
|
||||
of_run_bytes,
|
||||
of_offset,
|
||||
of_run_match,
|
||||
} of_state;
|
||||
BYTE* const ostart = (BYTE*)out->dst;
|
||||
BYTE* const oend = ostart + out->size;
|
||||
BYTE* op = ostart + out->pos;
|
||||
|
||||
switch ((of_state)stream->state) {
|
||||
case of_start:
|
||||
stream->state = of_run_bytes;
|
||||
stream->saved = stream->seed;
|
||||
stream->bytesLeft = MIN(value, kMatchBytes);
|
||||
/* fall-through */
|
||||
case of_run_bytes: {
|
||||
while (stream->bytesLeft > 0 && op < oend) {
|
||||
*op++ = SEQ_randByte(&stream->seed) | 0x80;
|
||||
--stream->bytesLeft;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
/* State transition */
|
||||
stream->state = of_offset;
|
||||
stream->bytesLeft = value - MIN(value, kMatchBytes);
|
||||
}
|
||||
/* fall-through */
|
||||
case of_offset: {
|
||||
/* Copy matchLength + 1 bytes to the output buffer */
|
||||
size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
|
||||
if (setLength > 0) {
|
||||
memset(op, 0, setLength);
|
||||
op += setLength;
|
||||
stream->bytesLeft -= setLength;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
/* State transition */
|
||||
stream->state = of_run_match;
|
||||
stream->bytesLeft = MIN(value, kMatchBytes);
|
||||
}
|
||||
/* fall-through */
|
||||
case of_run_match: {
|
||||
while (stream->bytesLeft > 0 && op < oend) {
|
||||
*op++ = SEQ_randByte(&stream->saved) | 0x80;
|
||||
--stream->bytesLeft;
|
||||
}
|
||||
if (stream->bytesLeft > 0)
|
||||
break;
|
||||
}
|
||||
/* fall-through */
|
||||
default:
|
||||
stream->state = 0;
|
||||
stream->bytesLeft = 0;
|
||||
break;
|
||||
}
|
||||
XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
|
||||
out->pos = op - ostart;
|
||||
return stream->bytesLeft;
|
||||
}
|
||||
|
||||
/* Returns the number of bytes left to generate.
|
||||
* Must pass the same type/value until it returns 0.
|
||||
*/
|
||||
size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
|
||||
{
|
||||
switch (type) {
|
||||
case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
|
||||
case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
|
||||
case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
|
||||
case SEQ_gen_max: /* fall-through */
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the xxhash of the data produced so far */
|
||||
XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
|
||||
{
|
||||
return XXH64_digest(&stream->xxh);
|
||||
}
|
58
tests/seqgen.h
Normal file
58
tests/seqgen.h
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef SEQGEN_H
#define SEQGEN_H

/* Must be defined before xxhash.h is included: SEQ_stream embeds an
 * XXH64_state_t by value. */
#define XXH_STATIC_LINKING_ONLY

#include "xxhash.h"
#include <stddef.h>   /* size_t */

/* Selects which pattern SEQ_gen() produces. */
typedef enum {
    SEQ_gen_ml = 0,   /* match-length pattern */
    SEQ_gen_ll = 1,   /* literal-length pattern */
    SEQ_gen_of = 2,   /* offset pattern */
    SEQ_gen_max = 3   /* Must be the last value */
} SEQ_gen_type;

/* Internal state, do not use */
typedef struct {
    XXH64_state_t xxh; /* xxh state for all the data produced so far (seed=0) */
    unsigned seed;     /* current state of the pseudo-random byte generator */
    int state;         /* enum to control state machine (clean=0) */
    unsigned saved;    /* value remembered by the generator in progress */
    size_t bytesLeft;  /* bytes still owed by the current generation step */
} SEQ_stream;

/* Returns a stream in the clean state, seeded with `seed`. */
SEQ_stream SEQ_initStream(unsigned seed);

/* Output window: the generators write at dst + pos, never at or beyond
 * dst + size, and advance pos by the number of bytes written. */
typedef struct {
    void* dst;
    size_t size;
    size_t pos;
} SEQ_outBuffer;

/* Returns non-zero until the current type/value has been generated.
 * Must pass the same type/value until it returns 0.
 *
 * Recommended to pick a value in the middle of the range you want, since there
 * may be some noise that causes actual results to be slightly different.
 * We try to be more accurate for smaller values.
 *
 * NOTE: Very small values don't work well (< 6).
 */
size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value,
               SEQ_outBuffer* out);

/* Returns the xxhash of the data produced so far */
XXH64_hash_t SEQ_digest(SEQ_stream const* stream);

#endif /* SEQGEN_H */
|
@ -36,6 +36,7 @@
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||
#include "xxhash.h" /* XXH64_* */
|
||||
#include "seqgen.h"
|
||||
|
||||
|
||||
/*-************************************
|
||||
@ -96,15 +97,21 @@ unsigned int FUZ_rand(unsigned int* seedPtr)
|
||||
return rand32 >> 5;
|
||||
}
|
||||
|
||||
#define CHECK_Z(f) { \
|
||||
size_t const err = f; \
|
||||
if (ZSTD_isError(err)) { \
|
||||
DISPLAY("Error => %s : %s ", \
|
||||
#f, ZSTD_getErrorName(err)); \
|
||||
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \
|
||||
/* If `cond` is true, prints "Error => ", the caller's printf-style message,
 * and the seed / test number / source line, then jumps to _output_error.
 * Expects `seed`, `testNb` and the `_output_error` label to exist in the
 * calling scope. __LINE__ has type int, so it is cast to match the %u
 * conversion. */
#define CHECK(cond, ...) {                                    \
    if (cond) {                                               \
        DISPLAY("Error => ");                                 \
        DISPLAY(__VA_ARGS__);                                 \
        DISPLAY(" (seed %u, test nb %u, line %u) \n",         \
                seed, testNb, (unsigned)__LINE__);            \
        goto _output_error;                                   \
}   }
|
||||
|
||||
/* Evaluates the zstd call `f` exactly once; if its result is a zstd error
 * code, reports the call text and the error name through CHECK (which jumps
 * to _output_error). */
#define CHECK_Z(f) {                                        \
    size_t const checkZ_result = (f);                       \
    CHECK(ZSTD_isError(checkZ_result), "%s : %s ",          \
          #f, ZSTD_getErrorName(checkZ_result));            \
}
|
||||
|
||||
|
||||
/*======================================================
|
||||
* Basic Unit tests
|
||||
@ -144,6 +151,63 @@ static void FUZ_freeDictionary(buffer_t dict)
|
||||
free(dict.start);
|
||||
}
|
||||
|
||||
/* Round trips data and updates xxh with the decompressed data produced */
|
||||
static size_t SEQ_roundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
|
||||
XXH64_state_t* xxh, void* data, size_t size,
|
||||
ZSTD_EndDirective endOp)
|
||||
{
|
||||
static BYTE compressed[1024];
|
||||
static BYTE uncompressed[1024];
|
||||
|
||||
ZSTD_inBuffer cin = {data, size, 0};
|
||||
size_t cret;
|
||||
|
||||
do {
|
||||
ZSTD_outBuffer cout = {compressed, sizeof(compressed), 0};
|
||||
ZSTD_inBuffer din = {compressed, 0, 0};
|
||||
ZSTD_outBuffer dout = {uncompressed, 0, 0};
|
||||
|
||||
cret = ZSTD_compress_generic(cctx, &cout, &cin, endOp);
|
||||
if (ZSTD_isError(cret))
|
||||
return cret;
|
||||
|
||||
din.size = cout.pos;
|
||||
while (din.pos < din.size || (endOp == ZSTD_e_end && cret == 0)) {
|
||||
size_t dret;
|
||||
|
||||
dout.pos = 0;
|
||||
dout.size = sizeof(uncompressed);
|
||||
dret = ZSTD_decompressStream(dctx, &dout, &din);
|
||||
if (ZSTD_isError(dret))
|
||||
return dret;
|
||||
XXH64_update(xxh, dout.dst, dout.pos);
|
||||
if (dret == 0)
|
||||
break;
|
||||
}
|
||||
} while (cin.pos < cin.size || (endOp != ZSTD_e_continue && cret != 0));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Generates some data and round trips it */
|
||||
static size_t SEQ_generateRoundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
|
||||
XXH64_state_t* xxh, SEQ_stream* seq,
|
||||
SEQ_gen_type type, unsigned value)
|
||||
{
|
||||
static BYTE data[1024];
|
||||
size_t gen;
|
||||
|
||||
do {
|
||||
SEQ_outBuffer sout = {data, sizeof(data), 0};
|
||||
size_t ret;
|
||||
gen = SEQ_gen(seq, type, value, &sout);
|
||||
|
||||
ret = SEQ_roundTrip(cctx, dctx, xxh, sout.dst, sout.pos, ZSTD_e_continue);
|
||||
if (ZSTD_isError(ret))
|
||||
return ret;
|
||||
} while (gen != 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
|
||||
{
|
||||
@ -618,6 +682,53 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
if (r != 0) goto _output_error; } /* error, or some data not flushed */
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : check dictionary FSE tables can represent every code : ", testNb++);
|
||||
{ unsigned const kMaxWindowLog = 24;
|
||||
unsigned value;
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams(3, 1U << kMaxWindowLog, 1024);
|
||||
ZSTD_CDict* cdict;
|
||||
ZSTD_DDict* ddict;
|
||||
SEQ_stream seq = SEQ_initStream(0x87654321);
|
||||
SEQ_gen_type type;
|
||||
XXH64_state_t xxh;
|
||||
|
||||
XXH64_reset(&xxh, 0);
|
||||
cParams.windowLog = kMaxWindowLog;
|
||||
cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem);
|
||||
ddict = ZSTD_createDDict(dictionary.start, dictionary.filled);
|
||||
|
||||
if (!cdict || !ddict) goto _output_error;
|
||||
|
||||
ZSTD_CCtx_reset(zc);
|
||||
ZSTD_resetDStream(zd);
|
||||
CHECK_Z(ZSTD_CCtx_refCDict(zc, cdict));
|
||||
CHECK_Z(ZSTD_initDStream_usingDDict(zd, ddict));
|
||||
CHECK_Z(ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1U << kMaxWindowLog));
|
||||
/* Test all values < 300 */
|
||||
for (value = 0; value < 300; ++value) {
|
||||
for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) {
|
||||
CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value));
|
||||
}
|
||||
}
|
||||
/* Test values 2^8 to 2^17 */
|
||||
for (value = (1 << 8); value < (1 << 17); value <<= 1) {
|
||||
for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) {
|
||||
CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value));
|
||||
CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value + (value >> 2)));
|
||||
}
|
||||
}
|
||||
/* Test offset values up to the max window log */
|
||||
for (value = 8; value <= kMaxWindowLog; ++value) {
|
||||
CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, SEQ_gen_of, (1U << value) - 1));
|
||||
}
|
||||
|
||||
CHECK_Z(SEQ_roundTrip(zc, zd, &xxh, NULL, 0, ZSTD_e_end));
|
||||
CHECK(SEQ_digest(&seq) != XXH64_digest(&xxh), "SEQ XXH64 does not match");
|
||||
|
||||
ZSTD_freeCDict(cdict);
|
||||
ZSTD_freeDDict(ddict);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
/* Overlen overwriting window data bug */
|
||||
DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
|
||||
@ -708,14 +819,6 @@ static U32 FUZ_randomClampedLength(U32* seed, U32 minVal, U32 maxVal)
|
||||
return (U32)((FUZ_rand(seed) % mod) + minVal);
|
||||
}
|
||||
|
||||
/* If `cond` is true, prints "Error => ", the caller's printf-style message,
 * then the seed and test number, and jumps to _output_error. Expects `seed`,
 * `testNb` and the `_output_error` label to exist in the calling scope.
 * NOTE(review): the enclosing hunk header (-708,14 +819,6) suggests this is
 * the pre-commit copy of the macro shown as removed - confirm. */
#define CHECK(cond, ...) { \
|
||||
if (cond) { \
|
||||
DISPLAY("Error => "); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \
|
||||
goto _output_error; \
|
||||
} }
|
||||
|
||||
static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests)
|
||||
{
|
||||
U32 const maxSrcLog = bigTests ? 24 : 22;
|
||||
|
Loading…
Reference in New Issue
Block a user