zstd/tests/fuzzer.c

2689 lines
128 KiB
C
Raw Normal View History

/*
* Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
2016-08-30 17:04:33 +00:00
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
2016-08-30 17:04:33 +00:00
*/
2015-01-24 00:58:16 +00:00
2016-03-19 12:21:08 +00:00
/*-************************************
2015-01-24 00:58:16 +00:00
* Compiler specific
**************************************/
#ifdef _MSC_VER /* Visual Studio */
# define _CRT_SECURE_NO_WARNINGS /* fgets */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
2015-01-24 00:58:16 +00:00
#endif
2016-03-19 12:21:08 +00:00
/*-************************************
2015-01-24 00:58:16 +00:00
* Includes
**************************************/
#include <stdlib.h> /* free */
#include <stdio.h> /* fgets, sscanf */
#include <string.h> /* strcmp */
#include <assert.h>
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
2018-07-30 19:57:11 +00:00
#include "fse.h"
#include "zstd.h" /* ZSTD_VERSION_STRING */
#include "zstd_errors.h" /* ZSTD_getErrorCode */
#include "zstdmt_compress.h"
#define ZDICT_STATIC_LINKING_ONLY
#include "zdict.h" /* ZDICT_trainFromBuffer */
#include "datagen.h" /* RDG_genBuffer */
2015-10-21 13:39:26 +00:00
#include "mem.h"
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64 */
2017-11-30 03:11:12 +00:00
#include "util.h"
#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
2015-01-24 00:58:16 +00:00
2016-03-19 12:21:08 +00:00
/*-************************************
* Constants
2015-01-24 00:58:16 +00:00
**************************************/
#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
static const int FUZ_compressibility_default = 50;
static const int nbTestsDefault = 30000;
2015-01-24 00:58:16 +00:00
2016-03-19 12:21:08 +00:00
/*-************************************
2015-01-24 00:58:16 +00:00
* Display Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
2015-01-24 00:58:16 +00:00
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static U32 g_displayLevel = 2;
2017-11-30 03:11:12 +00:00
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
2015-12-04 16:25:26 +00:00
#define DISPLAYUPDATE(l, ...) \
if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } \
}
2015-01-24 00:58:16 +00:00
/*-*******************************************************
* Compile time test
*********************************************************/
2017-11-30 03:11:12 +00:00
#undef MIN
#undef MAX
/* Declaring the function, to avoid -Wmissing-prototype */
2018-09-27 22:13:43 +00:00
void FUZ_bug976(void);
void FUZ_bug976(void)
{ /* these constants shall not depend on MIN() macro */
assert(ZSTD_HASHLOG_MAX < 31);
assert(ZSTD_CHAINLOG_MAX < 31);
}
/*-*******************************************************
* Internal functions
*********************************************************/
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))
2015-11-19 11:02:28 +00:00
2016-05-29 23:56:08 +00:00
#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
static U32 FUZ_rand(U32* src)
2015-01-24 00:58:16 +00:00
{
2016-03-19 12:21:08 +00:00
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
2015-01-24 00:58:16 +00:00
U32 rand32 = *src;
rand32 *= prime1;
rand32 += prime2;
rand32 = FUZ_rotl32(rand32, 13);
*src = rand32;
return rand32 >> 5;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
static U32 FUZ_highbit32(U32 v32)
2015-01-24 00:58:16 +00:00
{
unsigned nbBits = 0;
if (v32==0) return 0;
2016-05-29 23:56:08 +00:00
while (v32) v32 >>= 1, nbBits++;
2015-01-24 00:58:16 +00:00
return nbBits;
}
/*=============================================
* Test macros
=============================================*/
#define CHECK_Z(f) { \
size_t const err = f; \
if (ZSTD_isError(err)) { \
DISPLAY("Error => %s : %s ", \
#f, ZSTD_getErrorName(err)); \
exit(1); \
} }
#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); goto _output_error; }
#define CHECK_NEWV(var, fn) size_t const CHECK_VAR(var, fn)
#define CHECK(fn) { CHECK_NEWV(err, fn); }
#define CHECKPLUS(var, fn, more) { CHECK_NEWV(var, fn); more; }
2019-02-19 19:07:52 +00:00
#define CHECK_OP(op, lhs, rhs) { \
if (!((lhs) op (rhs))) { \
DISPLAY("Error L%u => FAILED %s %s %s ", __LINE__, #lhs, #op, #rhs); \
goto _output_error; \
} \
}
2019-02-19 19:07:52 +00:00
#define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs)
#define CHECK_LT(lhs, rhs) CHECK_OP(<, lhs, rhs)
2016-08-22 22:30:31 +00:00
/*=============================================
2017-07-10 20:48:41 +00:00
* Memory Tests
=============================================*/
#if defined(__APPLE__) && defined(__MACH__)
#include <malloc/malloc.h> /* malloc_size */
typedef struct {
unsigned long long totalMalloc;
size_t currentMalloc;
2017-07-10 20:48:41 +00:00
size_t peakMalloc;
unsigned nbMalloc;
unsigned nbFree;
} mallocCounter_t;
static const mallocCounter_t INIT_MALLOC_COUNTER = { 0, 0, 0, 0, 0 };
2017-07-10 20:48:41 +00:00
static void* FUZ_mallocDebug(void* counter, size_t size)
{
mallocCounter_t* const mcPtr = (mallocCounter_t*)counter;
void* const ptr = malloc(size);
if (ptr==NULL) return NULL;
DISPLAYLEVEL(4, "allocating %u KB => effectively %u KB \n",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
(unsigned)(size >> 10), (unsigned)(malloc_size(ptr) >> 10)); /* OS-X specific */
2017-07-10 20:48:41 +00:00
mcPtr->totalMalloc += size;
mcPtr->currentMalloc += size;
if (mcPtr->currentMalloc > mcPtr->peakMalloc)
mcPtr->peakMalloc = mcPtr->currentMalloc;
2017-07-10 20:48:41 +00:00
mcPtr->nbMalloc += 1;
return ptr;
}
static void FUZ_freeDebug(void* counter, void* address)
{
mallocCounter_t* const mcPtr = (mallocCounter_t*)counter;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(4, "freeing %u KB \n", (unsigned)(malloc_size(address) >> 10));
2017-07-10 20:48:41 +00:00
mcPtr->nbFree += 1;
mcPtr->currentMalloc -= malloc_size(address); /* OS-X specific */
free(address);
2017-07-10 20:48:41 +00:00
}
static void FUZ_displayMallocStats(mallocCounter_t count)
{
DISPLAYLEVEL(3, "peak:%6u KB, nbMallocs:%2u, total:%6u KB \n",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
(unsigned)(count.peakMalloc >> 10),
2017-07-10 20:48:41 +00:00
count.nbMalloc,
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
(unsigned)(count.totalMalloc >> 10));
2017-07-10 20:48:41 +00:00
}
2018-08-14 23:56:07 +00:00
static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsigned part,
void* inBuffer, size_t inSize, void* outBuffer, size_t outSize)
2017-07-10 20:48:41 +00:00
{
/* test only played in verbose mode, as they are long */
if (g_displayLevel<3) return 0;
/* Create compressible noise */
if (!inBuffer || !outBuffer) {
DISPLAY("Not enough memory, aborting\n");
exit(1);
}
RDG_genBuffer(inBuffer, inSize, compressibility, 0. /*auto*/, seed);
/* simple compression tests */
if (part <= 1)
2017-07-10 20:48:41 +00:00
{ int compressionLevel;
2017-07-10 21:09:16 +00:00
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
mallocCounter_t malcount = INIT_MALLOC_COUNTER;
ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
2017-07-10 20:48:41 +00:00
ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
CHECK_Z( ZSTD_compressCCtx(cctx, outBuffer, outSize, inBuffer, inSize, compressionLevel) );
2017-07-10 20:48:41 +00:00
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compressCCtx level %i : ", compressionLevel);
FUZ_displayMallocStats(malcount);
} }
/* streaming compression tests */
if (part <= 2)
2017-07-10 20:48:41 +00:00
{ int compressionLevel;
2017-07-10 21:09:16 +00:00
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
mallocCounter_t malcount = INIT_MALLOC_COUNTER;
ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
2017-07-10 20:48:41 +00:00
ZSTD_CCtx* const cstream = ZSTD_createCStream_advanced(cMem);
ZSTD_outBuffer out = { outBuffer, outSize, 0 };
ZSTD_inBuffer in = { inBuffer, inSize, 0 };
CHECK_Z( ZSTD_initCStream(cstream, compressionLevel) );
CHECK_Z( ZSTD_compressStream(cstream, &out, &in) );
CHECK_Z( ZSTD_endStream(cstream, &out) );
2017-07-10 20:48:41 +00:00
ZSTD_freeCStream(cstream);
DISPLAYLEVEL(3, "compressStream level %i : ", compressionLevel);
FUZ_displayMallocStats(malcount);
} }
/* advanced MT API test */
if (part <= 3)
{ int nbThreads;
2017-07-10 21:09:16 +00:00
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
mallocCounter_t malcount = INIT_MALLOC_COUNTER;
ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
2017-07-10 21:09:16 +00:00
ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) );
CHECK_Z( ZSTD_compress2(cctx, outBuffer, outSize, inBuffer, inSize) );
2017-07-10 21:09:16 +00:00
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%i,end level %i : ",
2017-07-10 21:09:16 +00:00
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
2017-07-10 20:48:41 +00:00
/* advanced MT streaming API test */
if (part <= 4)
{ int nbThreads;
2017-07-10 21:09:16 +00:00
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
mallocCounter_t malcount = INIT_MALLOC_COUNTER;
ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
2017-07-10 21:09:16 +00:00
ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
ZSTD_outBuffer out = { outBuffer, outSize, 0 };
ZSTD_inBuffer in = { inBuffer, inSize, 0 };
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) );
CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
2017-07-10 21:09:16 +00:00
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
2017-07-10 21:09:16 +00:00
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
2017-07-10 20:48:41 +00:00
return 0;
}
2018-08-14 23:56:07 +00:00
static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
{
size_t const inSize = 64 MB + 16 MB + 4 MB + 1 MB + 256 KB + 64 KB; /* 85.3 MB */
size_t const outSize = ZSTD_compressBound(inSize);
void* const inBuffer = malloc(inSize);
void* const outBuffer = malloc(outSize);
int result;
/* Create compressible noise */
if (!inBuffer || !outBuffer) {
DISPLAY("Not enough memory, aborting \n");
exit(1);
}
result = FUZ_mallocTests_internal(seed, compressibility, part,
inBuffer, inSize, outBuffer, outSize);
free(inBuffer);
free(outBuffer);
return result;
}
2017-07-10 20:48:41 +00:00
#else
static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
2017-07-10 20:48:41 +00:00
{
(void)seed; (void)compressibility; (void)part;
2017-07-10 20:48:41 +00:00
return 0;
}
#endif
/*=============================================
* Unit tests
2016-08-22 22:30:31 +00:00
=============================================*/
static int basicUnitTests(U32 const seed, double compressibility)
2015-01-24 00:58:16 +00:00
{
size_t const CNBuffSize = 5 MB;
2016-05-30 13:10:09 +00:00
void* const CNBuffer = malloc(CNBuffSize);
2017-07-10 20:48:41 +00:00
size_t const compressedBufferSize = ZSTD_compressBound(CNBuffSize);
void* const compressedBuffer = malloc(compressedBufferSize);
2016-05-30 13:10:09 +00:00
void* const decodedBuffer = malloc(CNBuffSize);
2016-05-29 23:56:08 +00:00
int testResult = 0;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
unsigned testNb=0;
2016-05-29 23:56:08 +00:00
size_t cSize;
2015-01-24 00:58:16 +00:00
2016-05-29 23:56:08 +00:00
/* Create compressible noise */
2016-03-19 11:12:07 +00:00
if (!CNBuffer || !compressedBuffer || !decodedBuffer) {
2015-07-05 07:10:40 +00:00
DISPLAY("Not enough memory, aborting\n");
testResult = 1;
goto _end;
}
2016-05-30 13:10:09 +00:00
RDG_genBuffer(CNBuffer, CNBuffSize, compressibility, 0., seed);
2015-01-24 00:58:16 +00:00
/* Basic tests */
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3u : ZSTD_getErrorName : ", testNb++);
{ const char* errorString = ZSTD_getErrorName(0);
DISPLAYLEVEL(3, "OK : %s \n", errorString);
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3u : ZSTD_getErrorName with wrong value : ", testNb++);
{ const char* errorString = ZSTD_getErrorName(499);
DISPLAYLEVEL(3, "OK : %s \n", errorString);
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3u : min compression level : ", testNb++);
{ int const mcl = ZSTD_minCLevel();
DISPLAYLEVEL(3, "%i (OK) \n", mcl);
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
2017-07-10 19:29:57 +00:00
if (cctx==NULL) goto _output_error;
CHECK_VAR(cSize, ZSTD_compressCCtx(cctx,
2017-07-10 20:48:41 +00:00
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, 1) );
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
2017-07-10 19:29:57 +00:00
DISPLAYLEVEL(3, "test%3i : size of cctx for level 1 : ", testNb++);
2017-07-10 19:29:57 +00:00
{ size_t const cctxSize = ZSTD_sizeof_CCtx(cctx);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cctxSize);
2017-07-10 19:29:57 +00:00
}
ZSTD_freeCCtx(cctx);
}
2015-01-24 00:58:16 +00:00
DISPLAYLEVEL(3, "test%3i : decompress skippable frame -8 size : ", testNb++);
{
char const skippable8[] = "\x50\x2a\x4d\x18\xf8\xff\xff\xff";
size_t const size = ZSTD_decompress(NULL, 0, skippable8, 8);
if (!ZSTD_isError(size)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_getFrameContentSize test : ", testNb++);
{ unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize);
if (rSize != CNBuffSize) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_findDecompressedSize test : ", testNb++);
{ unsigned long long const rSize = ZSTD_findDecompressedSize(compressedBuffer, cSize);
2016-07-07 11:14:21 +00:00
if (rSize != CNBuffSize) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
2016-07-07 11:14:21 +00:00
DISPLAYLEVEL(3, "test%3i : tight ZSTD_decompressBound test : ", testNb++);
{
2019-03-02 07:11:15 +00:00
unsigned long long bound = ZSTD_decompressBound(compressedBuffer, cSize);
if (bound != CNBuffSize) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with invalid srcSize : ", testNb++);
{
unsigned long long bound = ZSTD_decompressBound(compressedBuffer, cSize - 1);
if (bound != ZSTD_CONTENTSIZE_ERROR) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize);
2016-07-27 22:55:43 +00:00
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (r != CNBuffSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
2016-05-29 23:56:08 +00:00
{ size_t u;
2016-05-30 13:10:09 +00:00
for (u=0; u<CNBuffSize; u++) {
2019-08-16 01:17:06 +00:00
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
2016-05-29 23:56:08 +00:00
} }
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
DISPLAYLEVEL(3, "test%3i : decompress with null dict : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
{ size_t const r = ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
NULL, 0);
if (r != CNBuffSize) goto _output_error;
}
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : decompress with null DDict : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
{ size_t const r = ZSTD_decompress_usingDDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
NULL);
if (r != CNBuffSize) goto _output_error;
}
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : decompress with 1 missing byte : ", testNb++);
2016-05-30 13:10:09 +00:00
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize-1);
2016-05-29 23:56:08 +00:00
if (!ZSTD_isError(r)) goto _output_error;
if (ZSTD_getErrorCode((size_t)r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
DISPLAYLEVEL(3, "test%3i : decompress with 1 too much byte : ", testNb++);
2016-05-30 13:10:09 +00:00
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize+1);
2016-05-29 23:56:08 +00:00
if (!ZSTD_isError(r)) goto _output_error;
if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
DISPLAYLEVEL(3, "test%3i : decompress too large input : ", testNb++);
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, compressedBufferSize);
if (!ZSTD_isError(r)) goto _output_error;
if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2019-03-02 05:27:30 +00:00
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with content size missing : ", testNb++);
{ /* create compressed buffer with content size missing */
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
2019-03-02 05:27:30 +00:00
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
CHECK_VAR(cSize, ZSTD_compress2(cctx,
2019-03-02 05:27:30 +00:00
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize) );
2019-03-02 05:27:30 +00:00
ZSTD_freeCCtx(cctx);
}
{ /* ensure frame content size is missing */
ZSTD_frameHeader zfh;
2019-03-02 07:11:15 +00:00
size_t const ret = ZSTD_getFrameHeader(&zfh, compressedBuffer, compressedBufferSize);
if (ret != 0 || zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error;
2019-03-02 05:27:30 +00:00
}
{ /* ensure CNBuffSize <= decompressBound */
2019-03-02 07:11:15 +00:00
unsigned long long const bound = ZSTD_decompressBound(compressedBuffer, compressedBufferSize);
2019-03-02 05:27:30 +00:00
if (CNBuffSize > bound) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19);
if (ZSTD_isError(r)) goto _output_error;
if (ZSTD_sizeof_CCtx(cctx) > (1U << 20)) goto _output_error;
ZSTD_freeCCtx(cctx);
cSize = r;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : decompress empty frame into NULL : ", testNb++);
{ size_t const r = ZSTD_decompress(NULL, 0, compressedBuffer, cSize);
if (ZSTD_isError(r)) goto _output_error;
if (r != 0) goto _output_error;
}
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_outBuffer output;
if (cctx==NULL) goto _output_error;
output.dst = compressedBuffer;
output.size = compressedBufferSize;
output.pos = 0;
CHECK_Z( ZSTD_initCStream(cctx, 1) ); /* content size unknown */
CHECK_Z( ZSTD_flushStream(cctx, &output) ); /* ensure no possibility to "concatenate" and determine the content size */
CHECK_Z( ZSTD_endStream(cctx, &output) );
ZSTD_freeCCtx(cctx);
/* single scan decompression */
{ size_t const r = ZSTD_decompress(NULL, 0, compressedBuffer, output.pos);
if (ZSTD_isError(r)) goto _output_error;
if (r != 0) goto _output_error;
}
/* streaming decompression */
{ ZSTD_DCtx* const dstream = ZSTD_createDStream();
ZSTD_inBuffer dinput;
ZSTD_outBuffer doutput;
size_t ipos;
if (dstream==NULL) goto _output_error;
dinput.src = compressedBuffer;
dinput.size = 0;
dinput.pos = 0;
doutput.dst = NULL;
doutput.size = 0;
doutput.pos = 0;
CHECK_Z ( ZSTD_initDStream(dstream) );
for (ipos=1; ipos<=output.pos; ipos++) {
dinput.size = ipos;
CHECK_Z ( ZSTD_decompressStream(dstream, &doutput, &dinput) );
}
if (doutput.pos != 0) goto _output_error;
ZSTD_freeDStream(dstream);
}
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : re-use CCtx with expanding block size : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_parameters const params = ZSTD_getParams(1, ZSTD_CONTENTSIZE_UNKNOWN, 0);
assert(params.fParams.contentSizeFlag == 1); /* block size will be adapted if pledgedSrcSize is enabled */
CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, 1 /*pledgedSrcSize*/) );
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, 1) ); /* creates a block size of 1 */
CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */
{ size_t const inSize = 2* 128 KB;
size_t const outSize = ZSTD_compressBound(inSize);
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, outSize, CNBuffer, inSize) );
/* will fail if blockSize is not resized */
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : re-using a CCtx should compress the same : ", testNb++);
2018-12-19 22:10:27 +00:00
{ size_t const sampleSize = 30;
int i;
for (i=0; i<20; i++)
2018-07-18 14:10:23 +00:00
((char*)CNBuffer)[i] = (char)i; /* ensure no match during initial section */
memcpy((char*)CNBuffer + 20, CNBuffer, 10); /* create one match, starting from beginning of sample, which is the difficult case (see #1241) */
for (i=1; i<=19; i++) {
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t size1, size2;
DISPLAYLEVEL(5, "l%i ", i);
2018-12-19 22:10:27 +00:00
size1 = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, CNBuffer, sampleSize, i);
CHECK_Z(size1);
2018-12-19 22:10:27 +00:00
size2 = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, CNBuffer, sampleSize, i);
CHECK_Z(size2);
CHECK_EQ(size1, size2);
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, i) );
size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, sampleSize);
CHECK_Z(size2);
CHECK_EQ(size1, size2);
size2 = ZSTD_compress2(cctx, compressedBuffer, ZSTD_compressBound(sampleSize) - 1, CNBuffer, sampleSize); /* force streaming, as output buffer is not large enough to guarantee success */
CHECK_Z(size2);
CHECK_EQ(size1, size2);
2018-12-19 22:10:27 +00:00
{ ZSTD_inBuffer inb;
ZSTD_outBuffer outb;
inb.src = CNBuffer;
inb.pos = 0;
inb.size = sampleSize;
outb.dst = compressedBuffer;
outb.pos = 0;
outb.size = ZSTD_compressBound(sampleSize) - 1; /* force streaming, as output buffer is not large enough to guarantee success */
CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_end) );
assert(inb.pos == inb.size);
CHECK_EQ(size1, outb.pos);
}
ZSTD_freeCCtx(cctx);
}
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : btultra2 & 1st block : ", testNb++);
{ size_t const sampleSize = 1024;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_inBuffer inb;
ZSTD_outBuffer outb;
inb.src = CNBuffer;
inb.pos = 0;
inb.size = 0;
outb.dst = compressedBuffer;
outb.pos = 0;
outb.size = compressedBufferSize;
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, ZSTD_maxCLevel()) );
inb.size = sampleSize; /* start with something, so that context is already used */
CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_end) ); /* will break internal assert if stats_init is not disabled */
assert(inb.pos == inb.size);
outb.pos = 0; /* cancel output */
CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(cctx, sampleSize) );
inb.size = 4; /* too small size : compression will be skipped */
inb.pos = 0;
CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_flush) );
assert(inb.pos == inb.size);
inb.size += 5; /* too small size : compression will be skipped */
CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_flush) );
assert(inb.pos == inb.size);
inb.size += 11; /* small enough to attempt compression */
CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_flush) );
assert(inb.pos == inb.size);
assert(inb.pos < sampleSize);
inb.size = sampleSize; /* large enough to trigger stats_init, but no longer at beginning */
CHECK_Z( ZSTD_compressStream2(cctx, &outb, &inb, ZSTD_e_end) ); /* will break internal assert if stats_init is not disabled */
assert(inb.pos == inb.size);
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : ZSTD_CCtx_getParameter() : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_outBuffer out = {NULL, 0, 0};
ZSTD_inBuffer in = {NULL, 0, 0};
int value;
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value));
CHECK_EQ(value, 3);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value));
CHECK_EQ(value, 0);
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, ZSTD_HASHLOG_MIN));
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value));
CHECK_EQ(value, 3);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value));
CHECK_EQ(value, ZSTD_HASHLOG_MIN);
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7));
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value));
CHECK_EQ(value, 7);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value));
CHECK_EQ(value, ZSTD_HASHLOG_MIN);
/* Start a compression job */
ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value));
CHECK_EQ(value, 7);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value));
CHECK_EQ(value, ZSTD_HASHLOG_MIN);
/* Reset the CCtx */
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value));
CHECK_EQ(value, 7);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value));
CHECK_EQ(value, ZSTD_HASHLOG_MIN);
/* Reset the parameters */
ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &value));
CHECK_EQ(value, 3);
CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_hashLog, &value));
CHECK_EQ(value, 0);
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
2018-06-05 21:53:28 +00:00
/* this test is really too long, and should be made faster */
DISPLAYLEVEL(3, "test%3d : overflow protection with large windowLog : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_parameters params = ZSTD_getParams(-999, ZSTD_CONTENTSIZE_UNKNOWN, 0);
size_t const nbCompressions = ((1U << 31) / CNBuffSize) + 2; /* ensure U32 overflow protection is triggered */
size_t cnb;
assert(cctx != NULL);
params.fParams.contentSizeFlag = 0;
params.cParams.windowLog = ZSTD_WINDOWLOG_MAX;
for (cnb = 0; cnb < nbCompressions; ++cnb) {
DISPLAYLEVEL(6, "run %zu / %zu \n", cnb, nbCompressions);
CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize) );
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
2018-06-05 21:53:28 +00:00
DISPLAYLEVEL(3, "test%3d : size down context : ", testNb++);
{ ZSTD_CCtx* const largeCCtx = ZSTD_createCCtx();
assert(largeCCtx != NULL);
CHECK_Z( ZSTD_compressBegin(largeCCtx, 19) ); /* streaming implies ZSTD_CONTENTSIZE_UNKNOWN, which maximizes memory usage */
CHECK_Z( ZSTD_compressEnd(largeCCtx, compressedBuffer, compressedBufferSize, CNBuffer, 1) );
{ size_t const largeCCtxSize = ZSTD_sizeof_CCtx(largeCCtx); /* size of context must be measured after compression */
{ ZSTD_CCtx* const smallCCtx = ZSTD_createCCtx();
assert(smallCCtx != NULL);
CHECK_Z(ZSTD_compressCCtx(smallCCtx, compressedBuffer, compressedBufferSize, CNBuffer, 1, 1));
{ size_t const smallCCtxSize = ZSTD_sizeof_CCtx(smallCCtx);
DISPLAYLEVEL(5, "(large) %zuKB > 32*%zuKB (small) : ",
largeCCtxSize>>10, smallCCtxSize>>10);
assert(largeCCtxSize > 32* smallCCtxSize); /* note : "too large" definition is handled within zstd_compress.c .
* make this test case extreme, so that it doesn't depend on a possibly fluctuating definition */
}
ZSTD_freeCCtx(smallCCtx);
}
{ U32 const maxNbAttempts = 1100; /* nb of usages before triggering size down is handled within zstd_compress.c.
* currently defined as 128x, but could be adjusted in the future.
* make this test long enough so that it's not too much tied to the current definition within zstd_compress.c */
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
unsigned u;
2018-06-05 21:53:28 +00:00
for (u=0; u<maxNbAttempts; u++) {
CHECK_Z(ZSTD_compressCCtx(largeCCtx, compressedBuffer, compressedBufferSize, CNBuffer, 1, 1));
2018-06-05 21:53:28 +00:00
if (ZSTD_sizeof_CCtx(largeCCtx) < largeCCtxSize) break; /* sized down */
}
DISPLAYLEVEL(5, "size down after %u attempts : ", u);
if (u==maxNbAttempts) goto _output_error; /* no sizedown happened */
}
}
ZSTD_freeCCtx(largeCCtx);
}
DISPLAYLEVEL(3, "OK \n");
/* Static CCtx tests */
#define STATIC_CCTX_LEVEL 3
DISPLAYLEVEL(3, "test%3i : create static CCtx for level %u :", testNb++, STATIC_CCTX_LEVEL);
2017-06-26 23:02:25 +00:00
{ size_t const staticCCtxSize = ZSTD_estimateCStreamSize(STATIC_CCTX_LEVEL);
2017-05-25 00:41:41 +00:00
void* const staticCCtxBuffer = malloc(staticCCtxSize);
size_t const staticDCtxSize = ZSTD_estimateDCtxSize();
void* const staticDCtxBuffer = malloc(staticDCtxSize);
if (staticCCtxBuffer==NULL || staticDCtxBuffer==NULL) {
free(staticCCtxBuffer);
free(staticDCtxBuffer);
DISPLAY("Not enough memory, aborting\n");
testResult = 1;
goto _end;
}
{ ZSTD_CCtx* staticCCtx = ZSTD_initStaticCCtx(staticCCtxBuffer, staticCCtxSize);
2017-05-25 00:41:41 +00:00
ZSTD_DCtx* staticDCtx = ZSTD_initStaticDCtx(staticDCtxBuffer, staticDCtxSize);
if ((staticCCtx==NULL) || (staticDCtx==NULL)) goto _output_error;
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CCtx for level %u : ", testNb++, STATIC_CCTX_LEVEL);
{ size_t const r = ZSTD_compressBegin(staticCCtx, STATIC_CCTX_LEVEL);
if (ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : simple compression test with static CCtx : ", testNb++);
CHECK_VAR(cSize, ZSTD_compressCCtx(staticCCtx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
(unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : simple decompression test with static DCtx : ", testNb++);
2017-05-25 00:41:41 +00:00
{ size_t const r = ZSTD_decompressDCtx(staticDCtx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize);
if (r != CNBuffSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
2017-05-25 00:41:41 +00:00
{ size_t u;
for (u=0; u<CNBuffSize; u++) {
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u])
2019-08-16 01:17:06 +00:00
goto _output_error;
2017-05-25 00:41:41 +00:00
} }
DISPLAYLEVEL(3, "OK \n");
2017-05-25 00:41:41 +00:00
DISPLAYLEVEL(3, "test%3i : init CCtx for too large level (must fail) : ", testNb++);
{ size_t const r = ZSTD_compressBegin(staticCCtx, ZSTD_maxCLevel());
if (!ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
CHECK( ZSTD_compressBegin(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
CHECK( ZSTD_initCStream(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init static CStream with dictionary (should fail) : ", testNb++);
{ size_t const r = ZSTD_initCStream_usingDict(staticCCtx, CNBuffer, 64 KB, 1);
if (!ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2017-05-25 00:41:41 +00:00
DISPLAYLEVEL(3, "test%3i : init DStream (should fail) : ", testNb++);
2017-05-25 00:41:41 +00:00
{ size_t const r = ZSTD_initDStream(staticDCtx);
if (ZSTD_isError(r)) goto _output_error; }
{ ZSTD_outBuffer output = { decodedBuffer, CNBuffSize, 0 };
ZSTD_inBuffer input = { compressedBuffer, ZSTD_FRAMEHEADERSIZE_MAX+1, 0 };
size_t const r = ZSTD_decompressStream(staticDCtx, &output, &input);
if (!ZSTD_isError(r)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
}
free(staticCCtxBuffer);
2017-05-25 00:41:41 +00:00
free(staticDCtxBuffer);
}
DISPLAYLEVEL(3, "test%3i : Static negative levels : ", testNb++);
{ size_t const cctxSizeN1 = ZSTD_estimateCCtxSize(-1);
size_t const cctxSizeP1 = ZSTD_estimateCCtxSize(1);
size_t const cstreamSizeN1 = ZSTD_estimateCStreamSize(-1);
size_t const cstreamSizeP1 = ZSTD_estimateCStreamSize(1);
if (!(0 < cctxSizeN1 && cctxSizeN1 <= cctxSizeP1)) goto _output_error;
if (!(0 < cstreamSizeN1 && cstreamSizeN1 <= cstreamSizeP1)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
/* ZSTDMT simple MT compression test */
DISPLAYLEVEL(3, "test%3i : create ZSTDMT CCtx : ", testNb++);
{ ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(2);
if (mtctx==NULL) {
DISPLAY("mtctx : not enough memory, aborting \n");
testResult = 1;
goto _end;
}
DISPLAYLEVEL(3, "OK \n");
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3u : compress %u bytes with 2 threads : ", testNb++, (unsigned)CNBuffSize);
CHECK_VAR(cSize, ZSTDMT_compressCCtx(mtctx,
2017-07-10 20:48:41 +00:00
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
1) );
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : decompressed size test : ", testNb++);
{ unsigned long long const rSize = ZSTD_getFrameContentSize(compressedBuffer, cSize);
if (rSize != CNBuffSize) {
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAY("ZSTD_getFrameContentSize incorrect : %u != %u \n", (unsigned)rSize, (unsigned)CNBuffSize);
goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize);
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (r != CNBuffSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t u;
for (u=0; u<CNBuffSize; u++) {
2019-08-16 01:17:06 +00:00
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress -T2 with checksum : ", testNb++);
{ ZSTD_parameters params = ZSTD_getParams(1, CNBuffSize, 0);
params.fParams.checksumFlag = 1;
params.fParams.contentSizeFlag = 1;
CHECK_VAR(cSize, ZSTDMT_compress_advanced(mtctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
NULL, params, 3 /*overlapRLog*/) );
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize);
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (r != CNBuffSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
ZSTDMT_freeCCtx(mtctx);
}
2019-02-19 19:07:52 +00:00
DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
size_t cSize1, cSize2;
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) );
cSize1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK(cSize1);
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_lcm_uncompressed) );
cSize2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK(cSize2);
CHECK_LT(cSize1, cSize2);
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Multithreaded ZSTD_compress2() with rsyncable : ", testNb++)
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
/* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so
* ZSTDMT is forced to not take the shortcut.
*/
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) );
CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) );
ZSTD_freeCCtx(cctx);
2019-02-19 19:07:52 +00:00
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++)
{ ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
int value;
/* Check that the overlap log and job size are unset. */
CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
2019-02-19 19:07:52 +00:00
CHECK_EQ(value, 0);
CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
2019-02-19 19:07:52 +00:00
CHECK_EQ(value, 0);
/* Set and check the overlap log and job size. */
CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) );
CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, 2 MB) );
CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
2019-02-19 19:07:52 +00:00
CHECK_EQ(value, 5);
CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
2019-02-19 19:07:52 +00:00
CHECK_EQ(value, 2 MB);
Spelling (#1582) * spelling: accidentally * spelling: across * spelling: additionally * spelling: addresses * spelling: appropriate * spelling: assumed * spelling: available * spelling: builder * spelling: capacity * spelling: compiler * spelling: compressibility * spelling: compressor * spelling: compression * spelling: contract * spelling: convenience * spelling: decompress * spelling: description * spelling: deflate * spelling: deterministically * spelling: dictionary * spelling: display * spelling: eliminate * spelling: preemptively * spelling: exclude * spelling: failure * spelling: independence * spelling: independent * spelling: intentionally * spelling: matching * spelling: maximum * spelling: meaning * spelling: mishandled * spelling: memory * spelling: occasionally * spelling: occurrence * spelling: official * spelling: offsets * spelling: original * spelling: output * spelling: overflow * spelling: overridden * spelling: parameter * spelling: performance * spelling: probability * spelling: receives * spelling: redundant * spelling: recompression * spelling: resources * spelling: sanity * spelling: segment * spelling: series * spelling: specified * spelling: specify * spelling: subtracted * spelling: successful * spelling: return * spelling: translation * spelling: update * spelling: unrelated * spelling: useless * spelling: variables * spelling: variety * spelling: verbatim * spelling: verification * spelling: visited * spelling: warming * spelling: workers * spelling: with
2019-04-12 18:18:11 +00:00
/* Set the number of workers and check the overlap log and job size. */
CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) );
CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
2019-02-19 19:07:52 +00:00
CHECK_EQ(value, 5);
CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
2019-02-19 19:07:52 +00:00
CHECK_EQ(value, 2 MB);
ZSTD_freeCCtxParams(params);
}
DISPLAYLEVEL(3, "OK \n");
/* Simple API multiframe test */
DISPLAYLEVEL(3, "test%3i : compress multiple frames : ", testNb++);
{ size_t off = 0;
int i;
int const segs = 4;
/* only use the first half so we don't push against size limit of compressedBuffer */
size_t const segSize = (CNBuffSize / 2) / segs;
for (i = 0; i < segs; i++) {
CHECK_NEWV(r, ZSTD_compress(
(BYTE*)compressedBuffer + off, CNBuffSize - off,
(BYTE*)CNBuffer + segSize * (size_t)i, segSize,
5) );
off += r;
if (i == segs/2) {
/* insert skippable frame */
2017-11-01 20:10:03 +00:00
const U32 skipLen = 129 KB;
MEM_writeLE32((BYTE*)compressedBuffer + off, ZSTD_MAGIC_SKIPPABLE_START);
MEM_writeLE32((BYTE*)compressedBuffer + off + 4, skipLen);
off += skipLen + ZSTD_SKIPPABLEHEADERSIZE;
}
}
cSize = off;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : get decompressed size of multiple frames : ", testNb++);
{ unsigned long long const r = ZSTD_findDecompressedSize(compressedBuffer, cSize);
if (r != CNBuffSize / 2) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : get tight decompressed bound of multiple frames : ", testNb++);
2019-03-02 07:11:15 +00:00
{ unsigned long long const bound = ZSTD_decompressBound(compressedBuffer, cSize);
if (bound != CNBuffSize / 2) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : decompress multiple frames : ", testNb++);
{ CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize));
if (r != CNBuffSize / 2) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
if (memcmp(decodedBuffer, CNBuffer, CNBuffSize / 2) != 0) goto _output_error;
DISPLAYLEVEL(3, "OK \n");
2016-04-12 17:13:08 +00:00
/* Dictionary and CCtx Duplication tests */
2016-05-29 23:56:08 +00:00
{ ZSTD_CCtx* const ctxOrig = ZSTD_createCCtx();
ZSTD_CCtx* const ctxDuplicated = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
2016-05-29 23:56:08 +00:00
static const size_t dictSize = 551;
assert(dctx != NULL); assert(ctxOrig != NULL); assert(ctxDuplicated != NULL);
2016-01-08 16:27:50 +00:00
DISPLAYLEVEL(3, "test%3i : copy context too soon : ", testNb++);
{ size_t const copyResult = ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0);
2016-05-29 23:56:08 +00:00
if (!ZSTD_isError(copyResult)) goto _output_error; } /* error must be detected */
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : load dictionary into context : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECK( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) );
2017-02-09 20:27:32 +00:00
CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */
DISPLAYLEVEL(3, "OK \n");
2016-01-08 16:27:50 +00:00
DISPLAYLEVEL(3, "test%3i : compress with flat dictionary : ", testNb++);
2016-01-08 16:27:50 +00:00
cSize = 0;
CHECKPLUS(r, ZSTD_compressEnd(ctxOrig,
compressedBuffer, compressedBufferSize,
(const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
2016-05-29 23:56:08 +00:00
cSize += r);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
2016-01-08 16:27:50 +00:00
DISPLAYLEVEL(3, "test%3i : frame built with flat dictionary should be decompressible : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
2016-05-30 13:10:09 +00:00
decodedBuffer, CNBuffSize,
2016-05-29 23:56:08 +00:00
compressedBuffer, cSize,
CNBuffer, dictSize),
2016-05-30 13:10:09 +00:00
if (r != CNBuffSize - dictSize) goto _output_error);
DISPLAYLEVEL(3, "OK \n");
2016-01-08 16:27:50 +00:00
DISPLAYLEVEL(3, "test%3i : compress with duplicated context : ", testNb++);
2016-04-12 17:13:08 +00:00
{ size_t const cSizeOrig = cSize;
cSize = 0;
CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated,
compressedBuffer, compressedBufferSize,
(const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
2016-05-29 23:56:08 +00:00
cSize += r);
if (cSize != cSizeOrig) goto _output_error; /* should be identical ==> same size */
2016-04-12 17:13:08 +00:00
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
2016-01-08 16:27:50 +00:00
DISPLAYLEVEL(3, "test%3i : frame built with duplicated context should be decompressible : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
2016-05-30 13:10:09 +00:00
decodedBuffer, CNBuffSize,
2016-01-08 16:27:50 +00:00
compressedBuffer, cSize,
2016-05-29 23:56:08 +00:00
CNBuffer, dictSize),
2016-05-30 13:10:09 +00:00
if (r != CNBuffSize - dictSize) goto _output_error);
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : decompress with DDict : ", testNb++);
2017-05-25 22:44:06 +00:00
{ ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, dictSize);
size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict);
if (r != CNBuffSize - dictSize) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (size of DDict : %u) \n", (unsigned)ZSTD_sizeof_DDict(ddict));
ZSTD_freeDDict(ddict);
}
DISPLAYLEVEL(3, "test%3i : decompress with static DDict : ", testNb++);
2017-08-29 18:55:02 +00:00
{ size_t const ddictBufferSize = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
void* const ddictBuffer = malloc(ddictBufferSize);
2017-05-25 22:44:06 +00:00
if (ddictBuffer == NULL) goto _output_error;
{ const ZSTD_DDict* const ddict = ZSTD_initStaticDDict(ddictBuffer, ddictBufferSize, CNBuffer, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
2017-05-25 22:44:06 +00:00
size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict);
if (r != CNBuffSize - dictSize) goto _output_error;
}
2017-06-20 19:17:32 +00:00
free(ddictBuffer);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (size of static DDict : %u) \n", (unsigned)ddictBufferSize);
2017-05-25 22:44:06 +00:00
}
DISPLAYLEVEL(3, "test%3i : check content size on duplicated context : ", testNb++);
2016-05-30 13:10:09 +00:00
{ size_t const testSize = CNBuffSize / 3;
2016-06-27 13:28:45 +00:00
{ ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);
p.fParams.contentSizeFlag = 1;
2016-05-30 13:10:09 +00:00
CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
2016-05-29 21:09:51 +00:00
}
CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) );
2016-05-29 23:56:08 +00:00
CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
(const char*)CNBuffer + dictSize, testSize) );
{ ZSTD_frameHeader zfh;
if (ZSTD_getFrameHeader(&zfh, compressedBuffer, cSize)) goto _output_error;
if ((zfh.frameContentSize != testSize) && (zfh.frameContentSize != 0)) goto _output_error;
} }
DISPLAYLEVEL(3, "OK \n");
if ((int)(compressibility * 100 + 0.1) == FUZ_compressibility_default) { /* test only valid with known input */
size_t const flatdictSize = 22 KB;
size_t const contentSize = 9 KB;
const void* const dict = (const char*)CNBuffer;
const void* const contentStart = (const char*)dict + flatdictSize;
size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770,
3770, 3770, 3770, 3750, 3750,
3740, 3670, 3670, 3660, 3660,
3660, 3660, 3660, 3660, 3660,
3660, 3660, 3660 };
size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940,
2950, 2950, 2920, 2900, 2890,
2910, 2910, 2910, 2770, 2760,
2750, 2750, 2750, 2750, 2750,
2750, 2750, 2750 };
int l = 1;
int const maxLevel = ZSTD_maxCLevel();
DISPLAYLEVEL(3, "test%3i : flat-dictionary efficiency test : \n", testNb++);
assert(maxLevel == 22);
RDG_genBuffer(CNBuffer, flatdictSize + contentSize, compressibility, 0., seed);
DISPLAYLEVEL(4, "content hash : %016llx; dict hash : %016llx \n", XXH64(contentStart, contentSize, 0), XXH64(dict, flatdictSize, 0));
for ( ; l <= maxLevel; l++) {
size_t const nodict_cSize = ZSTD_compress(compressedBuffer, compressedBufferSize,
contentStart, contentSize, l);
if (nodict_cSize > target_nodict_cSize[l]) {
DISPLAYLEVEL(1, "error : compression at level %i worse than expected (%u > %u) \n",
l, (unsigned)nodict_cSize, (unsigned)target_nodict_cSize[l]);
goto _output_error;
}
DISPLAYLEVEL(4, "level %i : max expected %u >= reached %u \n",
l, (unsigned)target_nodict_cSize[l], (unsigned)nodict_cSize);
}
for ( l=1 ; l <= maxLevel; l++) {
size_t const wdict_cSize = ZSTD_compress_usingDict(ctxOrig,
compressedBuffer, compressedBufferSize,
contentStart, contentSize,
dict, flatdictSize,
l);
if (wdict_cSize > target_wdict_cSize[l]) {
DISPLAYLEVEL(1, "error : compression with dictionary at level %i worse than expected (%u > %u) \n",
l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]);
goto _output_error;
}
DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n",
l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
}
DISPLAYLEVEL(4, "compression efficiency tests OK \n");
}
ZSTD_freeCCtx(ctxOrig);
ZSTD_freeCCtx(ctxDuplicated);
ZSTD_freeDCtx(dctx);
2016-01-08 16:27:50 +00:00
}
/* Dictionary and dictBuilder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const dictBufferCapacity = 16 KB;
void* const dictBuffer = malloc(dictBufferCapacity);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
2016-05-30 20:29:45 +00:00
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
size_t dictSize;
2016-12-06 05:11:46 +00:00
U32 dictID;
if (dictBuffer==NULL || samplesSizes==NULL) {
free(dictBuffer);
free(samplesSizes);
goto _output_error;
}
DISPLAYLEVEL(3, "test%3i : dictBuilder on cyclic data : ", testNb++);
assert(compressedBufferSize >= totalSampleSize);
{ U32 u; for (u=0; u<totalSampleSize; u++) ((BYTE*)decodedBuffer)[u] = (BYTE)u; }
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ size_t const sDictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity,
decodedBuffer, samplesSizes, nbSamples);
if (ZDICT_isError(sDictSize)) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)sDictSize);
}
DISPLAYLEVEL(3, "test%3i : dictBuilder : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples);
if (ZDICT_isError(dictSize)) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : Multithreaded COVER dictBuilder : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_cover_params_t coverParams;
memset(&coverParams, 0, sizeof(coverParams));
coverParams.steps = 8;
coverParams.nbThreads = 4;
dictSize = ZDICT_optimizeTrainFromBuffer_cover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples/8, /* less samples for faster tests */
&coverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : COVER dictBuilder with shrinkDict: ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_cover_params_t coverParams;
memset(&coverParams, 0, sizeof(coverParams));
coverParams.steps = 8;
coverParams.nbThreads = 4;
coverParams.shrinkDict = 1;
coverParams.shrinkDictMaxRegression = 1;
dictSize = ZDICT_optimizeTrainFromBuffer_cover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples/8, /* less samples for faster tests */
&coverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : Multithreaded FASTCOVER dictBuilder : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_fastCover_params_t fastCoverParams;
memset(&fastCoverParams, 0, sizeof(fastCoverParams));
fastCoverParams.steps = 8;
fastCoverParams.nbThreads = 4;
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples,
&fastCoverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : FASTCOVER dictBuilder with shrinkDict: ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_fastCover_params_t fastCoverParams;
memset(&fastCoverParams, 0, sizeof(fastCoverParams));
fastCoverParams.steps = 8;
fastCoverParams.nbThreads = 4;
fastCoverParams.shrinkDict = 1;
fastCoverParams.shrinkDictMaxRegression = 1;
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples,
&fastCoverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++);
2016-12-06 05:11:46 +00:00
dictID = ZDICT_getDictID(dictBuffer, dictSize);
if (dictID==0) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
2016-08-19 12:23:58 +00:00
DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++);
2017-07-10 20:48:41 +00:00
cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
dictBuffer, dictSize, 4);
if (ZSTD_isError(cSize)) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : retrieve dictID from dictionary : ", testNb++);
2016-12-06 05:11:46 +00:00
{ U32 const did = ZSTD_getDictID_fromDict(dictBuffer, dictSize);
if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : retrieve dictID from frame : ", testNb++);
2016-12-06 05:11:46 +00:00
{ U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : frame built with dictionary should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
dictBuffer, dictSize),
if (r != CNBuffSize) goto _output_error);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : estimate CDict size : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
2017-08-29 18:55:02 +00:00
size_t const estimatedSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byRef);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)estimatedSize);
}
DISPLAYLEVEL(3, "test%3i : compress with CDict ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
assert(cdict != NULL);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
2017-07-10 20:48:41 +00:00
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict);
ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) goto _output_error;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : retrieve dictID from frame : ", testNb++);
{ U32 const did = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
if (did != dictID) goto _output_error; /* non-conformant (content-only) dictionary */
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : frame built with dictionary should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
dictBuffer, dictSize),
if (r != CNBuffSize) goto _output_error);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress with static CDict : ", testNb++);
{ int const maxLevel = ZSTD_maxCLevel();
int level;
for (level = 1; level <= maxLevel; ++level) {
ZSTD_compressionParameters const cParams = ZSTD_getCParams(level, CNBuffSize, dictSize);
size_t const cdictSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
void* const cdictBuffer = malloc(cdictSize);
if (cdictBuffer==NULL) goto _output_error;
{ const ZSTD_CDict* const cdict = ZSTD_initStaticCDict(
cdictBuffer, cdictSize,
dictBuffer, dictSize,
ZSTD_dlm_byCopy, ZSTD_dct_auto,
cParams);
if (cdict == NULL) {
DISPLAY("ZSTD_initStaticCDict failed ");
goto _output_error;
}
cSize = ZSTD_compress_usingCDict(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, MIN(10 KB, CNBuffSize), cdict);
if (ZSTD_isError(cSize)) {
DISPLAY("ZSTD_compress_usingCDict failed ");
goto _output_error;
} }
free(cdictBuffer);
} }
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
2017-05-26 01:05:49 +00:00
DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingCDict_advanced, no contentSize, no dictID : ", testNb++);
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
assert(cdict != NULL);
cSize = ZSTD_compress_usingCDict_advanced(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
cdict, fParams);
ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) goto _output_error;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : try retrieving contentSize from frame : ", testNb++);
{ U64 const contentSize = ZSTD_getFrameContentSize(compressedBuffer, cSize);
if (contentSize != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error;
}
DISPLAYLEVEL(3, "OK (unknown)\n");
DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
dictBuffer, dictSize),
if (r != CNBuffSize) goto _output_error);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_compress_advanced, no dictID : ", testNb++);
2016-06-27 13:28:45 +00:00
{ ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize);
p.fParams.noDictIDFlag = 1;
2017-07-10 20:48:41 +00:00
cSize = ZSTD_compress_advanced(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
dictBuffer, dictSize, p);
if (ZSTD_isError(cSize)) goto _output_error;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
dictBuffer, dictSize),
if (r != CNBuffSize) goto _output_error);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : dictionary containing only header should return error : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
{ const size_t ret = ZSTD_decompress_usingDict(
dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize,
"\x37\xa4\x30\xec\x11\x22\x33\x44", 8);
if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted)
goto _output_error;
}
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a good dictionary : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem);
if (cdict==NULL) goto _output_error;
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a rawContent (must fail) : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced((const char*)dictBuffer+1, dictSize-1, ZSTD_dlm_byRef, ZSTD_dct_fullDict, cParams, ZSTD_defaultCMem);
if (cdict!=NULL) goto _output_error;
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dm_auto should fail : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
/* Either operation is allowed to fail, but one must fail. */
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto);
if (!ZSTD_isError(ret)) {
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (!ZSTD_isError(ret)) goto _output_error;
}
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dm_rawContent should pass : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
if (ZSTD_isError(ret)) goto _output_error;
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (ZSTD_isError(ret)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1);
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 ));
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 ));
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading dictionary before setting parameters is the same as loading after : ", testNb++);
{
size_t size1, size2;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7) );
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) );
size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
if (ZSTD_isError(size1)) goto _output_error;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7) );
size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
if (ZSTD_isError(size2)) goto _output_error;
if (size1 != size2) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading a dictionary clears the prefix : ", testNb++);
{
CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) );
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading a dictionary clears the cdict : ", testNb++);
{
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1);
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) );
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading a cdict clears the prefix : ", testNb++);
{
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1);
CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) );
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading a cdict clears the dictionary : ", testNb++);
{
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1);
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) );
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading a prefix clears the dictionary : ", testNb++);
{
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) );
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading a prefix clears the cdict : ", testNb++);
{
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1);
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );
CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) );
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) );
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loaded dictionary persists across reset session : ", testNb++);
{
size_t size1, size2;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) );
size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
if (ZSTD_isError(size1)) goto _output_error;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
if (ZSTD_isError(size2)) goto _output_error;
if (size1 != size2) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loaded dictionary is cleared after resetting parameters : ", testNb++);
{
size_t size1, size2;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) );
size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
if (ZSTD_isError(size1)) goto _output_error;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
if (ZSTD_isError(size2)) goto _output_error;
if (size1 == size2) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize) );
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
CHECK_Z(cSize);
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with dictionary : ", testNb++);
{
ZSTD_DCtx* dctx = ZSTD_createDCtx();
size_t ret;
/* We should fail to decompress without a dictionary. */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (!ZSTD_isError(ret)) goto _output_error;
/* We should succeed to decompress with the dictionary. */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictSize) );
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
/* The dictionary should presist across calls. */
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
/* When we reset the context the dictionary is cleared. */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (!ZSTD_isError(ret)) goto _output_error;
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with ddict : ", testNb++);
{
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_DDict* ddict = ZSTD_createDDict(dictBuffer, dictSize);
size_t ret;
/* We should succeed to decompress with the ddict. */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddict) );
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
/* The ddict should presist across calls. */
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
/* When we reset the context the ddict is cleared. */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (!ZSTD_isError(ret)) goto _output_error;
ZSTD_freeDCtx(dctx);
ZSTD_freeDDict(ddict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with prefix : ", testNb++);
{
ZSTD_DCtx* dctx = ZSTD_createDCtx();
size_t ret;
/* We should succeed to decompress with the prefix. */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_DCtx_refPrefix_advanced(dctx, dictBuffer, dictSize, ZSTD_dct_auto) );
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
/* The prefix should be cleared after the first compression. */
ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (!ZSTD_isError(ret)) goto _output_error;
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
2017-12-13 01:37:06 +00:00
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
CNBuffer, samplesSizes, nbSamples);
if (ZDICT_isError(dictSize)) goto _output_error;
/* Set all the repcodes to non-default */
{
BYTE* dictPtr = (BYTE*)dictBuffer;
BYTE* dictLimit = dictPtr + dictSize - 12;
/* Find the repcodes */
while (dictPtr < dictLimit &&
(MEM_readLE32(dictPtr) != 1 || MEM_readLE32(dictPtr + 4) != 4 ||
MEM_readLE32(dictPtr + 8) != 8)) {
++dictPtr;
}
if (dictPtr >= dictLimit) goto _output_error;
MEM_writeLE32(dictPtr + 0, 10);
MEM_writeLE32(dictPtr + 4, 10);
MEM_writeLE32(dictPtr + 8, 10);
/* Set the last 8 bytes to 'x' */
memset((BYTE*)dictBuffer + dictSize - 8, 'x', 8);
}
/* The optimal parser checks all the repcodes.
* Make sure at least one is a match >= targetLength so that it is
* immediately chosen. This will make sure that the compressor and
* decompressor agree on at least one of the repcodes.
*/
{ size_t dSize;
BYTE data[1024];
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
2017-12-13 01:37:06 +00:00
ZSTD_compressionParameters const cParams = ZSTD_getCParams(19, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
2017-12-13 01:37:06 +00:00
cParams, ZSTD_defaultCMem);
assert(dctx != NULL); assert(cdict != NULL);
2017-12-13 01:37:06 +00:00
memset(data, 'x', sizeof(data));
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
data, sizeof(data), cdict);
ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(5, "Compression error %s : ", ZSTD_getErrorName(cSize)); goto _output_error; }
dSize = ZSTD_decompress_usingDict(dctx, decodedBuffer, sizeof(data), compressedBuffer, cSize, dictBuffer, dictSize);
if (ZSTD_isError(dSize)) { DISPLAYLEVEL(5, "Decompression error %s : ", ZSTD_getErrorName(dSize)); goto _output_error; }
if (memcmp(data, decodedBuffer, sizeof(data))) { DISPLAYLEVEL(5, "Data corruption : "); goto _output_error; }
ZSTD_freeDCtx(dctx);
2017-12-13 01:37:06 +00:00
}
DISPLAYLEVEL(3, "OK \n");
2017-12-13 01:37:06 +00:00
ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);
}
/* COVER dictionary builder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t dictSize = 16 KB;
size_t optDictSize = dictSize;
void* dictBuffer = malloc(dictSize);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
U32 seed32 = seed;
ZDICT_cover_params_t params;
U32 dictID;
if (dictBuffer==NULL || samplesSizes==NULL) {
free(dictBuffer);
free(samplesSizes);
goto _output_error;
}
DISPLAYLEVEL(3, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
memset(&params, 0, sizeof(params));
params.d = 1 + (FUZ_rand(&seed32) % 16);
params.k = params.d + (FUZ_rand(&seed32) % 256);
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize,
CNBuffer, samplesSizes, nbSamples,
params);
if (ZDICT_isError(dictSize)) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, dictSize);
if (dictID==0) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
DISPLAYLEVEL(3, "test%3i : ZDICT_optimizeTrainFromBuffer_cover : ", testNb++);
memset(&params, 0, sizeof(params));
params.steps = 4;
optDictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, optDictSize,
CNBuffer, samplesSizes,
nbSamples / 4, &params);
if (ZDICT_isError(optDictSize)) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)optDictSize);
DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, optDictSize);
if (dictID==0) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);
}
2015-01-24 00:58:16 +00:00
/* Decompression defense tests */
DISPLAYLEVEL(3, "test%3i : Check input length for magic number : ", testNb++);
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 3); /* too small input */
2016-05-29 23:56:08 +00:00
if (!ZSTD_isError(r)) goto _output_error;
if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
DISPLAYLEVEL(3, "test%3i : Check magic Number : ", testNb++);
2015-01-24 00:58:16 +00:00
((char*)(CNBuffer))[0] = 1;
2016-05-30 13:10:09 +00:00
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 4);
2016-05-29 21:09:51 +00:00
if (!ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
/* content size verification test */
DISPLAYLEVEL(3, "test%3i : Content size verification : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const srcSize = 5000;
size_t const wrongSrcSize = (srcSize + 1000);
ZSTD_parameters params = ZSTD_getParams(1, wrongSrcSize, 0);
params.fParams.contentSizeFlag = 1;
CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) );
{ size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize);
if (!ZSTD_isError(result)) goto _output_error;
if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error;
DISPLAYLEVEL(3, "OK : %s \n", ZSTD_getErrorName(result));
2017-04-14 05:46:41 +00:00
}
ZSTD_freeCCtx(cctx);
}
/* negative compression level test : ensure simple API and advanced API produce same result */
DISPLAYLEVEL(3, "test%3i : negative compression level : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const srcSize = CNBuffSize / 5;
int const compressionLevel = -1;
assert(cctx != NULL);
{ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0);
size_t const cSize_1pass = ZSTD_compress_advanced(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, srcSize,
NULL, 0,
params);
if (ZSTD_isError(cSize_1pass)) goto _output_error;
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
{ size_t const compressionResult = ZSTD_compress2(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, srcSize);
DISPLAYLEVEL(5, "simple=%zu vs %zu=advanced : ", cSize_1pass, compressionResult);
if (ZSTD_isError(compressionResult)) goto _output_error;
if (compressionResult != cSize_1pass) goto _output_error;
} }
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
2018-03-19 23:02:51 +00:00
/* parameters order test */
{ size_t const inputSize = CNBuffSize / 2;
U64 xxh64;
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
2018-03-19 23:02:51 +00:00
DISPLAYLEVEL(3, "test%3i : parameters in order : ", testNb++);
assert(cctx != NULL);
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) );
{ size_t const compressedSize = ZSTD_compress2(cctx,
compressedBuffer, ZSTD_compressBound(inputSize),
CNBuffer, inputSize);
CHECK(compressedSize);
cSize = compressedSize;
xxh64 = XXH64(compressedBuffer, compressedSize, 0);
2018-03-19 23:02:51 +00:00
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)cSize);
2018-03-19 23:02:51 +00:00
ZSTD_freeCCtx(cctx);
}
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
DISPLAYLEVEL(3, "test%3i : parameters disordered : ", testNb++);
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1) );
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) );
{ size_t const result = ZSTD_compress2(cctx,
compressedBuffer, ZSTD_compressBound(inputSize),
CNBuffer, inputSize);
CHECK(result);
if (result != cSize) goto _output_error; /* must result in same compressed result, hence same size */
if (XXH64(compressedBuffer, result, 0) != xxh64) goto _output_error; /* must result in exactly same content, hence same hash */
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)result);
2018-03-19 23:02:51 +00:00
}
ZSTD_freeCCtx(cctx);
}
}
/* advanced parameters for decompression */
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
DISPLAYLEVEL(3, "test%3i : get dParameter bounds ", testNb++);
{ ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
CHECK(bounds.error);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : wrong dParameter : ", testNb++);
{ size_t const sr = ZSTD_DCtx_setParameter(dctx, (ZSTD_dParameter)999999, 0);
if (!ZSTD_isError(sr)) goto _output_error;
}
{ ZSTD_bounds const bounds = ZSTD_dParam_getBounds((ZSTD_dParameter)999998);
if (!ZSTD_isError(bounds.error)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : out of bound dParameter : ", testNb++);
{ size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 9999);
if (!ZSTD_isError(sr)) goto _output_error;
}
{ size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (ZSTD_format_e)888);
if (!ZSTD_isError(sr)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
ZSTD_freeDCtx(dctx);
}
/* custom formats tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
size_t const inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */
assert(dctx != NULL); assert(cctx != NULL);
/* basic block compression */
DISPLAYLEVEL(3, "test%3i : magic-less format test : ", testNb++);
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
{ ZSTD_inBuffer in = { CNBuffer, inputSize, 0 };
ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 };
size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
if (result != 0) goto _output_error;
if (in.pos != in.size) goto _output_error;
cSize = out.pos;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)cSize);
DISPLAYLEVEL(3, "test%3i : decompress normally (should fail) : ", testNb++);
{ size_t const decodeResult = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (ZSTD_getErrorCode(decodeResult) != ZSTD_error_prefix_unknown) goto _output_error;
DISPLAYLEVEL(3, "OK : %s \n", ZSTD_getErrorName(decodeResult));
}
DISPLAYLEVEL(3, "test%3i : decompress of magic-less frame : ", testNb++);
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
2018-12-05 01:06:48 +00:00
CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) );
{ ZSTD_frameHeader zfh;
size_t const zfhrt = ZSTD_getFrameHeader_advanced(&zfh, compressedBuffer, cSize, ZSTD_f_zstd1_magicless);
if (zfhrt != 0) goto _output_error;
}
/* one shot */
{ size_t const result = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (result != inputSize) goto _output_error;
DISPLAYLEVEL(3, "one-shot OK, ");
}
/* streaming */
{ ZSTD_inBuffer in = { compressedBuffer, cSize, 0 };
ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 };
size_t const result = ZSTD_decompressStream(dctx, &out, &in);
if (result != 0) goto _output_error;
if (in.pos != in.size) goto _output_error;
if (out.pos != inputSize) goto _output_error;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "streaming OK : regenerated %u bytes \n", (unsigned)out.pos);
}
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
}
2016-01-09 00:08:23 +00:00
/* block API tests */
2016-03-19 11:12:07 +00:00
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
static const size_t dictSize = 65 KB;
static const size_t blockSize = 100 KB; /* won't cause pb with small dict size */
size_t cSize2;
assert(cctx != NULL); assert(dctx != NULL);
2016-01-09 00:08:23 +00:00
/* basic block compression */
DISPLAYLEVEL(3, "test%3i : Block compression test : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECK( ZSTD_compressBegin(cctx, 5) );
CHECK( ZSTD_getBlockSize(cctx) >= blockSize);
CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize) );
DISPLAYLEVEL(3, "OK \n");
2016-01-09 00:08:23 +00:00
DISPLAYLEVEL(3, "test%3i : Block decompression test : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECK( ZSTD_decompressBegin(dctx) );
{ CHECK_NEWV(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
2016-05-30 13:10:09 +00:00
if (r != blockSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2016-01-09 00:08:23 +00:00
/* very long stream of block compression */
DISPLAYLEVEL(3, "test%3i : Huge block streaming compression test : ", testNb++);
CHECK( ZSTD_compressBegin(cctx, -199) ); /* we just want to quickly overflow internal U32 index */
CHECK( ZSTD_getBlockSize(cctx) >= blockSize);
{ U64 const toCompress = 5000000000ULL; /* > 4 GB */
U64 compressed = 0;
while (compressed < toCompress) {
size_t const blockCSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize);
assert(blockCSize != 0);
if (ZSTD_isError(blockCSize)) goto _output_error;
compressed += blockCSize;
} }
DISPLAYLEVEL(3, "OK \n");
2016-01-09 01:00:10 +00:00
/* dictionary block compression */
DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize));
RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed); /* create a non-compressible second block */
{ CHECK_NEWV(r, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* for cctx history consistency */
assert(r == 0); /* non-compressible block */ }
memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* send non-compressed block (without header) */
CHECK_VAR(cSize2, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize),
(char*)CNBuffer+dictSize+2*blockSize, blockSize));
DISPLAYLEVEL(3, "OK \n");
2016-01-09 01:00:10 +00:00
DISPLAYLEVEL(3, "test%3i : Dictionary Block decompression test : ", testNb++);
2016-05-29 23:56:08 +00:00
CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
{ CHECK_NEWV( r, ZSTD_decompressBlock(dctx, decodedBuffer, blockSize, compressedBuffer, cSize) );
if (r != blockSize) {
DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize);
goto _output_error;
} }
memcpy((char*)decodedBuffer+blockSize, (char*)compressedBuffer+cSize, blockSize);
ZSTD_insertBlock(dctx, (char*)decodedBuffer+blockSize, blockSize); /* insert non-compressed block into dctx history */
{ CHECK_NEWV( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+2*blockSize, blockSize, (char*)compressedBuffer+cSize+blockSize, cSize2) );
if (r != blockSize) {
DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() and after insertBlock() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize);
goto _output_error;
} }
assert(memcpy((char*)CNBuffer+dictSize, decodedBuffer, blockSize*3)); /* ensure regenerated content is identical to origin */
DISPLAYLEVEL(3, "OK \n");
2016-01-09 01:00:10 +00:00
DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 3);
if (cdict==NULL) goto _output_error;
CHECK( ZSTD_compressBegin_usingCDict(cctx, cdict) );
CHECK( ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) );
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
2016-01-09 00:08:23 +00:00
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
2016-01-09 00:08:23 +00:00
}
2015-06-18 15:43:16 +00:00
/* long rle test */
2016-03-19 11:12:07 +00:00
{ size_t sampleSize = 0;
DISPLAYLEVEL(3, "test%3i : Long RLE test : ", testNb++);
2016-05-29 23:56:08 +00:00
RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., seed+1);
2015-06-18 15:43:16 +00:00
memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1);
sampleSize += 256 KB - 1;
2016-05-29 23:56:08 +00:00
RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., seed+2);
2015-06-18 15:43:16 +00:00
sampleSize += 96 KB;
cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
2015-06-18 15:43:16 +00:00
if (ZSTD_isError(cSize)) goto _output_error;
{ CHECK_NEWV(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
2016-05-29 23:56:08 +00:00
if (regenSize!=sampleSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2015-06-18 15:43:16 +00:00
}
/* Multiple blocks of zeros test */
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);
memset(CNBuffer, 0, LONGZEROSLENGTH);
CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(LONGZEROSLENGTH), CNBuffer, LONGZEROSLENGTH, 1) );
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/LONGZEROSLENGTH*100);
DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, LONGZEROSLENGTH);
{ CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, LONGZEROSLENGTH, compressedBuffer, cSize) );
if (r != LONGZEROSLENGTH) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2016-05-29 23:56:08 +00:00
/* All zeroes test (test bug #137) */
#define ZEROESLENGTH 100
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
memset(CNBuffer, 0, ZEROESLENGTH);
CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/ZEROESLENGTH*100);
DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
{ CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
2016-05-29 23:56:08 +00:00
if (r != ZEROESLENGTH) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
/* nbSeq limit test */
2016-03-19 11:12:07 +00:00
#define _3BYTESTESTLENGTH 131000
#define NB3BYTESSEQLOG 9
#define NB3BYTESSEQ (1 << NB3BYTESSEQLOG)
#define NB3BYTESSEQMASK (NB3BYTESSEQ-1)
2016-03-19 12:21:08 +00:00
/* creates a buffer full of 3-bytes sequences */
2016-03-19 11:12:07 +00:00
{ BYTE _3BytesSeqs[NB3BYTESSEQ][3];
2016-03-19 12:21:08 +00:00
U32 rSeed = 1;
2016-03-19 12:21:08 +00:00
/* create batch of 3-bytes sequences */
2016-07-07 11:14:21 +00:00
{ int i;
for (i=0; i < NB3BYTESSEQ; i++) {
_3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255);
_3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255);
_3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255);
} }
2016-03-19 12:21:08 +00:00
/* randomly fills CNBuffer with prepared 3-bytes sequences */
2016-07-07 11:14:21 +00:00
{ int i;
for (i=0; i < _3BYTESTESTLENGTH; i += 3) { /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */
U32 const id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK;
((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0];
((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1];
((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
} } }
2018-08-28 20:20:37 +00:00
DISPLAYLEVEL(3, "test%3i : growing nbSeq : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const maxNbSeq = _3BYTESTESTLENGTH / 3;
size_t const bound = ZSTD_compressBound(_3BYTESTESTLENGTH);
size_t nbSeq = 1;
while (nbSeq <= maxNbSeq) {
2018-08-28 20:42:01 +00:00
CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19));
2018-08-28 20:20:37 +00:00
/* Check every sequence for the first 100, then skip more rapidly. */
if (nbSeq < 100) {
++nbSeq;
} else {
nbSeq += (nbSeq >> 2);
}
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress lots 3-bytes sequences : ", testNb++);
CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
CNBuffer, _3BYTESTESTLENGTH, 19) );
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/_3BYTESTESTLENGTH*100);
DISPLAYLEVEL(3, "test%3i : decompress lots 3-bytes sequence : ", testNb++);
{ CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
2016-05-29 23:56:08 +00:00
if (r != _3BYTESTESTLENGTH) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2018-08-28 20:20:37 +00:00
2018-08-28 20:42:01 +00:00
DISPLAYLEVEL(3, "test%3i : growing literals buffer : ", testNb++);
[libzstd] Fix bug in Huffman encoding Summary: Huffman encoding with a bad dictionary can encode worse than the HUF_BLOCKBOUND(srcSize), since we don't filter out incompressible input, and even if we did, the dictionaries Huffman table could be ill suited to compressing actual data. The fast optimization doesn't seem to improve compression speed, even when I hard coded fast = 1, the speed didn't improve over hard coding it to 0. Benchmarks: $ ./zstd.dev -b1e5 Benchmarking levels from 1 to 5 1#Synthetic 50% : 10000000 -> 3139163 (3.186), 524.8 MB/s ,1890.0 MB/s 2#Synthetic 50% : 10000000 -> 3115138 (3.210), 372.6 MB/s ,1830.2 MB/s 3#Synthetic 50% : 10000000 -> 3222672 (3.103), 223.3 MB/s ,1400.2 MB/s 4#Synthetic 50% : 10000000 -> 3276678 (3.052), 198.0 MB/s ,1280.1 MB/s 5#Synthetic 50% : 10000000 -> 3271570 (3.057), 107.8 MB/s ,1200.0 MB/s $ ./zstd -b1e5 Benchmarking levels from 1 to 5 1#Synthetic 50% : 10000000 -> 3139163 (3.186), 524.8 MB/s ,1870.2 MB/s 2#Synthetic 50% : 10000000 -> 3115138 (3.210), 370.0 MB/s ,1810.3 MB/s 3#Synthetic 50% : 10000000 -> 3222672 (3.103), 223.3 MB/s ,1380.1 MB/s 4#Synthetic 50% : 10000000 -> 3276678 (3.052), 196.1 MB/s ,1270.0 MB/s 5#Synthetic 50% : 10000000 -> 3271570 (3.057), 106.8 MB/s ,1180.1 MB/s $ ./zstd.dev -b1e5 ../silesia.tar Benchmarking levels from 1 to 5 1#silesia.tar : 211988480 -> 73651685 (2.878), 429.7 MB/s ,1096.5 MB/s 2#silesia.tar : 211988480 -> 70158785 (3.022), 321.2 MB/s ,1029.1 MB/s 3#silesia.tar : 211988480 -> 66993813 (3.164), 243.7 MB/s , 981.4 MB/s 4#silesia.tar : 211988480 -> 66306481 (3.197), 226.7 MB/s , 972.4 MB/s 5#silesia.tar : 211988480 -> 64757852 (3.274), 150.3 MB/s , 963.6 MB/s $ ./zstd -b1e5 ../silesia.tar Benchmarking levels from 1 to 5 1#silesia.tar : 211988480 -> 73651685 (2.878), 429.7 MB/s ,1087.1 MB/s 2#silesia.tar : 211988480 -> 70158785 (3.022), 318.8 MB/s ,1029.1 MB/s 3#silesia.tar : 211988480 -> 66993813 (3.164), 246.5 MB/s , 981.4 MB/s 4#silesia.tar : 211988480 -> 66306481 (3.197), 229.2 MB/s , 972.4 MB/s 5#silesia.tar : 211988480 -> 64757852 (3.274), 149.3 MB/s , 963.6 MB/s Test Plan: I added a test case to the fuzzer which crashed with ASAN before the patch and succeeded after.
2017-07-18 18:21:19 +00:00
RDG_genBuffer(CNBuffer, CNBuffSize, 0.0, 0.1, seed);
2018-08-28 20:20:37 +00:00
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const bound = ZSTD_compressBound(CNBuffSize);
size_t size = 1;
while (size <= CNBuffSize) {
CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3));
/* Check every size for the first 100, then skip more rapidly. */
if (size < 100) {
++size;
} else {
size += (size >> 2);
}
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : incompressible data and ill suited dictionary : ", testNb++);
[libzstd] Fix bug in Huffman encoding Summary: Huffman encoding with a bad dictionary can encode worse than the HUF_BLOCKBOUND(srcSize), since we don't filter out incompressible input, and even if we did, the dictionaries Huffman table could be ill suited to compressing actual data. The fast optimization doesn't seem to improve compression speed, even when I hard coded fast = 1, the speed didn't improve over hard coding it to 0. Benchmarks: $ ./zstd.dev -b1e5 Benchmarking levels from 1 to 5 1#Synthetic 50% : 10000000 -> 3139163 (3.186), 524.8 MB/s ,1890.0 MB/s 2#Synthetic 50% : 10000000 -> 3115138 (3.210), 372.6 MB/s ,1830.2 MB/s 3#Synthetic 50% : 10000000 -> 3222672 (3.103), 223.3 MB/s ,1400.2 MB/s 4#Synthetic 50% : 10000000 -> 3276678 (3.052), 198.0 MB/s ,1280.1 MB/s 5#Synthetic 50% : 10000000 -> 3271570 (3.057), 107.8 MB/s ,1200.0 MB/s $ ./zstd -b1e5 Benchmarking levels from 1 to 5 1#Synthetic 50% : 10000000 -> 3139163 (3.186), 524.8 MB/s ,1870.2 MB/s 2#Synthetic 50% : 10000000 -> 3115138 (3.210), 370.0 MB/s ,1810.3 MB/s 3#Synthetic 50% : 10000000 -> 3222672 (3.103), 223.3 MB/s ,1380.1 MB/s 4#Synthetic 50% : 10000000 -> 3276678 (3.052), 196.1 MB/s ,1270.0 MB/s 5#Synthetic 50% : 10000000 -> 3271570 (3.057), 106.8 MB/s ,1180.1 MB/s $ ./zstd.dev -b1e5 ../silesia.tar Benchmarking levels from 1 to 5 1#silesia.tar : 211988480 -> 73651685 (2.878), 429.7 MB/s ,1096.5 MB/s 2#silesia.tar : 211988480 -> 70158785 (3.022), 321.2 MB/s ,1029.1 MB/s 3#silesia.tar : 211988480 -> 66993813 (3.164), 243.7 MB/s , 981.4 MB/s 4#silesia.tar : 211988480 -> 66306481 (3.197), 226.7 MB/s , 972.4 MB/s 5#silesia.tar : 211988480 -> 64757852 (3.274), 150.3 MB/s , 963.6 MB/s $ ./zstd -b1e5 ../silesia.tar Benchmarking levels from 1 to 5 1#silesia.tar : 211988480 -> 73651685 (2.878), 429.7 MB/s ,1087.1 MB/s 2#silesia.tar : 211988480 -> 70158785 (3.022), 318.8 MB/s ,1029.1 MB/s 3#silesia.tar : 211988480 -> 66993813 (3.164), 246.5 MB/s , 981.4 MB/s 4#silesia.tar : 211988480 -> 66306481 (3.197), 229.2 MB/s , 972.4 MB/s 5#silesia.tar : 211988480 -> 64757852 (3.274), 149.3 MB/s , 963.6 MB/s Test Plan: I added a test case to the fuzzer which crashed with ASAN before the patch and succeeded after.
2017-07-18 18:21:19 +00:00
{ /* Train a dictionary on low characters */
size_t dictSize = 16 KB;
void* const dictBuffer = malloc(dictSize);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
if (!dictBuffer || !samplesSizes) goto _output_error;
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize, CNBuffer, samplesSizes, nbSamples);
if (ZDICT_isError(dictSize)) goto _output_error;
/* Reverse the characters to make the dictionary ill suited */
{ U32 u;
for (u = 0; u < CNBuffSize; ++u) {
((BYTE*)CNBuffer)[u] = 255 - ((BYTE*)CNBuffer)[u];
}
}
{ /* Compress the data */
size_t const inputSize = 500;
size_t const outputSize = ZSTD_compressBound(inputSize);
void* const outputBuffer = malloc(outputSize);
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
if (!outputBuffer || !cctx) goto _output_error;
CHECK(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1));
free(outputBuffer);
ZSTD_freeCCtx(cctx);
}
free(dictBuffer);
free(samplesSizes);
}
DISPLAYLEVEL(3, "OK \n");
[libzstd] Fix bug in Huffman encoding Summary: Huffman encoding with a bad dictionary can encode worse than the HUF_BLOCKBOUND(srcSize), since we don't filter out incompressible input, and even if we did, the dictionaries Huffman table could be ill suited to compressing actual data. The fast optimization doesn't seem to improve compression speed, even when I hard coded fast = 1, the speed didn't improve over hard coding it to 0. Benchmarks: $ ./zstd.dev -b1e5 Benchmarking levels from 1 to 5 1#Synthetic 50% : 10000000 -> 3139163 (3.186), 524.8 MB/s ,1890.0 MB/s 2#Synthetic 50% : 10000000 -> 3115138 (3.210), 372.6 MB/s ,1830.2 MB/s 3#Synthetic 50% : 10000000 -> 3222672 (3.103), 223.3 MB/s ,1400.2 MB/s 4#Synthetic 50% : 10000000 -> 3276678 (3.052), 198.0 MB/s ,1280.1 MB/s 5#Synthetic 50% : 10000000 -> 3271570 (3.057), 107.8 MB/s ,1200.0 MB/s $ ./zstd -b1e5 Benchmarking levels from 1 to 5 1#Synthetic 50% : 10000000 -> 3139163 (3.186), 524.8 MB/s ,1870.2 MB/s 2#Synthetic 50% : 10000000 -> 3115138 (3.210), 370.0 MB/s ,1810.3 MB/s 3#Synthetic 50% : 10000000 -> 3222672 (3.103), 223.3 MB/s ,1380.1 MB/s 4#Synthetic 50% : 10000000 -> 3276678 (3.052), 196.1 MB/s ,1270.0 MB/s 5#Synthetic 50% : 10000000 -> 3271570 (3.057), 106.8 MB/s ,1180.1 MB/s $ ./zstd.dev -b1e5 ../silesia.tar Benchmarking levels from 1 to 5 1#silesia.tar : 211988480 -> 73651685 (2.878), 429.7 MB/s ,1096.5 MB/s 2#silesia.tar : 211988480 -> 70158785 (3.022), 321.2 MB/s ,1029.1 MB/s 3#silesia.tar : 211988480 -> 66993813 (3.164), 243.7 MB/s , 981.4 MB/s 4#silesia.tar : 211988480 -> 66306481 (3.197), 226.7 MB/s , 972.4 MB/s 5#silesia.tar : 211988480 -> 64757852 (3.274), 150.3 MB/s , 963.6 MB/s $ ./zstd -b1e5 ../silesia.tar Benchmarking levels from 1 to 5 1#silesia.tar : 211988480 -> 73651685 (2.878), 429.7 MB/s ,1087.1 MB/s 2#silesia.tar : 211988480 -> 70158785 (3.022), 318.8 MB/s ,1029.1 MB/s 3#silesia.tar : 211988480 -> 66993813 (3.164), 246.5 MB/s , 981.4 MB/s 4#silesia.tar : 211988480 -> 66306481 (3.197), 229.2 MB/s , 972.4 MB/s 5#silesia.tar : 211988480 -> 64757852 (3.274), 149.3 MB/s , 963.6 MB/s Test Plan: I added a test case to the fuzzer which crashed with ASAN before the patch and succeeded after.
2017-07-18 18:21:19 +00:00
/* findFrameCompressedSize on skippable frames */
DISPLAYLEVEL(3, "test%3i : frame compressed size of skippable frame : ", testNb++);
{ const char* frame = "\x50\x2a\x4d\x18\x05\x0\x0\0abcde";
size_t const frameSrcSize = 13;
if (ZSTD_findFrameCompressedSize(frame, frameSrcSize) != frameSrcSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
2017-02-08 23:31:47 +00:00
/* error string tests */
DISPLAYLEVEL(3, "test%3i : testing ZSTD error code strings : ", testNb++);
2017-02-08 23:31:47 +00:00
if (strcmp("No error detected", ZSTD_getErrorName((ZSTD_ErrorCode)(0-ZSTD_error_no_error))) != 0) goto _output_error;
if (strcmp("No error detected", ZSTD_getErrorString(ZSTD_error_no_error)) != 0) goto _output_error;
if (strcmp("Unspecified error code", ZSTD_getErrorString((ZSTD_ErrorCode)(0-ZSTD_error_GENERIC))) != 0) goto _output_error;
if (strcmp("Error (generic)", ZSTD_getErrorName((size_t)0-ZSTD_error_GENERIC)) != 0) goto _output_error;
if (strcmp("Error (generic)", ZSTD_getErrorString(ZSTD_error_GENERIC)) != 0) goto _output_error;
if (strcmp("No error detected", ZSTD_getErrorName(ZSTD_error_GENERIC)) != 0) goto _output_error;
DISPLAYLEVEL(3, "OK \n");
2017-02-08 23:31:47 +00:00
2018-02-13 18:09:01 +00:00
DISPLAYLEVEL(3, "test%3i : testing ZSTD dictionary sizes : ", testNb++);
RDG_genBuffer(CNBuffer, CNBuffSize, compressibility, 0., seed);
{
size_t const size = MIN(128 KB, CNBuffSize);
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_CDict* const lgCDict = ZSTD_createCDict(CNBuffer, size, 1);
ZSTD_CDict* const smCDict = ZSTD_createCDict(CNBuffer, 1 KB, 1);
ZSTD_frameHeader lgHeader;
ZSTD_frameHeader smHeader;
CHECK_Z(ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, CNBuffer, size, lgCDict));
CHECK_Z(ZSTD_getFrameHeader(&lgHeader, compressedBuffer, compressedBufferSize));
CHECK_Z(ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, CNBuffer, size, smCDict));
CHECK_Z(ZSTD_getFrameHeader(&smHeader, compressedBuffer, compressedBufferSize));
if (lgHeader.windowSize != smHeader.windowSize) goto _output_error;
ZSTD_freeCDict(smCDict);
ZSTD_freeCDict(lgCDict);
ZSTD_freeCCtx(cctx);
}
2018-02-13 18:09:01 +00:00
DISPLAYLEVEL(3, "OK \n");
2018-07-30 19:57:11 +00:00
DISPLAYLEVEL(3, "test%3i : testing FSE_normalizeCount() PR#1255: ", testNb++);
{
short norm[32];
unsigned count[32];
unsigned const tableLog = 5;
size_t const nbSeq = 32;
unsigned const maxSymbolValue = 31;
size_t i;
for (i = 0; i < 32; ++i)
count[i] = 1;
/* Calling FSE_normalizeCount() on a uniform distribution should not
* cause a division by zero.
*/
FSE_normalizeCount(norm, tableLog, count, nbSeq, maxSymbolValue);
}
DISPLAYLEVEL(3, "OK \n");
2015-01-24 00:58:16 +00:00
_end:
free(CNBuffer);
free(compressedBuffer);
free(decodedBuffer);
return testResult;
_output_error:
testResult = 1;
DISPLAY("Error detected in Unit tests ! \n");
goto _end;
}
static size_t findDiff(const void* buf1, const void* buf2, size_t max)
{
2015-06-18 15:43:16 +00:00
const BYTE* b1 = (const BYTE*)buf1;
const BYTE* b2 = (const BYTE*)buf2;
2016-05-29 23:56:08 +00:00
size_t u;
for (u=0; u<max; u++) {
if (b1[u] != b2[u]) break;
2015-01-24 00:58:16 +00:00
}
2016-05-29 23:56:08 +00:00
return u;
2015-01-24 00:58:16 +00:00
}
2016-04-08 18:26:33 +00:00
2017-04-27 21:19:34 +00:00
static ZSTD_parameters FUZ_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
{
ZSTD_parameters params;
params.cParams = cParams;
params.fParams = fParams;
return params;
}
2016-04-08 18:26:33 +00:00
static size_t FUZ_rLogLength(U32* seed, U32 logLength)
{
size_t const lengthMask = ((size_t)1 << logLength) - 1;
return (lengthMask+1) + (FUZ_rand(seed) & lengthMask);
}
static size_t FUZ_randomLength(U32* seed, U32 maxLog)
{
U32 const logLength = FUZ_rand(seed) % maxLog;
return FUZ_rLogLength(seed, logLength);
}
2016-05-29 23:56:08 +00:00
#undef CHECK
#define CHECK(cond, ...) { \
if (cond) { \
DISPLAY("Error => "); \
DISPLAY(__VA_ARGS__); \
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAY(" (seed %u, test nb %u) \n", (unsigned)seed, testNb); \
goto _output_error; \
} }
#undef CHECK_Z
#define CHECK_Z(f) { \
size_t const err = f; \
if (ZSTD_isError(err)) { \
DISPLAY("Error => %s : %s ", \
#f, ZSTD_getErrorName(err)); \
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAY(" (seed %u, test nb %u) \n", (unsigned)seed, testNb); \
goto _output_error; \
} }
2015-01-24 00:58:16 +00:00
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const maxDurationS, double compressibility, int bigTests)
2015-01-24 00:58:16 +00:00
{
2016-04-08 18:26:33 +00:00
static const U32 maxSrcLog = 23;
static const U32 maxSampleLog = 22;
2016-05-29 23:56:08 +00:00
size_t const srcBufferSize = (size_t)1<<maxSrcLog;
size_t const dstBufferSize = (size_t)1<<maxSampleLog;
size_t const cBufferSize = ZSTD_compressBound(dstBufferSize);
BYTE* cNoiseBuffer[5];
2016-05-29 23:56:08 +00:00
BYTE* const cBuffer = (BYTE*) malloc (cBufferSize);
BYTE* const dstBuffer = (BYTE*) malloc (dstBufferSize);
BYTE* const mirrorBuffer = (BYTE*) malloc (dstBufferSize);
2016-05-30 13:10:09 +00:00
ZSTD_CCtx* const refCtx = ZSTD_createCCtx();
ZSTD_CCtx* const ctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
U32 result = 0;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
unsigned testNb = 0;
2018-08-14 23:56:07 +00:00
U32 coreSeed = seed;
2017-11-30 03:11:12 +00:00
UTIL_time_t const startClock = UTIL_getTime();
U64 const maxClockSpan = maxDurationS * SEC_TO_MICRO;
int const cLevelLimiter = bigTests ? 3 : 2;
2015-01-24 00:58:16 +00:00
/* allocation */
cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4]
2016-01-07 14:35:18 +00:00
|| !dstBuffer || !mirrorBuffer || !cBuffer || !refCtx || !ctx || !dctx,
"Not enough memory, fuzzer tests cancelled");
2015-01-24 00:58:16 +00:00
/* Create initial samples */
RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */
RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */
RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */
RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */
2015-01-24 00:58:16 +00:00
/* catch up testNb */
2016-03-19 11:47:52 +00:00
for (testNb=1; testNb < startTest; testNb++) FUZ_rand(&coreSeed);
2015-01-24 00:58:16 +00:00
2016-03-19 11:47:52 +00:00
/* main test loop */
2017-11-30 03:11:12 +00:00
for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < maxClockSpan); testNb++ ) {
2018-08-14 23:56:07 +00:00
BYTE* srcBuffer; /* jumping pointer */
U32 lseed;
2016-05-29 23:56:08 +00:00
size_t sampleSize, maxTestSize, totalTestSize;
size_t cSize, totalCSize, totalGenSize;
2016-04-08 18:26:33 +00:00
U64 crcOrig;
2015-11-19 11:02:28 +00:00
BYTE* sampleBuffer;
2015-12-06 12:18:37 +00:00
const BYTE* dict;
size_t dictSize;
2015-01-24 00:58:16 +00:00
2016-03-19 11:47:52 +00:00
/* notification */
if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); }
else { DISPLAYUPDATE(2, "\r%6u ", testNb); }
2015-12-04 16:45:35 +00:00
2015-01-24 00:58:16 +00:00
FUZ_rand(&coreSeed);
2016-03-19 12:21:08 +00:00
{ U32 const prime1 = 2654435761U; lseed = coreSeed ^ prime1; }
2016-04-08 18:26:33 +00:00
/* srcBuffer selection [0-4] */
{ U32 buffNb = FUZ_rand(&lseed) & 0x7F;
if (buffNb & 7) buffNb=2; /* most common : compressible (P) */
else {
buffNb >>= 3;
if (buffNb & 7) {
const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */
buffNb = tnb[buffNb >> 3];
} else {
const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */
buffNb = tnb[buffNb >> 3];
} }
srcBuffer = cNoiseBuffer[buffNb];
}
/* select src segment */
2016-05-30 13:10:09 +00:00
sampleSize = FUZ_randomLength(&lseed, maxSampleLog);
2015-01-24 00:58:16 +00:00
2015-11-19 11:02:28 +00:00
/* create sample buffer (to catch read error with valgrind & sanitizers) */
sampleBuffer = (BYTE*)malloc(sampleSize);
2016-05-30 13:10:09 +00:00
CHECK(sampleBuffer==NULL, "not enough memory for sample buffer");
2016-05-29 23:56:08 +00:00
{ size_t const sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
memcpy(sampleBuffer, srcBuffer + sampleStart, sampleSize); }
2015-11-19 11:02:28 +00:00
crcOrig = XXH64(sampleBuffer, sampleSize, 0);
2016-04-08 18:26:33 +00:00
/* compression tests */
{ int const cLevelPositive =
( FUZ_rand(&lseed) %
(ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / cLevelLimiter)) )
+ 1;
int const cLevel = ((FUZ_rand(&lseed) & 15) == 3) ?
- (int)((FUZ_rand(&lseed) & 7) + 1) : /* test negative cLevel */
cLevelPositive;
DISPLAYLEVEL(5, "fuzzer t%u: Simple compression test (level %i) \n", testNb, cLevel);
2016-04-08 18:26:33 +00:00
cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize));
2016-04-08 18:26:33 +00:00
/* compression failure test : too small dest buffer */
2018-11-06 01:50:30 +00:00
assert(cSize > 3);
{ const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;
2016-04-08 18:26:33 +00:00
const size_t tooSmallSize = cSize - missing;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
const unsigned endMark = 0x4DC2B1A9;
memcpy(dstBuffer+tooSmallSize, &endMark, sizeof(endMark));
2018-11-06 01:50:30 +00:00
DISPLAYLEVEL(5, "fuzzer t%u: compress into too small buffer of size %u (missing %u bytes) \n",
testNb, (unsigned)tooSmallSize, (unsigned)missing);
2016-04-08 18:26:33 +00:00
{ size_t const errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (unsigned)tooSmallSize, (unsigned)cSize); }
{ unsigned endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, sizeof(endCheck));
2018-11-06 01:50:30 +00:00
CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow (check.%08X != %08X.mark)", endCheck, endMark); }
2016-05-30 13:10:09 +00:00
} }
2016-04-08 18:26:33 +00:00
/* frame header decompression test */
{ ZSTD_frameHeader zfh;
CHECK_Z( ZSTD_getFrameHeader(&zfh, cBuffer, cSize) );
CHECK(zfh.frameContentSize != sampleSize, "Frame content size incorrect");
}
2016-07-07 11:14:21 +00:00
/* Decompressed size test */
{ unsigned long long const rSize = ZSTD_findDecompressedSize(cBuffer, cSize);
2016-07-07 11:14:21 +00:00
CHECK(rSize != sampleSize, "decompressed size incorrect");
}
2016-03-19 11:47:52 +00:00
/* successful decompression test */
DISPLAYLEVEL(5, "fuzzer t%u: simple decompression test \n", testNb);
2016-04-08 18:26:33 +00:00
{ size_t const margin = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
2016-05-29 23:56:08 +00:00
size_t const dSize = ZSTD_decompress(dstBuffer, sampleSize + margin, cBuffer, cSize);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (unsigned)sampleSize, (unsigned)cSize);
2016-04-08 18:26:33 +00:00
{ U64 const crcDest = XXH64(dstBuffer, sampleSize, 0);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (unsigned)findDiff(sampleBuffer, dstBuffer, sampleSize), (unsigned)sampleSize);
2016-04-08 18:26:33 +00:00
} }
2015-11-19 11:02:28 +00:00
free(sampleBuffer); /* no longer useful after this point */
/* truncated src decompression test */
DISPLAYLEVEL(5, "fuzzer t%u: decompression of truncated source \n", testNb);
2016-04-08 18:26:33 +00:00
{ size_t const missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
size_t const tooSmallSize = cSize - missing;
2016-05-30 13:10:09 +00:00
void* cBufferTooSmall = malloc(tooSmallSize); /* valgrind will catch read overflows */
CHECK(cBufferTooSmall == NULL, "not enough memory !");
memcpy(cBufferTooSmall, cBuffer, tooSmallSize);
2016-04-08 18:26:33 +00:00
{ size_t const errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize);
CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)"); }
free(cBufferTooSmall);
}
/* too small dst decompression test */
DISPLAYLEVEL(5, "fuzzer t%u: decompress into too small dst buffer \n", testNb);
2016-03-19 11:12:07 +00:00
if (sampleSize > 3) {
2016-04-08 13:25:32 +00:00
size_t const missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
size_t const tooSmallSize = sampleSize - missing;
static const BYTE token = 0xA9;
dstBuffer[tooSmallSize] = token;
2016-04-08 18:26:33 +00:00
{ size_t const errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (unsigned)errorCode, (unsigned)tooSmallSize); }
CHECK(dstBuffer[tooSmallSize] != token, "ZSTD_decompress : dst buffer overflow");
}
/* noisy src decompression test */
2016-03-19 11:12:07 +00:00
if (cSize > 6) {
/* insert noise into src */
{ U32 const maxNbBits = FUZ_highbit32((U32)(cSize-4));
size_t pos = 4; /* preserve magic number (too easy to detect) */
for (;;) {
/* keep some original src */
{ U32 const nbBits = FUZ_rand(&lseed) % maxNbBits;
size_t const mask = (1<<nbBits) - 1;
size_t const skipLength = FUZ_rand(&lseed) & mask;
pos += skipLength;
}
if (pos >= cSize) break;
2016-03-19 11:12:07 +00:00
/* add noise */
2016-05-29 21:09:51 +00:00
{ U32 const nbBitsCodes = FUZ_rand(&lseed) % maxNbBits;
U32 const nbBits = nbBitsCodes ? nbBitsCodes-1 : 0;
size_t const mask = (1<<nbBits) - 1;
size_t const rNoiseLength = (FUZ_rand(&lseed) & mask) + 1;
size_t const noiseLength = MIN(rNoiseLength, cSize-pos);
size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength);
2016-03-19 11:12:07 +00:00
memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength);
pos += noiseLength;
} } }
/* decompress noisy source */
DISPLAYLEVEL(5, "fuzzer t%u: decompress noisy source \n", testNb);
2016-03-19 11:12:07 +00:00
{ U32 const endMark = 0xA9B1C3D6;
memcpy(dstBuffer+sampleSize, &endMark, 4);
2016-04-08 18:26:33 +00:00
{ size_t const decompressResult = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
/* result *may* be an unlikely success, but even then, it must strictly respect dst buffer boundaries */
CHECK((!ZSTD_isError(decompressResult)) && (decompressResult>sampleSize),
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
"ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (unsigned)decompressResult, (unsigned)sampleSize);
2016-04-08 18:26:33 +00:00
}
{ U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4);
CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow");
} } } /* noisy src decompression test */
/*===== Bufferless streaming compression test, scattered segments and dictionary =====*/
DISPLAYLEVEL(5, "fuzzer t%u: Bufferless streaming compression test \n", testNb);
2016-04-08 18:26:33 +00:00
{ U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
int const cLevel = (FUZ_rand(&lseed) %
(ZSTD_maxCLevel() -
(MAX(testLog, dictLog) / cLevelLimiter))) +
1;
2016-04-08 18:26:33 +00:00
maxTestSize = FUZ_rLogLength(&lseed, testLog);
if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1;
dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */
2016-05-29 23:56:08 +00:00
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
2016-04-08 18:26:33 +00:00
DISPLAYLEVEL(6, "fuzzer t%u: Compressing up to <=%u bytes at level %i with dictionary size %u \n",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
testNb, (unsigned)maxTestSize, cLevel, (unsigned)dictSize);
2016-05-31 16:13:56 +00:00
if (FUZ_rand(&lseed) & 0xF) {
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
2016-05-29 21:09:51 +00:00
} else {
ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_frameParameters const fPar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */,
2016-05-31 16:13:56 +00:00
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
2017-04-27 21:19:34 +00:00
ZSTD_parameters const p = FUZ_makeParams(cPar, fPar);
CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) );
2016-05-29 21:09:51 +00:00
}
CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) );
}
2016-05-30 13:10:09 +00:00
{ U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2;
U32 n;
XXH64_state_t xxhState;
XXH64_reset(&xxhState, 0);
2016-05-30 13:10:09 +00:00
for (totalTestSize=0, cSize=0, n=0 ; n<nbChunks ; n++) {
size_t const segmentSize = FUZ_randomLength(&lseed, maxSampleLog);
size_t const segmentStart = FUZ_rand(&lseed) % (srcBufferSize - segmentSize);
if (cBufferSize-cSize < ZSTD_compressBound(segmentSize)) break; /* avoid invalid dstBufferTooSmall */
if (totalTestSize+segmentSize > maxTestSize) break;
{ size_t const compressResult = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+segmentStart, segmentSize);
CHECK (ZSTD_isError(compressResult), "multi-segments compression error : %s", ZSTD_getErrorName(compressResult));
cSize += compressResult;
}
XXH64_update(&xxhState, srcBuffer+segmentStart, segmentSize);
memcpy(mirrorBuffer + totalTestSize, srcBuffer+segmentStart, segmentSize);
totalTestSize += segmentSize;
}
{ size_t const flushResult = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize, NULL, 0);
CHECK (ZSTD_isError(flushResult), "multi-segments epilogue error : %s", ZSTD_getErrorName(flushResult));
cSize += flushResult;
}
crcOrig = XXH64_digest(&xxhState);
2016-04-08 18:26:33 +00:00
}
/* streaming decompression test */
DISPLAYLEVEL(5, "fuzzer t%u: Bufferless streaming decompression test \n", testNb);
/* ensure memory requirement is good enough (should always be true) */
{ ZSTD_frameHeader zfh;
CHECK( ZSTD_getFrameHeader(&zfh, cBuffer, ZSTD_FRAMEHEADERSIZE_MAX),
"ZSTD_getFrameHeader(): error retrieving frame information");
{ size_t const roundBuffSize = ZSTD_decodingBufferSize_min(zfh.windowSize, zfh.frameContentSize);
CHECK_Z(roundBuffSize);
CHECK((roundBuffSize > totalTestSize) && (zfh.frameContentSize!=ZSTD_CONTENTSIZE_UNKNOWN),
"ZSTD_decodingBufferSize_min() requires more memory (%u) than necessary (%u)",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
(unsigned)roundBuffSize, (unsigned)totalTestSize );
} }
2016-05-29 21:09:51 +00:00
if (dictSize<8) dictSize=0, dict=NULL; /* disable dictionary */
CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, dict, dictSize) );
totalCSize = 0;
totalGenSize = 0;
2016-03-19 11:12:07 +00:00
while (totalCSize < cSize) {
2016-04-08 18:26:33 +00:00
size_t const inSize = ZSTD_nextSrcSizeToDecompress(dctx);
size_t const genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize);
2016-07-12 07:47:31 +00:00
CHECK (ZSTD_isError(genSize), "ZSTD_decompressContinue error : %s", ZSTD_getErrorName(genSize));
totalGenSize += genSize;
totalCSize += inSize;
}
CHECK (ZSTD_nextSrcSizeToDecompress(dctx) != 0, "frame not fully decoded");
2016-05-29 23:56:08 +00:00
CHECK (totalGenSize != totalTestSize, "streaming decompressed data : wrong size")
CHECK (totalCSize != cSize, "compressed data should be fully read")
2016-04-08 18:26:33 +00:00
{ U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
2018-08-14 23:56:07 +00:00
CHECK(crcOrig != crcDest, "streaming decompressed data corrupted (pos %u / %u)",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
(unsigned)findDiff(mirrorBuffer, dstBuffer, totalTestSize), (unsigned)totalTestSize);
2018-08-14 23:56:07 +00:00
}
2016-04-08 18:26:33 +00:00
} /* for ( ; (testNb <= nbTests) */
2015-12-04 16:45:35 +00:00
DISPLAY("\r%u fuzzer tests completed \n", testNb-1);
2015-01-24 00:58:16 +00:00
_cleanup:
2016-01-07 14:35:18 +00:00
ZSTD_freeCCtx(refCtx);
2015-10-29 17:23:38 +00:00
ZSTD_freeCCtx(ctx);
ZSTD_freeDCtx(dctx);
free(cNoiseBuffer[0]);
free(cNoiseBuffer[1]);
free(cNoiseBuffer[2]);
free(cNoiseBuffer[3]);
free(cNoiseBuffer[4]);
2015-01-24 00:58:16 +00:00
free(cBuffer);
free(dstBuffer);
free(mirrorBuffer);
2015-01-24 00:58:16 +00:00
return result;
_output_error:
result = 1;
goto _cleanup;
}
2016-03-19 11:12:07 +00:00
/*_*******************************************************
2015-01-24 00:58:16 +00:00
* Command line
*********************************************************/
static int FUZ_usage(const char* programName)
2015-01-24 00:58:16 +00:00
{
DISPLAY( "Usage :\n");
DISPLAY( " %s [args]\n", programName);
DISPLAY( "\n");
DISPLAY( "Arguments :\n");
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAY( " -i# : Nb of tests (default:%i) \n", nbTestsDefault);
2015-01-24 00:58:16 +00:00
DISPLAY( " -s# : Select seed (default:prompt user)\n");
DISPLAY( " -t# : Select starting test number (default:0)\n");
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_compressibility_default);
2015-01-24 00:58:16 +00:00
DISPLAY( " -v : verbose\n");
DISPLAY( " -p : pause at the end\n");
2015-01-24 00:58:16 +00:00
DISPLAY( " -h : display help and exit\n");
return 0;
}
/*! readU32FromChar() :
@return : unsigned integer value read from input in `char` format
allows and interprets K, KB, KiB, M, MB and MiB suffix.
Will also modify `*stringPtr`, advancing it to position where it stopped reading.
Note : function result can overflow if digit string > MAX_UINT */
static unsigned readU32FromChar(const char** stringPtr)
{
unsigned result = 0;
while ((**stringPtr >='0') && (**stringPtr <='9'))
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
if ((**stringPtr=='K') || (**stringPtr=='M')) {
result <<= 10;
if (**stringPtr=='M') result <<= 10;
(*stringPtr)++ ;
if (**stringPtr=='i') (*stringPtr)++;
if (**stringPtr=='B') (*stringPtr)++;
}
return result;
}
2015-01-24 00:58:16 +00:00
/** longCommandWArg() :
* check if *stringPtr is the same as longCommand.
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
* @return 0 and doesn't modify *stringPtr otherwise.
*/
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
if (result) *stringPtr += comSize;
return result;
}
2016-03-19 11:12:07 +00:00
int main(int argc, const char** argv)
2015-01-24 00:58:16 +00:00
{
U32 seed = 0;
int seedset = 0;
2015-01-24 00:58:16 +00:00
int argNb;
int nbTests = nbTestsDefault;
int testNb = 0;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
int proba = FUZ_compressibility_default;
int result = 0;
2015-01-24 00:58:16 +00:00
U32 mainPause = 0;
2016-03-19 12:21:08 +00:00
U32 maxDuration = 0;
int bigTests = 1;
2017-07-10 20:48:41 +00:00
U32 memTestsOnly = 0;
const char* const programName = argv[0];
2015-01-24 00:58:16 +00:00
/* Check command line */
2016-03-19 11:12:07 +00:00
for (argNb=1; argNb<argc; argNb++) {
const char* argument = argv[argNb];
2015-01-24 00:58:16 +00:00
if(!argument) continue; /* Protection if argument empty */
/* Handle commands. Aggregated commands are allowed */
2016-03-19 11:12:07 +00:00
if (argument[0]=='-') {
if (longCommandWArg(&argument, "--memtest=")) { memTestsOnly = readU32FromChar(&argument); continue; }
2017-07-10 20:48:41 +00:00
if (!strcmp(argument, "--memtest")) { memTestsOnly=1; continue; }
if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; }
2015-01-24 00:58:16 +00:00
argument++;
2016-03-19 11:12:07 +00:00
while (*argument!=0) {
2015-01-24 00:58:16 +00:00
switch(*argument)
{
case 'h':
return FUZ_usage(programName);
2015-01-24 00:58:16 +00:00
case 'v':
argument++;
g_displayLevel++;
2015-01-24 00:58:16 +00:00
break;
2015-01-24 00:58:16 +00:00
case 'q':
argument++;
g_displayLevel--;
break;
2015-01-24 00:58:16 +00:00
case 'p': /* pause at the end */
argument++;
mainPause = 1;
break;
case 'i':
argument++; maxDuration = 0;
nbTests = (int)readU32FromChar(&argument);
2015-01-24 00:58:16 +00:00
break;
2015-12-04 16:25:26 +00:00
case 'T':
argument++;
nbTests = 0;
maxDuration = readU32FromChar(&argument);
if (*argument=='s') argument++; /* seconds */
if (*argument=='m') maxDuration *= 60, argument++; /* minutes */
2015-12-04 16:25:26 +00:00
if (*argument=='n') argument++;
break;
2015-01-24 00:58:16 +00:00
case 's':
argument++;
seedset = 1;
seed = readU32FromChar(&argument);
2015-01-24 00:58:16 +00:00
break;
case 't':
argument++;
testNb = (int)readU32FromChar(&argument);
2015-01-24 00:58:16 +00:00
break;
case 'P': /* compressibility % */
argument++;
proba = (int)readU32FromChar(&argument);
if (proba>100) proba = 100;
2015-01-24 00:58:16 +00:00
break;
default:
return (FUZ_usage(programName), 1);
2016-03-19 11:12:07 +00:00
} } } } /* for (argNb=1; argNb<argc; argNb++) */
2015-01-24 00:58:16 +00:00
/* Get Seed */
DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING);
2015-01-24 00:58:16 +00:00
if (!seedset) {
time_t const t = time(NULL);
U32 const h = XXH32(&t, sizeof(t), 1);
seed = h % 10000;
}
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-22 00:19:44 +00:00
DISPLAY("Seed = %u\n", (unsigned)seed);
if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %i%%\n", proba);
2015-01-24 00:58:16 +00:00
2017-07-10 20:48:41 +00:00
if (memTestsOnly) {
g_displayLevel = MAX(3, g_displayLevel);
return FUZ_mallocTests(seed, ((double)proba) / 100, memTestsOnly);
2017-07-10 20:48:41 +00:00
}
if (nbTests < testNb) nbTests = testNb;
2016-03-19 11:12:07 +00:00
if (testNb==0)
result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */
2015-01-24 00:58:16 +00:00
if (!result)
result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100, bigTests);
2016-03-19 11:12:07 +00:00
if (mainPause) {
int unused;
2015-01-24 00:58:16 +00:00
DISPLAY("Press Enter \n");
unused = getchar();
(void)unused;
2015-01-24 00:58:16 +00:00
}
return result;
}