[regression] Add dictionary support

Dictionaries are prebuilt and saved as part of the data object.
The config decides whether or not to use the dictionary if it is
available. Configs that require dictionaries are only run with
data that have dictionaries. Each method skips the configs that are
irrelevant to it; for example, ZSTD_compress() skips configs that use
dictionaries.

I've also trimmed the silesia source to 1MB per file (12 MB total),
and added 500 samples from the github data set with a dictionary.

I've intentionally added an extra line to the `results.csv` to make
the nightly build fail, so that we can see how CircleCI reports it.

Full list of changes:

* Add pre-built dictionaries to the data.
* Add `use_dictionary` and `no_pledged_src_size` flags to the config.
* Add a config using a dictionary for every level.
* Add a config that specifies no pledged source size.
* Support dictionaries and streaming in the `zstdcli` method.
* Add a context-reuse method using `ZSTD_compressCCtx()`.
* Clean up the formatting of the `results.csv` file to align columns.
* Add `--data`, `--config`, and `--method` flags to constrain each
  to a particular value. This is useful for debugging a failure
  or debugging a particular config/method/data.
This commit is contained in:
Nick Terrell 2018-11-30 17:16:19 -08:00
parent 386c9ab58a
commit e859862341
7 changed files with 657 additions and 226 deletions

View File

@ -19,6 +19,12 @@
.name = "level -" #x, \
.cli_args = "--fast=" #x, \
.param_values = PARAM_VALUES(level_fast##x##_param_values), \
}; \
config_t const level_fast##x##_dict = { \
.name = "level -" #x " with dict", \
.cli_args = "--fast=" #x, \
.param_values = PARAM_VALUES(level_fast##x##_param_values), \
.use_dictionary = 1, \
};
/* Define a config for each level we want to test with. */
@ -30,6 +36,12 @@
.name = "level " #x, \
.cli_args = "-" #x, \
.param_values = PARAM_VALUES(level_##x##_param_values), \
}; \
config_t const level_##x##_dict = { \
.name = "level " #x " with dict", \
.cli_args = "-" #x, \
.param_values = PARAM_VALUES(level_##x##_param_values), \
.use_dictionary = 1, \
};
@ -41,17 +53,31 @@
#undef LEVEL
#undef FAST_LEVEL
/**
 * Config with empty CLI arguments and no parameter values; the only thing it
 * sets is the `no_pledged_src_size` flag.
 */
static config_t no_pledged_src_size = {
.name = "no source size",
.cli_args = "",
.param_values = {.data = NULL, .size = 0},
.no_pledged_src_size = 1,
};
static config_t const* g_configs[] = {
#define FAST_LEVEL(x) &level_fast##x,
#define LEVEL(x) &level_##x,
#define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict,
#define LEVEL(x) &level_##x, &level_##x##_dict,
#include "levels.h"
#undef LEVEL
#undef FAST_LEVEL
&no_pledged_src_size,
NULL,
};
config_t const* const* configs = g_configs;
/**
 * Decide whether `config` cannot be run against `data`.
 *
 * @returns non-zero when the config asks for a dictionary but the data does
 *          not have one, 0 otherwise.
 */
int config_skip_data(config_t const* config, data_t const* data) {
    /* Configs that don't use a dictionary run on every data object. */
    if (!config->use_dictionary)
        return 0;
    return !data_has_dict(data);
}
int config_get_level(config_t const* config) {
param_values_t const params = config->param_values;
size_t i;

View File

@ -16,6 +16,8 @@
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include "data.h"
typedef struct {
ZSTD_cParameter param;
unsigned value;
@ -41,8 +43,25 @@ typedef struct {
* the parameters will be derived from these.
*/
param_values_t param_values;
/**
* Boolean parameter that says if we should use a dictionary. If the data
* doesn't have a dictionary, this config is skipped. Defaults to no.
*/
int use_dictionary;
/**
* Boolean parameter that says if we should pass the pledged source size
* when the method allows it. Defaults to yes.
*/
int no_pledged_src_size;
} config_t;
/**
* Returns true if the config should skip this data.
* For instance, if the config requires a dictionary but the data doesn't have
* one.
*/
int config_skip_data(config_t const* config, data_t const* data);
#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1)
/**
* Returns the compression level specified by the config, or CONFIG_NO_LEVEL if

View File

@ -32,27 +32,58 @@
"https://github.com/facebook/zstd/releases/download/regression-data/" x
data_t silesia = {
.url = REGRESSION_RELEASE("silesia.tar.zst"),
.name = "silesia",
.type = data_type_dir,
.xxhash64 = 0x67558ee5506918b4LL,
.data =
{
.url = REGRESSION_RELEASE("silesia.tar.zst"),
.xxhash64 = 0x48a199f92f93e977LL,
},
};
data_t silesia_tar = {
.url = REGRESSION_RELEASE("silesia.tar.zst"),
.name = "silesia.tar",
.type = data_type_file,
.xxhash64 = 0x67558ee5506918b4LL,
.data =
{
.url = REGRESSION_RELEASE("silesia.tar.zst"),
.xxhash64 = 0x48a199f92f93e977LL,
},
};
/**
 * The github data set: a directory of samples that also carries a pre-built
 * dictionary (it fills in `.dict` in addition to `.data`).
 */
data_t github = {
.name = "github",
.type = data_type_dir,
.data =
{
.url = REGRESSION_RELEASE("github.tar.zst"),
.xxhash64 = 0xa9b1b44b020df292LL,
},
.dict =
{
.url = REGRESSION_RELEASE("github.dict.zst"),
.xxhash64 = 0x1eddc6f737d3cb53LL,
},
};
/** NULL-terminated table of every data object known to the data module. */
static data_t* g_data[] = {
&silesia,
&silesia_tar,
&github,
NULL,
};
data_t const* const* data = (data_t const* const*)g_data;
/**
* data helpers.
*/
/**
 * Tells whether `data` comes with a dictionary.
 * A data object has a dictionary exactly when its dict resource has a URL.
 *
 * @returns 1 if a dictionary is present, 0 otherwise.
 */
int data_has_dict(data_t const* data) {
    char const* const dict_url = data->dict.url;
    return dict_url == NULL ? 0 : 1;
}
/**
* data buffer helper functions (documented in header).
*/
@ -100,16 +131,24 @@ err:
free(buffer.data);
memset(&buffer, 0, sizeof(buffer));
return buffer;
}
data_buffer_t data_buffer_get(data_t const* data) {
data_buffer_t data_buffer_get_data(data_t const* data) {
data_buffer_t const kEmptyBuffer = {};
if (data->type != data_type_file)
return kEmptyBuffer;
return data_buffer_read(data->path);
return data_buffer_read(data->data.path);
}
data_buffer_t data_buffer_get_dict(data_t const* data) {
data_buffer_t const kEmptyBuffer = {};
if (!data_has_dict(data))
return kEmptyBuffer;
return data_buffer_read(data->dict.path);
}
int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
@ -124,13 +163,69 @@ int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
return 0;
assert(buffer1.size > buffer2.size);
return 1;
}
void data_buffer_free(data_buffer_t buffer) {
free(buffer.data);
}
/**
* data filenames helpers.
*/
/**
 * Builds the list of filenames found at the data object's path by handing
 * that single path to UTIL_createFileList() (links are not followed).
 *
 * @returns The filenames table. `size` starts at 0 and `buffer` at NULL, so
 *          they keep those values if UTIL_createFileList() does not set them.
 */
data_filenames_t data_filenames_get(data_t const* data) {
    char const* root = data->data.path;
    data_filenames_t result;

    result.buffer = NULL;
    result.size = 0;
    result.filenames = UTIL_createFileList(
        &root, 1, &result.buffer, &result.size, /* followLinks */ 0);
    return result;
}
/**
 * Releases a filenames table obtained from data_filenames_get() by passing
 * both of its allocations to UTIL_freeFileList().
 */
void data_filenames_free(data_filenames_t filenames) {
    char const** const table = filenames.filenames;
    char* const storage = filenames.buffer;
    UTIL_freeFileList(table, storage);
}
/**
* data buffers helpers.
*/
/**
 * Reads every file listed by data_filenames_get() for `data` into its own
 * buffer.
 *
 * The filename list is only needed while reading, so it is released on every
 * path before returning. If any file fails to load, the buffers read so far
 * are released and an empty (zero-sized, NULL) result is returned.
 *
 * @returns One buffer per file; zero sized on error. The caller owns the
 *          result and releases it with data_buffers_free().
 */
data_buffers_t data_buffers_get(data_t const* data) {
    data_buffers_t const kEmptyBuffers = {.buffers = NULL, .size = 0};
    data_buffers_t buffers = kEmptyBuffers;
    data_filenames_t const filenames = data_filenames_get(data);
    data_buffer_t* buffersPtr = NULL;

    if (filenames.size == 0)
        goto out;

    /* calloc() checks the count * size multiplication for overflow. */
    buffersPtr = (data_buffer_t*)calloc(filenames.size, sizeof(*buffersPtr));
    if (buffersPtr == NULL)
        goto out;
    buffers.buffers = (data_buffer_t const*)buffersPtr;
    buffers.size = filenames.size;

    for (size_t i = 0; i < filenames.size; ++i) {
        buffersPtr[i] = data_buffer_read(filenames.filenames[i]);
        if (buffersPtr[i].data == NULL) {
            /* Only the first i entries hold data; release exactly those. */
            buffers.size = i;
            data_buffers_free(buffers);
            buffers = kEmptyBuffers;
            break;
        }
    }

out:
    data_filenames_free(filenames);
    return buffers;
}

/**
 * Frees the data buffers: the contents of each buffer, then the array that
 * holds them. Safe to call on a zero-sized result.
 */
void data_buffers_free(data_buffers_t buffers) {
    for (size_t i = 0; i < buffers.size; ++i)
        data_buffer_free(buffers.buffers[i]);
    free((data_buffer_t*)buffers.buffers);
}
/**
* Initialization and download functions.
*/
@ -174,18 +269,23 @@ out:
/**
 * Concatenates up to three strings into a newly allocated string.
 *
 * @param str1 First piece; must not be NULL.
 * @param str2 Second piece; must not be NULL.
 * @param str3 Optional third piece; NULL is treated as the empty string.
 *
 * @returns The malloc'ed, NUL-terminated concatenation, which the caller must
 *          free(), or NULL if the allocation fails.
 */
static char* cat3(char const* str1, char const* str2, char const* str3) {
    size_t const size1 = strlen(str1);
    size_t const size2 = strlen(str2);
    /* str3 may be NULL (see cat2), so never hand it to strlen()/strcpy(). */
    size_t const size3 = str3 == NULL ? 0 : strlen(str3);
    size_t const size = size1 + size2 + size3 + 1;
    char* const dst = (char*)malloc(size);
    if (dst == NULL)
        return NULL;
    /* The lengths are already known, so copy the bytes without rescanning. */
    memcpy(dst, str1, size1);
    memcpy(dst + size1, str2, size2);
    if (size3 != 0)
        memcpy(dst + size1 + size2, str3, size3);
    dst[size - 1] = '\0';
    assert(strlen(dst) == size1 + size2 + size3);
    return dst;
}
/**
 * Two-string concatenation: forwards to cat3() with no third piece.
 *
 * @returns Whatever cat3() returns for (str1, str2, NULL).
 */
static char* cat2(char const* str1, char const* str2) {
    char const* const no_third_piece = NULL;
    return cat3(str1, str2, no_third_piece);
}
/**
* State needed by the curl callback.
* It takes data from curl, hashes it, and writes it to the file.
@ -197,16 +297,18 @@ typedef struct {
} curl_data_t;
/** Create the curl state. */
static curl_data_t curl_data_create(data_t const* data) {
static curl_data_t curl_data_create(
data_resource_t const* resource,
data_type_t type) {
curl_data_t cdata = {};
XXH64_reset(&cdata.xxhash64, 0);
assert(UTIL_isDirectory(g_data_dir));
if (data->type == data_type_file) {
if (type == data_type_file) {
/* Decompress the resource and store to the path. */
char* cmd = cat3("zstd -dqfo '", data->path, "'");
char* cmd = cat3("zstd -dqfo '", resource->path, "'");
if (cmd == NULL) {
cdata.error = ENOMEM;
return cdata;
@ -243,54 +345,68 @@ static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
return written;
}
/** Download a single data object. */
static int curl_download_datum(CURL* curl, data_t const* data) {
curl_data_t cdata = curl_data_create(data);
int err = EFAULT;
if (cdata.error != 0) {
err = cdata.error;
goto out;
}
static int curl_download_resource(
CURL* curl,
data_resource_t const* resource,
data_type_t type) {
curl_data_t cdata;
/* Download the data. */
if (curl_easy_setopt(curl, CURLOPT_URL, data->url) != 0)
goto out;
if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0)
return EINVAL;
if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
goto out;
if (curl_easy_perform(curl) != 0) {
fprintf(stderr, "downloading '%s' failed\n", data->url);
goto out;
}
/* check that the file exists. */
if (data->type == data_type_file && !UTIL_isRegularFile(data->path)) {
fprintf(stderr, "output file '%s' does not exist\n", data->path);
goto out;
}
if (data->type == data_type_dir && !UTIL_isDirectory(data->path)) {
fprintf(stderr, "output directory '%s' does not exist\n", data->path);
goto out;
}
/* Check that the hash matches. */
if (XXH64_digest(&cdata.xxhash64) != data->xxhash64) {
return EINVAL;
cdata = curl_data_create(resource, type);
if (cdata.error != 0)
return cdata.error;
int const curl_err = curl_easy_perform(curl);
int const close_err = curl_data_free(cdata);
if (curl_err) {
fprintf(
stderr,
"checksum does not match: %llx != %llx\n",
"downloading '%s' for '%s' failed\n",
resource->url,
resource->path);
return EIO;
}
if (close_err) {
fprintf(stderr, "writing data to '%s' failed\n", resource->path);
return EIO;
}
/* check that the file exists. */
if (type == data_type_file && !UTIL_isRegularFile(resource->path)) {
fprintf(stderr, "output file '%s' does not exist\n", resource->path);
return EIO;
}
if (type == data_type_dir && !UTIL_isDirectory(resource->path)) {
fprintf(
stderr, "output directory '%s' does not exist\n", resource->path);
return EIO;
}
/* Check that the hash matches. */
if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) {
fprintf(
stderr,
"checksum does not match: 0x%llxLL != 0x%llxLL\n",
(unsigned long long)XXH64_digest(&cdata.xxhash64),
(unsigned long long)data->xxhash64);
goto out;
(unsigned long long)resource->xxhash64);
return EINVAL;
}
err = 0;
out:
if (err != 0)
fprintf(stderr, "downloading '%s' failed\n", data->name);
int const close_err = curl_data_free(cdata);
if (close_err != 0 && err == 0) {
fprintf(stderr, "failed to write data for '%s'\n", data->name);
err = close_err;
return 0;
}
/** Download a single data object. */
static int curl_download_datum(CURL* curl, data_t const* data) {
int ret;
ret = curl_download_resource(curl, &data->data, data->type);
if (ret != 0)
return ret;
if (data_has_dict(data)) {
ret = curl_download_resource(curl, &data->dict, data_type_file);
if (ret != 0)
return ret;
}
return err;
return ret;
}
/** Download all the data. */
@ -331,9 +447,14 @@ static int data_create_paths(data_t* const* data, char const* dir) {
assert(data != NULL);
for (; *data != NULL; ++data) {
data_t* const datum = *data;
datum->path = cat3(dir, "/", datum->name);
if (datum->path == NULL)
datum->data.path = cat3(dir, "/", datum->name);
if (datum->data.path == NULL)
return ENOMEM;
if (data_has_dict(datum)) {
datum->dict.path = cat2(datum->data.path, ".dict");
if (datum->dict.path == NULL)
return ENOMEM;
}
}
return 0;
}
@ -343,8 +464,10 @@ static void data_free_paths(data_t* const* data) {
assert(data != NULL);
for (; *data != NULL; ++data) {
data_t* datum = *data;
free((void*)datum->path);
datum->path = NULL;
free((void*)datum->data.path);
free((void*)datum->dict.path);
datum->data.path = NULL;
datum->dict.path = NULL;
}
}
@ -367,7 +490,8 @@ static uint64_t stamp_hash(data_t const* const* data) {
/* We don't care about the URL that we fetch from. */
/* The path is derived from the name. */
XXH64_update(&state, datum->name, strlen(datum->name));
xxh_update_le(&state, datum->xxhash64);
xxh_update_le(&state, datum->data.xxhash64);
xxh_update_le(&state, datum->dict.xxhash64);
xxh_update_le(&state, datum->type);
}
return XXH64_digest(&state);

View File

@ -22,10 +22,14 @@ typedef enum {
typedef struct {
char const* url; /**< Where to get this resource. */
uint64_t xxhash64; /**< Hash of the url contents. */
char const* name; /**< The logical name of the resource (no extension). */
data_type_t type; /**< The type of this resource. */
char const* path; /**< The path of the unpacked resource (derived). */
size_t size;
} data_resource_t;
typedef struct {
data_resource_t data;
data_resource_t dict;
data_type_t type; /**< The type of the data. */
char const* name; /**< The logical name of the data (no extension). */
} data_t;
/**
@ -33,6 +37,9 @@ typedef struct {
*/
extern data_t const* const* data;
int data_has_dict(data_t const* data);
/**
* Initializes the data module and downloads the data necessary.
* Caches the downloads in dir. We add a stamp file in the directory after
@ -62,7 +69,14 @@ typedef struct {
*
* @returns The buffer, which is NULL on failure.
*/
data_buffer_t data_buffer_get(data_t const* data);
data_buffer_t data_buffer_get_data(data_t const* data);
/**
* Read the dictionary that the data points to into a buffer.
*
* @returns The buffer, which is NULL on failure.
*/
data_buffer_t data_buffer_get_dict(data_t const* data);
/**
* Read the contents of filename into a buffer.
@ -88,5 +102,39 @@ int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
*/
void data_buffer_free(data_buffer_t buffer);
/**
 * A table of filenames, as produced by data_filenames_get() and released by
 * data_filenames_free().
 */
typedef struct {
/** Allocation that accompanies the table; freed together with it. */
char* buffer;
/** The filenames themselves; `size` entries. */
char const** filenames;
/** Number of entries in `filenames`; 0 on error. */
unsigned size;
} data_filenames_t;
/**
* Get a recursive list of filenames in the data object. If it is a file, it
* will only contain one entry. If it is a directory, it will recursively walk
* the directory.
*
* @returns The list of filenames, which has size 0 and NULL pointers on error.
*/
data_filenames_t data_filenames_get(data_t const* data);
/**
* Frees the filenames table.
*/
void data_filenames_free(data_filenames_t filenames);
/**
 * A list of data buffers, one per file, as produced by data_buffers_get().
 */
typedef struct {
/** Array of `size` buffers. */
data_buffer_t const* buffers;
/** Number of buffers in the array; 0 on error. */
size_t size;
} data_buffers_t;
/**
* @returns a list of buffers for every file in data. It is zero sized on error.
*/
data_buffers_t data_buffers_get(data_t const* data);
/**
* Frees the data buffers.
*/
void data_buffers_free(data_buffers_t buffers);
#endif

View File

@ -26,62 +26,88 @@ void method_set_zstdcli(char const* zstdcli) {
* the given name, member.
*
* method_state_t* base = ...;
* simple_state_t* state = container_of(base, simple_state_t, base);
* buffer_state_t* state = container_of(base, buffer_state_t, base);
*/
#define container_of(ptr, type, member) \
((type*)(char*)(ptr)-offsetof(type, member))
((type*)(ptr == NULL ? NULL : (char*)(ptr)-offsetof(type, member)))
/** State to reuse the same buffers between compression calls. */
typedef struct {
method_state_t base;
data_buffer_t buffer; /**< The constant input data buffer. */
data_buffer_t compressed; /**< The compressed data buffer. */
data_buffer_t decompressed; /**< The decompressed data buffer. */
} simple_state_t;
data_buffers_t inputs; /**< The input buffer for each file. */
data_buffer_t compressed; /**< The compressed data buffer. */
data_buffer_t decompressed; /**< The decompressed data buffer. */
} buffer_state_t;
static method_state_t* simple_create(data_t const* data) {
simple_state_t* state = (simple_state_t*)calloc(1, sizeof(simple_state_t));
/**
 * Finds the size of the largest buffer in the list.
 *
 * @returns The maximum `size` over all buffers, or 0 when the list is empty.
 */
static size_t buffers_max_size(data_buffers_t buffers) {
    size_t largest = 0;
    size_t remaining = buffers.size;

    /* Visit order doesn't matter for a maximum, so walk from the back. */
    while (remaining-- > 0) {
        size_t const candidate = buffers.buffers[remaining].size;
        largest = candidate > largest ? candidate : largest;
    }
    return largest;
}
static method_state_t* buffer_state_create(data_t const* data) {
buffer_state_t* state = (buffer_state_t*)calloc(1, sizeof(buffer_state_t));
if (state == NULL)
return NULL;
state->base.data = data;
state->buffer = data_buffer_get(data);
state->compressed =
data_buffer_create(ZSTD_compressBound(state->buffer.size));
state->decompressed = data_buffer_create(state->buffer.size);
state->inputs = data_buffers_get(data);
size_t const max_size = buffers_max_size(state->inputs);
state->compressed = data_buffer_create(ZSTD_compressBound(max_size));
state->decompressed = data_buffer_create(max_size);
return &state->base;
}
static void simple_destroy(method_state_t* base) {
static void buffer_state_destroy(method_state_t* base) {
if (base == NULL)
return;
simple_state_t* state = container_of(base, simple_state_t, base);
buffer_state_t* state = container_of(base, buffer_state_t, base);
free(state);
}
static result_t simple_compress(method_state_t* base, config_t const* config) {
if (base == NULL)
return result_error(result_error_system_error);
simple_state_t* state = container_of(base, simple_state_t, base);
/**
 * Validates a buffer state before it is used by a compression method.
 * Reports the problem on stderr when the state is unusable.
 *
 * @returns 1 if the state is NULL, has no inputs, or is missing its
 *          compressed or decompressed buffer; 0 if it is usable.
 */
static int buffer_state_bad(buffer_state_t const* state) {
    char const* problem = NULL;

    if (state == NULL) {
        problem = "buffer_state_t is NULL\n";
    } else if (!(state->inputs.size != 0 && state->compressed.data != NULL &&
                 state->decompressed.data != NULL)) {
        problem = "buffer state allocation failure\n";
    }

    if (problem == NULL)
        return 0;
    fprintf(stderr, "%s", problem);
    return 1;
}
static result_t simple_compress(method_state_t* base, config_t const* config) {
buffer_state_t* state = container_of(base, buffer_state_t, base);
if (buffer_state_bad(state))
return result_error(result_error_system_error);
/* Keep the tests short by skipping directories, since behavior shouldn't
* change.
*/
if (base->data->type != data_type_file)
return result_error(result_error_skip);
if (state->buffer.data == NULL || state->compressed.data == NULL ||
state->decompressed.data == NULL) {
return result_error(result_error_system_error);
}
if (config->use_dictionary || config->no_pledged_src_size)
return result_error(result_error_skip);
/* If the config doesn't specify a level, skip. */
int const level = config_get_level(config);
if (level == CONFIG_NO_LEVEL)
return result_error(result_error_skip);
data_buffer_t const input = state->inputs.buffers[0];
/* Compress, decompress, and check the result. */
state->compressed.size = ZSTD_compress(
state->compressed.data,
state->compressed.capacity,
state->buffer.data,
state->buffer.size,
input.data,
input.size,
level);
if (ZSTD_isError(state->compressed.size))
return result_error(result_error_compression_error);
@ -93,7 +119,7 @@ static result_t simple_compress(method_state_t* base, config_t const* config) {
state->compressed.size);
if (ZSTD_isError(state->decompressed.size))
return result_error(result_error_decompression_error);
if (data_buffer_compare(state->buffer, state->decompressed))
if (data_buffer_compare(input, state->decompressed))
return result_error(result_error_round_trip_error);
result_data_t data;
@ -101,6 +127,70 @@ static result_t simple_compress(method_state_t* base, config_t const* config) {
return result_data(data);
}
/**
 * Compresses every input buffer of a directory data object with
 * ZSTD_compressCCtx(), using a single compression context that is created
 * here and shared by all the buffers. Each compressed buffer is decompressed
 * again and compared with its input.
 *
 * The config is skipped when it uses a dictionary, sets no_pledged_src_size,
 * or has no level, and when the data is not a directory.
 *
 * @returns The sum of the compressed sizes on success; otherwise the skip or
 *          error result for the first problem encountered.
 */
static result_t compress_cctx_compress(
    method_state_t* base,
    config_t const* config) {
    buffer_state_t* const state = container_of(base, buffer_state_t, base);
    if (buffer_state_bad(state))
        return result_error(result_error_system_error);

    int const unsupported_config =
        config->use_dictionary || config->no_pledged_src_size;
    if (unsupported_config || base->data->type != data_type_dir)
        return result_error(result_error_skip);

    int const level = config_get_level(config);
    if (level == CONFIG_NO_LEVEL)
        return result_error(result_error_skip);

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) {
        fprintf(stderr, "ZSTD_createCCtx() failed\n");
        return result_error(result_error_system_error);
    }

    result_data_t totals = {.total_size = 0};
    result_t result;
    size_t const count = state->inputs.size;
    size_t idx;

    for (idx = 0; idx < count; ++idx) {
        data_buffer_t const src = state->inputs.buffers[idx];

        state->compressed.size = ZSTD_compressCCtx(
            cctx,
            state->compressed.data,
            state->compressed.capacity,
            src.data,
            src.size,
            level);
        if (ZSTD_isError(state->compressed.size)) {
            result = result_error(result_error_compression_error);
            break;
        }

        state->decompressed.size = ZSTD_decompress(
            state->decompressed.data,
            state->decompressed.capacity,
            state->compressed.data,
            state->compressed.size);
        if (ZSTD_isError(state->decompressed.size)) {
            result = result_error(result_error_decompression_error);
            break;
        }

        if (data_buffer_compare(src, state->decompressed)) {
            result = result_error(result_error_round_trip_error);
            break;
        }

        totals.total_size += state->compressed.size;
    }

    /* The loop only runs to completion when every buffer round-tripped. */
    if (idx == count)
        result = result_data(totals);

    ZSTD_freeCCtx(cctx);
    return result;
}
/** Generic state creation function. */
static method_state_t* method_state_create(data_t const* data) {
method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t));
@ -114,26 +204,32 @@ static void method_state_destroy(method_state_t* state) {
free(state);
}
#define MAX_OUT 32
static result_t cli_file_compress(
static result_t cli_compress(
method_state_t* state,
config_t const* config) {
if (config->cli_args == NULL)
return result_error(result_error_skip);
/* We don't support no pledged source size with directories. Too slow. */
if (state->data->type == data_type_dir && config->no_pledged_src_size)
return result_error(result_error_skip);
if (g_zstdcli == NULL)
return result_error(result_error_system_error);
/* '<zstd>' -r <args> '<file/dir>' | wc -c */
/* '<zstd>' -cqr <args> [-D '<dict>'] '<file/dir>' */
char cmd[1024];
size_t const cmd_size = snprintf(
cmd,
sizeof(cmd),
"'%s' -cqr %s '%s' | wc -c",
"'%s' -cqr %s %s%s%s %s '%s'",
g_zstdcli,
config->cli_args,
state->data->path);
config->use_dictionary ? "-D '" : "",
config->use_dictionary ? state->data->dict.path : "",
config->use_dictionary ? "'" : "",
config->no_pledged_src_size ? "<" : "",
state->data->data.path);
if (cmd_size >= sizeof(cmd)) {
fprintf(stderr, "command too large: %s\n", cmd);
return result_error(result_error_system_error);
@ -144,42 +240,48 @@ static result_t cli_file_compress(
return result_error(result_error_system_error);
}
/* Read the total compressed size. */
char out[MAX_OUT + 1];
size_t const out_size = fread(out, 1, MAX_OUT, zstd);
out[out_size] = '\0';
int const zstd_ret = pclose(zstd);
if (zstd_ret != 0) {
char out[4096];
size_t total_size = 0;
while (1) {
size_t const size = fread(out, 1, sizeof(out), zstd);
total_size += size;
if (size != sizeof(out))
break;
}
if (ferror(zstd) || pclose(zstd) != 0) {
fprintf(stderr, "zstd failed with command: %s\n", cmd);
return result_error(result_error_compression_error);
}
if (out_size == MAX_OUT) {
fprintf(stderr, "wc -c produced more bytes than expected: %s\n", out);
return result_error(result_error_system_error);
}
result_data_t data;
data.total_size = atoll(out);
result_data_t const data = {.total_size = total_size};
return result_data(data);
}
method_t const simple = {
.name = "simple",
.create = simple_create,
.name = "ZSTD_compress",
.create = buffer_state_create,
.compress = simple_compress,
.destroy = simple_destroy,
.destroy = buffer_state_destroy,
};
method_t const cli_file = {
.name = "cli file",
method_t const compress_cctx = {
.name = "ZSTD_compressCCtx",
.create = buffer_state_create,
.compress = compress_cctx_compress,
.destroy = buffer_state_destroy,
};
method_t const cli = {
.name = "zstdcli",
.create = method_state_create,
.compress = cli_file_compress,
.compress = cli_compress,
.destroy = method_state_destroy,
};
static method_t const* g_methods[] = {
&simple,
&cli_file,
&compress_cctx,
&cli,
NULL,
};

View File

@ -1,43 +1,101 @@
Data, Config, Method, Total compressed size
silesia.tar, level -5, simple, 106176430
silesia.tar, level -3, simple, 98476550
silesia.tar, level -1, simple, 87206767
silesia.tar, level 0, simple, 66996953
silesia.tar, level 1, simple, 73658303
silesia.tar, level 3, simple, 66996953
silesia.tar, level 4, simple, 65996020
silesia.tar, level 5, simple, 64421326
silesia.tar, level 6, simple, 62388673
silesia.tar, level 7, simple, 61159525
silesia.tar, level 9, simple, 60214921
silesia.tar, level 13, simple, 58428642
silesia.tar, level 16, simple, 56363759
silesia.tar, level 19, simple, 53274173
silesia, level -5, cli file, 106202112
silesia, level -3, cli file, 98518660
silesia, level -1, cli file, 87226203
silesia, level 0, cli file, 67049190
silesia, level 1, cli file, 73676282
silesia, level 3, cli file, 67049190
silesia, level 4, cli file, 66090040
silesia, level 5, cli file, 64503721
silesia, level 6, cli file, 62446177
silesia, level 7, cli file, 61217029
silesia, level 9, cli file, 60282841
silesia, level 13, cli file, 58480658
silesia, level 16, cli file, 56414170
silesia, level 19, cli file, 53365292
silesia.tar, level -5, cli file, 106250113
silesia.tar, level -3, cli file, 98550747
silesia.tar, level -1, cli file, 87227322
silesia.tar, level 0, cli file, 67111168
silesia.tar, level 1, cli file, 73694374
silesia.tar, level 3, cli file, 67111168
silesia.tar, level 4, cli file, 66154079
silesia.tar, level 5, cli file, 64546998
silesia.tar, level 6, cli file, 62458454
silesia.tar, level 7, cli file, 61231085
silesia.tar, level 9, cli file, 60310313
silesia.tar, level 13, cli file, 58517476
silesia.tar, level 16, cli file, 56448694
silesia.tar, level 19, cli file, 53444920
Data, Config, Method, Total compressed size
This line is intentionally added to see how the nightly job reports failures
silesia.tar, level -5, ZSTD_compress, 7160438
silesia.tar, level -3, ZSTD_compress, 6789024
silesia.tar, level -1, ZSTD_compress, 6195462
silesia.tar, level 0, ZSTD_compress, 4875071
silesia.tar, level 1, ZSTD_compress, 5339697
silesia.tar, level 3, ZSTD_compress, 4875071
silesia.tar, level 4, ZSTD_compress, 4813104
silesia.tar, level 5, ZSTD_compress, 4726961
silesia.tar, level 6, ZSTD_compress, 4654401
silesia.tar, level 7, ZSTD_compress, 4591933
silesia.tar, level 9, ZSTD_compress, 4554098
silesia.tar, level 13, ZSTD_compress, 4503496
silesia.tar, level 16, ZSTD_compress, 4387233
silesia.tar, level 19, ZSTD_compress, 4283123
silesia, level -5, ZSTD_compressCCtx, 7152294
silesia, level -3, ZSTD_compressCCtx, 6789969
silesia, level -1, ZSTD_compressCCtx, 6191548
silesia, level 0, ZSTD_compressCCtx, 4862377
silesia, level 1, ZSTD_compressCCtx, 5318036
silesia, level 3, ZSTD_compressCCtx, 4862377
silesia, level 4, ZSTD_compressCCtx, 4800629
silesia, level 5, ZSTD_compressCCtx, 4715005
silesia, level 6, ZSTD_compressCCtx, 4644055
silesia, level 7, ZSTD_compressCCtx, 4581559
silesia, level 9, ZSTD_compressCCtx, 4543862
silesia, level 13, ZSTD_compressCCtx, 4493931
silesia, level 16, ZSTD_compressCCtx, 4381885
silesia, level 19, ZSTD_compressCCtx, 4296899
github, level -5, ZSTD_compressCCtx, 232744
github, level -3, ZSTD_compressCCtx, 220611
github, level -1, ZSTD_compressCCtx, 176575
github, level 0, ZSTD_compressCCtx, 136397
github, level 1, ZSTD_compressCCtx, 143457
github, level 3, ZSTD_compressCCtx, 136397
github, level 4, ZSTD_compressCCtx, 136144
github, level 5, ZSTD_compressCCtx, 135106
github, level 6, ZSTD_compressCCtx, 135108
github, level 7, ZSTD_compressCCtx, 135108
github, level 9, ZSTD_compressCCtx, 135108
github, level 13, ZSTD_compressCCtx, 133741
github, level 16, ZSTD_compressCCtx, 133741
github, level 19, ZSTD_compressCCtx, 133717
silesia, level -5, zstdcli, 7152342
silesia, level -3, zstdcli, 6790021
silesia, level -1, zstdcli, 6191597
silesia, level 0, zstdcli, 4862425
silesia, level 1, zstdcli, 5318084
silesia, level 3, zstdcli, 4862425
silesia, level 4, zstdcli, 4800677
silesia, level 5, zstdcli, 4715053
silesia, level 6, zstdcli, 4644103
silesia, level 7, zstdcli, 4581607
silesia, level 9, zstdcli, 4543910
silesia, level 13, zstdcli, 4493979
silesia, level 16, zstdcli, 4381933
silesia, level 19, zstdcli, 4296947
silesia.tar, level -5, zstdcli, 7159586
silesia.tar, level -3, zstdcli, 6791018
silesia.tar, level -1, zstdcli, 6196283
silesia.tar, level 0, zstdcli, 4876730
silesia.tar, level 1, zstdcli, 5340312
silesia.tar, level 3, zstdcli, 4876730
silesia.tar, level 4, zstdcli, 4817723
silesia.tar, level 5, zstdcli, 4730389
silesia.tar, level 6, zstdcli, 4655708
silesia.tar, level 7, zstdcli, 4593407
silesia.tar, level 9, zstdcli, 4556135
silesia.tar, level 13, zstdcli, 4503500
silesia.tar, level 16, zstdcli, 4387237
silesia.tar, level 19, zstdcli, 4283127
silesia.tar, no source size, zstdcli, 4876726
github, level -5, zstdcli, 234744
github, level -5 with dict, zstdcli, 47528
github, level -3, zstdcli, 222611
github, level -3 with dict, zstdcli, 46394
github, level -1, zstdcli, 178575
github, level -1 with dict, zstdcli, 43401
github, level 0, zstdcli, 138397
github, level 0 with dict, zstdcli, 40316
github, level 1, zstdcli, 145457
github, level 1 with dict, zstdcli, 43242
github, level 3, zstdcli, 138397
github, level 3 with dict, zstdcli, 40316
github, level 4, zstdcli, 138144
github, level 4 with dict, zstdcli, 40292
github, level 5, zstdcli, 137106
github, level 5 with dict, zstdcli, 40938
github, level 6, zstdcli, 137108
github, level 6 with dict, zstdcli, 40632
github, level 7, zstdcli, 137108
github, level 7 with dict, zstdcli, 40766
github, level 9, zstdcli, 137108
github, level 9 with dict, zstdcli, 41326
github, level 13, zstdcli, 135741
github, level 13 with dict, zstdcli, 41670
github, level 16, zstdcli, 135741
github, level 16 with dict, zstdcli, 39940
github, level 19, zstdcli, 135717
github, level 19 with dict, zstdcli, 39576

1 Data Data, Config, Method, Total compressed size Config Method Total compressed size
2 silesia.tar This line is intentionally added to see how the nightly job reports failures level -5 simple 106176430
3 silesia.tar silesia.tar, level -5, ZSTD_compress, 7160438 level -3 simple 98476550
4 silesia.tar silesia.tar, level -3, ZSTD_compress, 6789024 level -1 simple 87206767
5 silesia.tar silesia.tar, level -1, ZSTD_compress, 6195462 level 0 simple 66996953
6 silesia.tar silesia.tar, level 0, ZSTD_compress, 4875071 level 1 simple 73658303
7 silesia.tar silesia.tar, level 1, ZSTD_compress, 5339697 level 3 simple 66996953
8 silesia.tar silesia.tar, level 3, ZSTD_compress, 4875071 level 4 simple 65996020
9 silesia.tar silesia.tar, level 4, ZSTD_compress, 4813104 level 5 simple 64421326
10 silesia.tar silesia.tar, level 5, ZSTD_compress, 4726961 level 6 simple 62388673
11 silesia.tar silesia.tar, level 6, ZSTD_compress, 4654401 level 7 simple 61159525
12 silesia.tar silesia.tar, level 7, ZSTD_compress, 4591933 level 9 simple 60214921
13 silesia.tar silesia.tar, level 9, ZSTD_compress, 4554098 level 13 simple 58428642
14 silesia.tar silesia.tar, level 13, ZSTD_compress, 4503496 level 16 simple 56363759
15 silesia.tar silesia.tar, level 16, ZSTD_compress, 4387233 level 19 simple 53274173
16 silesia silesia.tar, level 19, ZSTD_compress, 4283123 level -5 cli file 106202112
17 silesia silesia, level -5, ZSTD_compressCCtx, 7152294 level -3 cli file 98518660
18 silesia silesia, level -3, ZSTD_compressCCtx, 6789969 level -1 cli file 87226203
19 silesia silesia, level -1, ZSTD_compressCCtx, 6191548 level 0 cli file 67049190
20 silesia silesia, level 0, ZSTD_compressCCtx, 4862377 level 1 cli file 73676282
21 silesia silesia, level 1, ZSTD_compressCCtx, 5318036 level 3 cli file 67049190
22 silesia silesia, level 3, ZSTD_compressCCtx, 4862377 level 4 cli file 66090040
23 silesia silesia, level 4, ZSTD_compressCCtx, 4800629 level 5 cli file 64503721
24 silesia silesia, level 5, ZSTD_compressCCtx, 4715005 level 6 cli file 62446177
25 silesia silesia, level 6, ZSTD_compressCCtx, 4644055 level 7 cli file 61217029
26 silesia silesia, level 7, ZSTD_compressCCtx, 4581559 level 9 cli file 60282841
27 silesia silesia, level 9, ZSTD_compressCCtx, 4543862 level 13 cli file 58480658
28 silesia silesia, level 13, ZSTD_compressCCtx, 4493931 level 16 cli file 56414170
29 silesia silesia, level 16, ZSTD_compressCCtx, 4381885 level 19 cli file 53365292
30 silesia.tar silesia, level 19, ZSTD_compressCCtx, 4296899 level -5 cli file 106250113
31 silesia.tar github, level -5, ZSTD_compressCCtx, 232744 level -3 cli file 98550747
32 silesia.tar github, level -3, ZSTD_compressCCtx, 220611 level -1 cli file 87227322
33 silesia.tar github, level -1, ZSTD_compressCCtx, 176575 level 0 cli file 67111168
34 silesia.tar github, level 0, ZSTD_compressCCtx, 136397 level 1 cli file 73694374
35 silesia.tar github, level 1, ZSTD_compressCCtx, 143457 level 3 cli file 67111168
36 silesia.tar github, level 3, ZSTD_compressCCtx, 136397 level 4 cli file 66154079
37 silesia.tar github, level 4, ZSTD_compressCCtx, 136144 level 5 cli file 64546998
38 silesia.tar github, level 5, ZSTD_compressCCtx, 135106 level 6 cli file 62458454
39 silesia.tar github, level 6, ZSTD_compressCCtx, 135108 level 7 cli file 61231085
40 silesia.tar github, level 7, ZSTD_compressCCtx, 135108 level 9 cli file 60310313
41 silesia.tar github, level 9, ZSTD_compressCCtx, 135108 level 13 cli file 58517476
42 silesia.tar github, level 13, ZSTD_compressCCtx, 133741 level 16 cli file 56448694
43 silesia.tar github, level 16, ZSTD_compressCCtx, 133741 level 19 cli file 53444920
44 github, level 19, ZSTD_compressCCtx, 133717
45 silesia, level -5, zstdcli, 7152342
46 silesia, level -3, zstdcli, 6790021
47 silesia, level -1, zstdcli, 6191597
48 silesia, level 0, zstdcli, 4862425
49 silesia, level 1, zstdcli, 5318084
50 silesia, level 3, zstdcli, 4862425
51 silesia, level 4, zstdcli, 4800677
52 silesia, level 5, zstdcli, 4715053
53 silesia, level 6, zstdcli, 4644103
54 silesia, level 7, zstdcli, 4581607
55 silesia, level 9, zstdcli, 4543910
56 silesia, level 13, zstdcli, 4493979
57 silesia, level 16, zstdcli, 4381933
58 silesia, level 19, zstdcli, 4296947
59 silesia.tar, level -5, zstdcli, 7159586
60 silesia.tar, level -3, zstdcli, 6791018
61 silesia.tar, level -1, zstdcli, 6196283
62 silesia.tar, level 0, zstdcli, 4876730
63 silesia.tar, level 1, zstdcli, 5340312
64 silesia.tar, level 3, zstdcli, 4876730
65 silesia.tar, level 4, zstdcli, 4817723
66 silesia.tar, level 5, zstdcli, 4730389
67 silesia.tar, level 6, zstdcli, 4655708
68 silesia.tar, level 7, zstdcli, 4593407
69 silesia.tar, level 9, zstdcli, 4556135
70 silesia.tar, level 13, zstdcli, 4503500
71 silesia.tar, level 16, zstdcli, 4387237
72 silesia.tar, level 19, zstdcli, 4283127
73 silesia.tar, no source size, zstdcli, 4876726
74 github, level -5, zstdcli, 234744
75 github, level -5 with dict, zstdcli, 47528
76 github, level -3, zstdcli, 222611
77 github, level -3 with dict, zstdcli, 46394
78 github, level -1, zstdcli, 178575
79 github, level -1 with dict, zstdcli, 43401
80 github, level 0, zstdcli, 138397
81 github, level 0 with dict, zstdcli, 40316
82 github, level 1, zstdcli, 145457
83 github, level 1 with dict, zstdcli, 43242
84 github, level 3, zstdcli, 138397
85 github, level 3 with dict, zstdcli, 40316
86 github, level 4, zstdcli, 138144
87 github, level 4 with dict, zstdcli, 40292
88 github, level 5, zstdcli, 137106
89 github, level 5 with dict, zstdcli, 40938
90 github, level 6, zstdcli, 137108
91 github, level 6 with dict, zstdcli, 40632
92 github, level 7, zstdcli, 137108
93 github, level 7 with dict, zstdcli, 40766
94 github, level 9, zstdcli, 137108
95 github, level 9 with dict, zstdcli, 41326
96 github, level 13, zstdcli, 135741
97 github, level 13 with dict, zstdcli, 41670
98 github, level 16, zstdcli, 135741
99 github, level 16 with dict, zstdcli, 39940
100 github, level 19, zstdcli, 135717
101 github, level 19 with dict, zstdcli, 39576

View File

@ -17,10 +17,15 @@
#include "data.h"
#include "method.h"
/** Check if a name contains a comma. */
static int g_max_name_len = 0;
/** Check if a name contains a comma or is too long. */
static int is_name_bad(char const* name) {
if (name == NULL)
return 1;
int const len = strlen(name);
if (len > g_max_name_len)
g_max_name_len = len;
for (; *name != '\0'; ++name)
if (*name == ',')
return 1;
@ -47,57 +52,6 @@ static int are_names_bad() {
return 0;
}
/**
 * Helper macro to print the same formatted message to a file and to stderr.
 * The format arguments are expanded twice (once per fprintf call), so they
 * should not have side effects.
 */
#define tprintf(file, ...) \
do { \
fprintf(file, __VA_ARGS__); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
/** Helper macro to flush both a file and stderr. */
#define tflush(file) \
do { \
fflush(file); \
fflush(stderr); \
} while (0)
/**
* Run all the regression tests and record the results table to results and
* stderr progressively.
*/
static int run_all(FILE* results) {
tprintf(results, "Data,\tConfig,\tMethod,\tTotal compressed size\n");
for (size_t method = 0; methods[method] != NULL; ++method) {
for (size_t datum = 0; data[datum] != NULL; ++datum) {
/* Create the state common to all configs */
method_state_t* state = methods[method]->create(data[datum]);
for (size_t config = 0; configs[config] != NULL; ++config) {
/* Print the result for the (method, data, config) tuple. */
result_t const result =
methods[method]->compress(state, configs[config]);
if (result_is_skip(result))
continue;
tprintf(
results,
"%s,\t%s,\t%s,\t",
data[datum]->name,
configs[config]->name,
methods[method]->name);
if (result_is_error(result)) {
tprintf(results, "%s\n", result_get_error_string(result));
} else {
tprintf(
results,
"%llu\n",
(unsigned long long)result_get_data(result).total_size);
}
tflush(results);
}
methods[method]->destroy(state);
}
}
return 0;
}
/**
* Option parsing using getopt.
* When you add a new option update: long_options, long_extras, and
@ -109,6 +63,9 @@ static char const* g_output = NULL;
/* Target of --diff: a file to compare the results to. */
static char const* g_diff = NULL;
/* Target of --cache: the cache directory. */
static char const* g_cache = NULL;
/* Target of --zstd: the zstd cli tool. */
static char const* g_zstdcli = NULL;
/* Target of --config: when set, only the config with this exact name is run. */
static char const* g_config = NULL;
/* Target of --data: when set, only the data with this exact name is run. */
static char const* g_data = NULL;
/* Target of --method: when set, only the method with this exact name is run. */
static char const* g_method = NULL;
typedef enum {
required_option,
@ -120,19 +77,22 @@ typedef enum {
* Extra state that we need to keep per-option that we can't store in getopt.
*/
struct option_extra {
int id; /**< The short option name, used as an id. */
char const* help; /**< The help message. */
int id; /**< The short option name, used as an id. */
char const* help; /**< The help message. */
option_type opt_type; /**< The option type: required, optional, or help. */
char const** value; /**< The value to set or NULL if no_argument. */
char const** value; /**< The value to set or NULL if no_argument. */
};
/**
 * The options, in getopt long-option form.
 * The last field is the option's id. Ids below 128 are also the short option
 * character; ids of 128 and above belong to long-only options, for which
 * print_help() shows no short form.
 * Must be kept in the same order as long_extras.
 */
static struct option long_options[] = {
{"cache", required_argument, NULL, 'c'},
{"diff", required_argument, NULL, 'd'},
{"help", no_argument, NULL, 'h'},
{"output", required_argument, NULL, 'o'},
{"zstd", required_argument, NULL, 'z'},
{"config", required_argument, NULL, 128},
{"data", required_argument, NULL, 129},
{"method", required_argument, NULL, 130},
{"diff", required_argument, NULL, 'd'},
{"help", no_argument, NULL, 'h'},
};
static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
@ -140,10 +100,13 @@ static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
/**
 * The extra info for the options. Must be in the same order as the options.
 * Each entry is {id, help message, option type, pointer to the variable that
 * receives the value (NULL when the option takes no argument)}; the id
 * matches the last field of the corresponding long_options entry.
 */
static struct option_extra long_extras[] = {
{'c', "the cache directory", required_option, &g_cache},
{'d', "compare the results to this file", optional_option, &g_diff},
{'h', "display this message", help_option, NULL},
{'o', "write the results here", required_option, &g_output},
{'z', "zstd cli tool", required_option, &g_zstdcli},
{128, "use this config", optional_option, &g_config},
{129, "use this data", optional_option, &g_data},
{130, "use this method", optional_option, &g_method},
{'d', "compare the results to this file", optional_option, &g_diff},
{'h', "display this message", help_option, NULL},
};
/** The short options. Must correspond to the options. */
@ -169,14 +132,24 @@ static void print_help(void) {
fprintf(stderr, "regression test runner\n");
size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
for (size_t i = 0; i < nargs; ++i) {
/* Short / long - help [option type] */
fprintf(
stderr,
"-%c / --%s \t- %s %s\n",
long_options[i].val,
long_options[i].name,
long_extras[i].help,
required_message(long_extras[i].opt_type));
if (long_options[i].val < 128) {
/* Long / short - help [option type] */
fprintf(
stderr,
"--%s / -%c \t- %s %s\n",
long_options[i].name,
long_options[i].val,
long_extras[i].help,
required_message(long_extras[i].opt_type));
} else {
/* Long only - help [option type] */
fprintf(
stderr,
"--%s \t- %s %s\n",
long_options[i].name,
long_extras[i].help,
required_message(long_extras[i].opt_type));
}
}
}
@ -220,8 +193,7 @@ static int parse_args(int argc, char** argv) {
continue;
fprintf(
stderr,
"-%c / --%s is a required argument but is not set\n",
long_options[i].val,
"--%s is a required argument but is not set\n",
long_options[i].name);
bad = 1;
}
@ -234,6 +206,88 @@ static int parse_args(int argc, char** argv) {
return 0;
}
/**
 * Helper macro to print the same formatted message to a file and to stderr.
 * The format arguments are expanded twice (once per fprintf call), so they
 * should not have side effects.
 */
#define tprintf(file, ...) \
do { \
fprintf(file, __VA_ARGS__); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
/** Helper macro to flush both a file and stderr. */
#define tflush(file) \
do { \
fflush(file); \
fflush(stderr); \
} while (0)
/**
 * Returns how many spaces are needed to pad `name` out to g_max_name_len
 * columns, or 0 when the name is already that long or longer.
 *
 * Clamping matters: a negative width passed through "%*s" is interpreted by
 * printf as left-justification with the absolute width, which would emit
 * spurious spaces instead of none.
 */
static int name_padding(char const* name) {
    size_t const len = strlen(name);
    if (g_max_name_len <= 0 || len >= (size_t)g_max_name_len) {
        return 0;
    }
    return g_max_name_len - (int)len;
}

/**
 * Prints the three name columns of a results row to `results` and stderr.
 *
 * Each name is written as "<name>, " followed by enough spaces to align the
 * next column at g_max_name_len characters. No newline is written; the
 * caller appends the final column.
 *
 * @param results     The stream that receives the row (stderr gets a copy).
 * @param data_name   Text for the data column.
 * @param config_name Text for the config column.
 * @param method_name Text for the method column.
 */
void tprint_names(
    FILE* results,
    char const* data_name,
    char const* config_name,
    char const* method_name) {
    int const data_padding = name_padding(data_name);
    int const config_padding = name_padding(config_name);
    int const method_padding = name_padding(method_name);

    /* Each "%*s" with an empty string prints exactly `padding` spaces. */
    tprintf(
        results,
        "%s, %*s%s, %*s%s, %*s",
        data_name,
        data_padding,
        "",
        config_name,
        config_padding,
        "",
        method_name,
        method_padding,
        "");
}
/**
* Run all the regression tests and record the results table to results and
* stderr progressively.
*/
static int run_all(FILE* results) {
tprint_names(results, "Data", "Config", "Method");
tprintf(results, "Total compressed size\n");
for (size_t method = 0; methods[method] != NULL; ++method) {
if (g_method != NULL && strcmp(methods[method]->name, g_method))
continue;
for (size_t datum = 0; data[datum] != NULL; ++datum) {
if (g_data != NULL && strcmp(data[datum]->name, g_data))
continue;
/* Create the state common to all configs */
method_state_t* state = methods[method]->create(data[datum]);
for (size_t config = 0; configs[config] != NULL; ++config) {
if (g_config != NULL && strcmp(configs[config]->name, g_config))
continue;
if (config_skip_data(configs[config], data[datum]))
continue;
/* Print the result for the (method, data, config) tuple. */
result_t const result =
methods[method]->compress(state, configs[config]);
if (result_is_skip(result))
continue;
tprint_names(
results,
data[datum]->name,
configs[config]->name,
methods[method]->name);
if (result_is_error(result)) {
tprintf(results, "%s\n", result_get_error_string(result));
} else {
tprintf(
results,
"%llu\n",
(unsigned long long)result_get_data(result).total_size);
}
tflush(results);
}
methods[method]->destroy(state);
}
}
return 0;
}
/** memcmp() the old results file and the new results file. */
static int diff_results(char const* actual_file, char const* expected_file) {
data_buffer_t const actual = data_buffer_read(actual_file);