Merge pull request #526 from iburinoc/educational

Add educational decoder to /contrib
This commit is contained in:
Yann Collet 2017-02-07 15:34:33 -08:00 committed by GitHub
commit 60259eb9a0
4 changed files with 2500 additions and 0 deletions

View File

@ -0,0 +1,19 @@
Educational Decoder
===================
`zstd_decompress.c` is a self-contained implementation in C99 of a decoder,
according to the [Zstandard format specification].
While it does not implement as many features as the reference decoder,
such as the streaming API or content checksums, it is written to be easy to
follow and understand, to help understand how the Zstandard format works.
It's laid out to match the [format specification],
so it can be used to understand how complex segments could be implemented.
It also contains implementations of Huffman and FSE table decoding.
[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
`harness.c` provides a simple test harness around the decoder:
harness <input-file> <output-file> [dictionary]

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#include <stdio.h>
#include <stdlib.h>
#include "zstd_decompress.h"
typedef unsigned char u8;
// If the data doesn't have decompressed size with it, fallback on assuming the
// compression ratio is at most 16
#define MAX_COMPRESSION_RATIO (16)
// Protect against allocating too much memory for output
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
u8 *input;
u8 *output;
u8 *dict;
size_t read_file(const char *path, u8 **ptr) {
FILE *f = fopen(path, "rb");
if (!f) {
fprintf(stderr, "failed to open file %s\n", path);
exit(1);
}
fseek(f, 0L, SEEK_END);
size_t size = ftell(f);
rewind(f);
*ptr = malloc(size);
if (!ptr) {
fprintf(stderr, "failed to allocate memory to hold %s\n", path);
exit(1);
}
size_t pos = 0;
while (!feof(f)) {
size_t read = fread(&(*ptr)[pos], 1, size, f);
if (ferror(f)) {
fprintf(stderr, "error while reading file %s\n", path);
exit(1);
}
pos += read;
}
fclose(f);
return pos;
}
void write_file(const char *path, const u8 *ptr, size_t size) {
FILE *f = fopen(path, "wb");
size_t written = 0;
while (written < size) {
written += fwrite(&ptr[written], 1, size, f);
if (ferror(f)) {
fprintf(stderr, "error while writing file %s\n", path);
exit(1);
}
}
fclose(f);
}
int main(int argc, char **argv) {
if (argc < 3) {
fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
argv[0]);
return 1;
}
size_t input_size = read_file(argv[1], &input);
size_t dict_size = 0;
if (argc >= 4) {
dict_size = read_file(argv[3], &dict);
}
size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
if (decompressed_size == -1) {
decompressed_size = MAX_COMPRESSION_RATIO * input_size;
fprintf(stderr, "WARNING: Compressed data does not contain "
"decompressed size, going to assume the compression "
"ratio is at most %d (decompressed size of at most "
"%zu)\n",
MAX_COMPRESSION_RATIO, decompressed_size);
}
if (decompressed_size > MAX_OUTPUT_SIZE) {
fprintf(stderr,
"Required output size too large for this implementation\n");
return 1;
}
output = malloc(decompressed_size);
if (!output) {
fprintf(stderr, "failed to allocate memory\n");
return 1;
}
size_t decompressed =
ZSTD_decompress_with_dict(output, decompressed_size,
input, input_size, dict, dict_size);
write_file(argv[2], output, decompressed);
free(input);
free(output);
free(dict);
input = output = dict = NULL;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
const void *const src, const size_t src_len);
size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
const void *const src, const size_t src_len,
const void *const dict, const size_t dict_len);
size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);