Merge pull request #654 from iburinoc/splittable
[RFC] Splittable Format and API
This commit is contained in:
commit
d47709b6ea
4
contrib/seekable_format/examples/.gitignore
vendored
Normal file
4
contrib/seekable_format/examples/.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
seekable_compression
|
||||
seekable_decompression
|
||||
parallel_processing
|
||||
parallel_compression
|
42
contrib/seekable_format/examples/Makefile
Normal file
42
contrib/seekable_format/examples/Makefile
Normal file
@ -0,0 +1,42 @@
|
||||
# ################################################################
|
||||
# Copyright (c) 2017-present, Facebook, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree. An additional grant
|
||||
# of patent rights can be found in the PATENTS file in the same directory.
|
||||
# ################################################################
|
||||
|
||||
# This Makefile presumes libzstd is built, using `make` in / or /lib/
|
||||
|
||||
LDFLAGS += ../../../lib/libzstd.a
|
||||
CPPFLAGS += -I../ -I../../../lib -I../../../lib/common
|
||||
|
||||
CFLAGS ?= -O3
|
||||
CFLAGS += -g
|
||||
|
||||
SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c
|
||||
|
||||
.PHONY: default all clean test
|
||||
|
||||
default: all
|
||||
|
||||
all: seekable_compression seekable_decompression parallel_processing
|
||||
|
||||
seekable_compression : seekable_compression.c $(SEEKABLE_OBJS)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
seekable_decompression : seekable_decompression.c $(SEEKABLE_OBJS)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
parallel_processing : parallel_processing.c $(SEEKABLE_OBJS)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread
|
||||
|
||||
parallel_compression : parallel_compression.c $(SEEKABLE_OBJS)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread
|
||||
|
||||
clean:
|
||||
@rm -f core *.o tmp* result* *.zst \
|
||||
seekable_compression seekable_decompression \
|
||||
parallel_processing parallel_compression
|
||||
@echo Cleaning completed
|
214
contrib/seekable_format/examples/parallel_compression.c
Normal file
214
contrib/seekable_format/examples/parallel_compression.c
Normal file
@ -0,0 +1,214 @@
|
||||
/**
|
||||
* Copyright 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the license found in the
|
||||
* LICENSE-examples file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc, free, exit, atoi
|
||||
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
|
||||
#include <string.h> // strlen, memset, strcat
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
#if defined(WIN32) || defined(_WIN32)
|
||||
# include <windows.h>
|
||||
# define SLEEP(x) Sleep(x)
|
||||
#else
|
||||
# include <unistd.h>
|
||||
# define SLEEP(x) usleep(x * 1000)
|
||||
#endif
|
||||
|
||||
#define XXH_NAMESPACE ZSTD_
|
||||
#include "xxhash.h"
|
||||
|
||||
#include "pool.h" // use zstd thread pool for demo
|
||||
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
static void* malloc_orDie(size_t size)
|
||||
{
|
||||
void* const buff = malloc(size);
|
||||
if (buff) return buff;
|
||||
/* error */
|
||||
perror("malloc:");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static FILE* fopen_orDie(const char *filename, const char *instruction)
|
||||
{
|
||||
FILE* const inFile = fopen(filename, instruction);
|
||||
if (inFile) return inFile;
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(3);
|
||||
}
|
||||
|
||||
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
|
||||
{
|
||||
size_t const readSize = fread(buffer, 1, sizeToRead, file);
|
||||
if (readSize == sizeToRead) return readSize; /* good */
|
||||
if (feof(file)) return readSize; /* good, reached end of file */
|
||||
/* error */
|
||||
perror("fread");
|
||||
exit(4);
|
||||
}
|
||||
|
||||
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
|
||||
{
|
||||
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
|
||||
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
|
||||
/* error */
|
||||
perror("fwrite");
|
||||
exit(5);
|
||||
}
|
||||
|
||||
static size_t fclose_orDie(FILE* file)
|
||||
{
|
||||
if (!fclose(file)) return 0;
|
||||
/* error */
|
||||
perror("fclose");
|
||||
exit(6);
|
||||
}
|
||||
|
||||
static void fseek_orDie(FILE* file, long int offset, int origin)
|
||||
{
|
||||
if (!fseek(file, offset, origin)) {
|
||||
if (!fflush(file)) return;
|
||||
}
|
||||
/* error */
|
||||
perror("fseek");
|
||||
exit(7);
|
||||
}
|
||||
|
||||
static long int ftell_orDie(FILE* file)
|
||||
{
|
||||
long int off = ftell(file);
|
||||
if (off != -1) return off;
|
||||
/* error */
|
||||
perror("ftell");
|
||||
exit(8);
|
||||
}
|
||||
|
||||
struct job {
|
||||
const void* src;
|
||||
size_t srcSize;
|
||||
void* dst;
|
||||
size_t dstSize;
|
||||
|
||||
unsigned checksum;
|
||||
|
||||
int compressionLevel;
|
||||
int done;
|
||||
};
|
||||
|
||||
static void compressFrame(void* opaque)
|
||||
{
|
||||
struct job* job = opaque;
|
||||
|
||||
job->checksum = XXH64(job->src, job->srcSize, 0);
|
||||
|
||||
size_t ret = ZSTD_compress(job->dst, job->dstSize, job->src, job->srcSize, job->compressionLevel);
|
||||
if (ZSTD_isError(ret)) {
|
||||
fprintf(stderr, "ZSTD_compress() error : %s \n", ZSTD_getErrorName(ret));
|
||||
exit(20);
|
||||
}
|
||||
|
||||
job->dstSize = ret;
|
||||
job->done = 1;
|
||||
}
|
||||
|
||||
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize, int nbThreads)
|
||||
{
|
||||
POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
|
||||
if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }
|
||||
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
|
||||
if (ZSTD_compressBound(frameSize) > 0xFFFFFFFFU) { fprintf(stderr, "Frame size too large \n"); exit(10); }
|
||||
unsigned dstSize = ZSTD_compressBound(frameSize);
|
||||
|
||||
|
||||
fseek_orDie(fin, 0, SEEK_END);
|
||||
long int length = ftell_orDie(fin);
|
||||
fseek_orDie(fin, 0, SEEK_SET);
|
||||
|
||||
size_t numFrames = (length + frameSize - 1) / frameSize;
|
||||
|
||||
struct job* jobs = malloc_orDie(sizeof(struct job) * numFrames);
|
||||
|
||||
size_t i;
|
||||
for(i = 0; i < numFrames; i++) {
|
||||
void* in = malloc_orDie(frameSize);
|
||||
void* out = malloc_orDie(dstSize);
|
||||
|
||||
size_t inSize = fread_orDie(in, frameSize, fin);
|
||||
|
||||
jobs[i].src = in;
|
||||
jobs[i].srcSize = inSize;
|
||||
jobs[i].dst = out;
|
||||
jobs[i].dstSize = dstSize;
|
||||
jobs[i].compressionLevel = cLevel;
|
||||
jobs[i].done = 0;
|
||||
POOL_add(pool, compressFrame, &jobs[i]);
|
||||
}
|
||||
|
||||
ZSTD_frameLog* fl = ZSTD_seekable_createFrameLog(1);
|
||||
if (fl == NULL) { fprintf(stderr, "ZSTD_seekable_createFrameLog() failed \n"); exit(11); }
|
||||
for (i = 0; i < numFrames; i++) {
|
||||
while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */
|
||||
fwrite_orDie(jobs[i].dst, jobs[i].dstSize, fout);
|
||||
free((void*)jobs[i].src);
|
||||
free(jobs[i].dst);
|
||||
|
||||
size_t ret = ZSTD_seekable_logFrame(fl, jobs[i].dstSize, jobs[i].srcSize, jobs[i].checksum);
|
||||
if (ZSTD_isError(ret)) { fprintf(stderr, "ZSTD_seekable_logFrame() error : %s \n", ZSTD_getErrorName(ret)); }
|
||||
}
|
||||
|
||||
{ unsigned char seekTableBuff[1024];
|
||||
ZSTD_outBuffer out = {seekTableBuff, 1024, 0};
|
||||
while (ZSTD_seekable_writeSeekTable(fl, &out) != 0) {
|
||||
fwrite_orDie(seekTableBuff, out.pos, fout);
|
||||
out.pos = 0;
|
||||
}
|
||||
fwrite_orDie(seekTableBuff, out.pos, fout);
|
||||
}
|
||||
|
||||
ZSTD_seekable_freeFrameLog(fl);
|
||||
free(jobs);
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
}
|
||||
|
||||
static const char* createOutFilename_orDie(const char* filename)
|
||||
{
|
||||
size_t const inL = strlen(filename);
|
||||
size_t const outL = inL + 5;
|
||||
void* outSpace = malloc_orDie(outL);
|
||||
memset(outSpace, 0, outL);
|
||||
strcat(outSpace, filename);
|
||||
strcat(outSpace, ".zst");
|
||||
return (const char*)outSpace;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
const char* const exeName = argv[0];
|
||||
if (argc!=4) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE FRAME_SIZE NB_THREADS\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
{ const char* const inFileName = argv[1];
|
||||
unsigned const frameSize = (unsigned)atoi(argv[2]);
|
||||
int const nbThreads = atoi(argv[3]);
|
||||
|
||||
const char* const outFileName = createOutFilename_orDie(inFileName);
|
||||
compressFile_orDie(inFileName, outFileName, 5, frameSize, nbThreads);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
193
contrib/seekable_format/examples/parallel_processing.c
Normal file
193
contrib/seekable_format/examples/parallel_processing.c
Normal file
@ -0,0 +1,193 @@
|
||||
/**
|
||||
* Copyright 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the license found in the
|
||||
* LICENSE-examples file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* A simple demo that sums up all the bytes in the file in parallel using
|
||||
* seekable decompression and the zstd thread pool
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc, exit
|
||||
#include <stdio.h> // fprintf, perror, feof
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
#if defined(WIN32) || defined(_WIN32)
|
||||
# include <windows.h>
|
||||
# define SLEEP(x) Sleep(x)
|
||||
#else
|
||||
# include <unistd.h>
|
||||
# define SLEEP(x) usleep(x * 1000)
|
||||
#endif
|
||||
|
||||
#include "pool.h" // use zstd thread pool for demo
|
||||
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
static void* malloc_orDie(size_t size)
|
||||
{
|
||||
void* const buff = malloc(size);
|
||||
if (buff) return buff;
|
||||
/* error */
|
||||
perror("malloc");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void* realloc_orDie(void* ptr, size_t size)
|
||||
{
|
||||
ptr = realloc(ptr, size);
|
||||
if (ptr) return ptr;
|
||||
/* error */
|
||||
perror("realloc");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static FILE* fopen_orDie(const char *filename, const char *instruction)
|
||||
{
|
||||
FILE* const inFile = fopen(filename, instruction);
|
||||
if (inFile) return inFile;
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(3);
|
||||
}
|
||||
|
||||
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
|
||||
{
|
||||
size_t const readSize = fread(buffer, 1, sizeToRead, file);
|
||||
if (readSize == sizeToRead) return readSize; /* good */
|
||||
if (feof(file)) return readSize; /* good, reached end of file */
|
||||
/* error */
|
||||
perror("fread");
|
||||
exit(4);
|
||||
}
|
||||
|
||||
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
|
||||
{
|
||||
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
|
||||
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
|
||||
/* error */
|
||||
perror("fwrite");
|
||||
exit(5);
|
||||
}
|
||||
|
||||
static size_t fclose_orDie(FILE* file)
|
||||
{
|
||||
if (!fclose(file)) return 0;
|
||||
/* error */
|
||||
perror("fclose");
|
||||
exit(6);
|
||||
}
|
||||
|
||||
static void fseek_orDie(FILE* file, long int offset, int origin) {
|
||||
if (!fseek(file, offset, origin)) {
|
||||
if (!fflush(file)) return;
|
||||
}
|
||||
/* error */
|
||||
perror("fseek");
|
||||
exit(7);
|
||||
}
|
||||
|
||||
struct sum_job {
|
||||
const char* fname;
|
||||
unsigned long long sum;
|
||||
unsigned frameNb;
|
||||
int done;
|
||||
};
|
||||
|
||||
static void sumFrame(void* opaque)
|
||||
{
|
||||
struct sum_job* job = (struct sum_job*)opaque;
|
||||
job->done = 0;
|
||||
|
||||
FILE* const fin = fopen_orDie(job->fname, "rb");
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
size_t const frameSize = ZSTD_seekable_getFrameDecompressedSize(seekable, job->frameNb);
|
||||
unsigned char* data = malloc_orDie(frameSize);
|
||||
|
||||
size_t result = ZSTD_seekable_decompressFrame(seekable, data, frameSize, job->frameNb);
|
||||
if (ZSTD_isError(result)) { fprintf(stderr, "ZSTD_seekable_decompressFrame() error : %s \n", ZSTD_getErrorName(result)); exit(12); }
|
||||
|
||||
unsigned long long sum = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < frameSize; i++) {
|
||||
sum += data[i];
|
||||
}
|
||||
job->sum = sum;
|
||||
job->done = 1;
|
||||
|
||||
fclose(fin);
|
||||
ZSTD_seekable_free(seekable);
|
||||
free(data);
|
||||
}
|
||||
|
||||
static void sumFile_orDie(const char* fname, int nbThreads)
|
||||
{
|
||||
POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
|
||||
if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }
|
||||
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
size_t const numFrames = ZSTD_seekable_getNumFrames(seekable);
|
||||
struct sum_job* jobs = (struct sum_job*)malloc(numFrames * sizeof(struct sum_job));
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i < numFrames; i++) {
|
||||
jobs[i] = (struct sum_job){ fname, 0, i, 0 };
|
||||
POOL_add(pool, sumFrame, &jobs[i]);
|
||||
}
|
||||
|
||||
unsigned long long total = 0;
|
||||
|
||||
for (i = 0; i < numFrames; i++) {
|
||||
while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */
|
||||
total += jobs[i].sum;
|
||||
}
|
||||
|
||||
printf("Sum: %llu\n", total);
|
||||
|
||||
POOL_free(pool);
|
||||
ZSTD_seekable_free(seekable);
|
||||
fclose(fin);
|
||||
free(jobs);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc!=3) {
|
||||
fprintf(stderr, "wrong arguments\n");
|
||||
fprintf(stderr, "usage:\n");
|
||||
fprintf(stderr, "%s FILE NB_THREADS\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
{
|
||||
const char* const inFilename = argv[1];
|
||||
int const nbThreads = atoi(argv[2]);
|
||||
sumFile_orDie(inFilename, nbThreads);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
131
contrib/seekable_format/examples/seekable_compression.c
Normal file
131
contrib/seekable_format/examples/seekable_compression.c
Normal file
@ -0,0 +1,131 @@
|
||||
/**
|
||||
* Copyright 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the license found in the
|
||||
* LICENSE-examples file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc, free, exit, atoi
|
||||
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
|
||||
#include <string.h> // strlen, memset, strcat
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
static void* malloc_orDie(size_t size)
|
||||
{
|
||||
void* const buff = malloc(size);
|
||||
if (buff) return buff;
|
||||
/* error */
|
||||
perror("malloc:");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static FILE* fopen_orDie(const char *filename, const char *instruction)
|
||||
{
|
||||
FILE* const inFile = fopen(filename, instruction);
|
||||
if (inFile) return inFile;
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(3);
|
||||
}
|
||||
|
||||
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
|
||||
{
|
||||
size_t const readSize = fread(buffer, 1, sizeToRead, file);
|
||||
if (readSize == sizeToRead) return readSize; /* good */
|
||||
if (feof(file)) return readSize; /* good, reached end of file */
|
||||
/* error */
|
||||
perror("fread");
|
||||
exit(4);
|
||||
}
|
||||
|
||||
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
|
||||
{
|
||||
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
|
||||
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
|
||||
/* error */
|
||||
perror("fwrite");
|
||||
exit(5);
|
||||
}
|
||||
|
||||
static size_t fclose_orDie(FILE* file)
|
||||
{
|
||||
if (!fclose(file)) return 0;
|
||||
/* error */
|
||||
perror("fclose");
|
||||
exit(6);
|
||||
}
|
||||
|
||||
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
size_t const buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */
|
||||
void* const buffIn = malloc_orDie(buffInSize);
|
||||
size_t const buffOutSize = ZSTD_CStreamOutSize(); /* can always flush a full block */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream();
|
||||
if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); }
|
||||
size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
size_t read, toRead = buffInSize;
|
||||
while( (read = fread_orDie(buffIn, toRead, fin)) ) {
|
||||
ZSTD_inBuffer input = { buffIn, read, 0 };
|
||||
while (input.pos < input.size) {
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
toRead = ZSTD_seekable_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */
|
||||
if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_seekable_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); }
|
||||
if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
size_t const remainingToFlush = ZSTD_seekable_endStream(cstream, &output); /* close stream */
|
||||
if (ZSTD_isError(remainingToFlush)) { fprintf(stderr, "ZSTD_seekable_endStream() error : %s \n", ZSTD_getErrorName(remainingToFlush)); exit(13); }
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
if (!remainingToFlush) break;
|
||||
}
|
||||
|
||||
ZSTD_seekable_freeCStream(cstream);
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
static const char* createOutFilename_orDie(const char* filename)
|
||||
{
|
||||
size_t const inL = strlen(filename);
|
||||
size_t const outL = inL + 5;
|
||||
void* outSpace = malloc_orDie(outL);
|
||||
memset(outSpace, 0, outL);
|
||||
strcat(outSpace, filename);
|
||||
strcat(outSpace, ".zst");
|
||||
return (const char*)outSpace;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
const char* const exeName = argv[0];
|
||||
if (argc!=3) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE FRAME_SIZE\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
{ const char* const inFileName = argv[1];
|
||||
unsigned const frameSize = (unsigned)atoi(argv[2]);
|
||||
|
||||
const char* const outFileName = createOutFilename_orDie(inFileName);
|
||||
compressFile_orDie(inFileName, outFileName, 5, frameSize);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
137
contrib/seekable_format/examples/seekable_decompression.c
Normal file
137
contrib/seekable_format/examples/seekable_decompression.c
Normal file
@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Copyright 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the license found in the
|
||||
* LICENSE-examples file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h> // malloc, exit
|
||||
#include <stdio.h> // fprintf, perror, feof
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
static void* malloc_orDie(size_t size)
|
||||
{
|
||||
void* const buff = malloc(size);
|
||||
if (buff) return buff;
|
||||
/* error */
|
||||
perror("malloc");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void* realloc_orDie(void* ptr, size_t size)
|
||||
{
|
||||
ptr = realloc(ptr, size);
|
||||
if (ptr) return ptr;
|
||||
/* error */
|
||||
perror("realloc");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static FILE* fopen_orDie(const char *filename, const char *instruction)
|
||||
{
|
||||
FILE* const inFile = fopen(filename, instruction);
|
||||
if (inFile) return inFile;
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(3);
|
||||
}
|
||||
|
||||
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
|
||||
{
|
||||
size_t const readSize = fread(buffer, 1, sizeToRead, file);
|
||||
if (readSize == sizeToRead) return readSize; /* good */
|
||||
if (feof(file)) return readSize; /* good, reached end of file */
|
||||
/* error */
|
||||
perror("fread");
|
||||
exit(4);
|
||||
}
|
||||
|
||||
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
|
||||
{
|
||||
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
|
||||
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
|
||||
/* error */
|
||||
perror("fwrite");
|
||||
exit(5);
|
||||
}
|
||||
|
||||
static size_t fclose_orDie(FILE* file)
|
||||
{
|
||||
if (!fclose(file)) return 0;
|
||||
/* error */
|
||||
perror("fclose");
|
||||
exit(6);
|
||||
}
|
||||
|
||||
static void fseek_orDie(FILE* file, long int offset, int origin) {
|
||||
if (!fseek(file, offset, origin)) {
|
||||
if (!fflush(file)) return;
|
||||
}
|
||||
/* error */
|
||||
perror("fseek");
|
||||
exit(7);
|
||||
}
|
||||
|
||||
|
||||
static void decompressFile_orDie(const char* fname, unsigned startOffset, unsigned endOffset)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = stdout;
|
||||
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
while (startOffset < endOffset) {
|
||||
size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
|
||||
|
||||
if (ZSTD_isError(result)) {
|
||||
fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
|
||||
ZSTD_getErrorName(result));
|
||||
exit(12);
|
||||
}
|
||||
fwrite_orDie(buffOut, result, fout);
|
||||
startOffset += result;
|
||||
}
|
||||
|
||||
ZSTD_seekable_free(seekable);
|
||||
fclose_orDie(fin);
|
||||
fclose_orDie(fout);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc!=4) {
|
||||
fprintf(stderr, "wrong arguments\n");
|
||||
fprintf(stderr, "usage:\n");
|
||||
fprintf(stderr, "%s FILE START END\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
{
|
||||
const char* const inFilename = argv[1];
|
||||
unsigned const startOffset = (unsigned) atoi(argv[2]);
|
||||
unsigned const endOffset = (unsigned) atoi(argv[3]);
|
||||
decompressFile_orDie(inFilename, startOffset, endOffset);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
184
contrib/seekable_format/zstd_seekable.h
Normal file
184
contrib/seekable_format/zstd_seekable.h
Normal file
@ -0,0 +1,184 @@
|
||||
#ifndef SEEKABLE_H
|
||||
#define SEEKABLE_H
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static const unsigned ZSTD_seekTableFooterSize = 9;
|
||||
|
||||
#define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1
|
||||
|
||||
#define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U
|
||||
|
||||
/* Limit the maximum size to avoid any potential issues storing the compressed size */
|
||||
#define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U
|
||||
|
||||
/*-****************************************************************************
|
||||
* Seekable Format
|
||||
*
|
||||
* The seekable format splits the compressed data into a series of "frames",
|
||||
* each compressed individually so that decompression of a section in the
|
||||
* middle of an archive only requires zstd to decompress at most a frame's
|
||||
* worth of extra data, instead of the entire archive.
|
||||
******************************************************************************/
|
||||
|
||||
typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
|
||||
typedef struct ZSTD_seekable_s ZSTD_seekable;
|
||||
|
||||
/*-****************************************************************************
|
||||
* Seekable compression - HowTo
|
||||
* A ZSTD_seekable_CStream object is required to tracking streaming operation.
|
||||
* Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/
|
||||
* release resources.
|
||||
*
|
||||
* Streaming objects are reusable to avoid allocation and deallocation,
|
||||
* to start a new compression operation call ZSTD_seekable_initCStream() on the
|
||||
* compressor.
|
||||
*
|
||||
* Data streamed to the seekable compressor will automatically be split into
|
||||
* frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
|
||||
* or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is
|
||||
* called or when the default maximum frame size (2GB) is reached.
|
||||
*
|
||||
* Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
|
||||
* for a new compression operation.
|
||||
* `maxFrameSize` indicates the size at which to automatically start a new
|
||||
* seekable frame. `maxFrameSize == 0` implies the default maximum size.
|
||||
* `checksumFlag` indicates whether or not the seek table should include frame
|
||||
* checksums on the uncompressed data for verification.
|
||||
* @return : a size hint for input to provide for compression, or an error code
|
||||
* checkable with ZSTD_isError()
|
||||
*
|
||||
* Use ZSTD_seekable_compressStream() repetitively to consume input stream.
|
||||
* The function will automatically update both `pos` fields.
|
||||
* Note that it may not consume the entire input, in which case `pos < size`,
|
||||
* and it's up to the caller to present again remaining data.
|
||||
* @return : a size hint, preferred nb of bytes to use as input for next
|
||||
* function call or an error code, which can be tested using
|
||||
* ZSTD_isError().
|
||||
* Note 1 : it's just a hint, to help latency a little, any other
|
||||
* value will work fine.
|
||||
*
|
||||
* At any time, call ZSTD_seekable_endFrame() to end the current frame and
|
||||
* start a new one.
|
||||
*
|
||||
* ZSTD_seekable_endStream() will end the current frame, and then write the seek
|
||||
* table so that decompressors can efficiently find compressed frames.
|
||||
* ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush
|
||||
* all the necessary data to `output`. In this case, it should be called again
|
||||
* until all remaining data is flushed out and 0 is returned.
|
||||
******************************************************************************/
|
||||
|
||||
/*===== Seekable compressor management =====*/
|
||||
ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs);
|
||||
|
||||
/*===== Seekable compression functions =====*/
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
|
||||
|
||||
/*= Raw seek table API
|
||||
* These functions allow for the seek table to be constructed directly.
|
||||
* This table can then be appended to a file of concatenated frames.
|
||||
* This allows the frames to be compressed independently, even in parallel,
|
||||
* and compiled together afterward into a seekable archive.
|
||||
*
|
||||
* Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking
|
||||
* structure.
|
||||
*
|
||||
* Call ZSTD_seekable_logFrame() once for each frame in the archive.
|
||||
* checksum is optional, and will not be used if checksumFlag was 0 when the
|
||||
* frame log was created. If present, it should be the least significant 32
|
||||
* bits of the XXH64 hash of the uncompressed data.
|
||||
*
|
||||
* Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table.
|
||||
* If the entire table was written, the return value will be 0. Otherwise,
|
||||
* it will be equal to the number of bytes left to write. */
|
||||
typedef struct ZSTD_frameLog_s ZSTD_frameLog;
|
||||
ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);
|
||||
|
||||
/*-****************************************************************************
|
||||
* Seekable decompression - HowTo
|
||||
* A ZSTD_seekable object is required to tracking the seekTable.
|
||||
*
|
||||
* Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the
|
||||
* the seek table provided in the input.
|
||||
* There are three modes for ZSTD_seekable_init:
|
||||
* - ZSTD_seekable_initBuff() : An in-memory API. The data contained in
|
||||
* `src` should be the entire seekable file, including the seek table.
|
||||
* `src` should be kept alive and unmodified until the ZSTD_seekable object
|
||||
* is freed or reset.
|
||||
* - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and
|
||||
* fseek will be used to access the required data for building the seek
|
||||
* table and doing decompression operations. `src` should not be closed
|
||||
* or modified until the ZSTD_seekable object is freed or reset.
|
||||
* - ZSTD_seekable_initAdvanced() : A general API allowing the client to
|
||||
* provide its own read and seek callbacks.
|
||||
* + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`.
|
||||
* Premature EOF should be treated as an error.
|
||||
* + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`,
|
||||
* where origin is either SEEK_SET (beginning of
|
||||
* file), or SEEK_END (end of file).
|
||||
* Both functions should return a non-negative value in case of success, and a
|
||||
* negative value in case of failure. If implementing using this API and
|
||||
* stdio, be careful with files larger than 4GB and fseek. All of these
|
||||
* functions return an error code checkable with ZSTD_isError().
|
||||
*
|
||||
* Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed
|
||||
* offset `offset`. ZSTD_seekable_decompress may have to decompress the entire
|
||||
* prefix of the frame before the desired data if it has not already processed
|
||||
* this section. If ZSTD_seekable_decompress is called multiple times for a
|
||||
* consecutive range of data, it will efficiently retain the decompressor object
|
||||
* and avoid redecompressing frame prefixes. The return value is the number of
|
||||
* bytes decompressed, or an error code checkable with ZSTD_isError().
|
||||
*
|
||||
* The seek table access functions can be used to obtain the data contained
|
||||
* in the seek table. If frameIndex is larger than the value returned by
|
||||
* ZSTD_seekable_getNumFrames(), they will return error codes checkable with
|
||||
* ZSTD_isError(). Note that since the offset access functions return
|
||||
* unsigned long long instead of size_t, in this case they will instead return
|
||||
* the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
|
||||
******************************************************************************/
|
||||
|
||||
/*===== Seekable decompressor management =====*/
|
||||
ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs);
|
||||
|
||||
/*===== Seekable decompression functions =====*/
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);
|
||||
|
||||
#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
|
||||
/*===== Seek Table access functions =====*/
|
||||
ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs);
|
||||
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
|
||||
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
|
||||
ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset);
|
||||
|
||||
/*===== Seekable advanced I/O API =====*/
|
||||
typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);
|
||||
typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin);
|
||||
typedef struct {
|
||||
void* opaque;
|
||||
ZSTD_seekable_read* read;
|
||||
ZSTD_seekable_seek* seek;
|
||||
} ZSTD_seekable_customFile;
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
116
contrib/seekable_format/zstd_seekable_compression_format.md
Normal file
116
contrib/seekable_format/zstd_seekable_compression_format.md
Normal file
@ -0,0 +1,116 @@
|
||||
# Zstandard Seekable Format
|
||||
|
||||
### Notices
|
||||
|
||||
Copyright (c) 2017-present Facebook, Inc.
|
||||
|
||||
Permission is granted to copy and distribute this document
|
||||
for any purpose and without charge,
|
||||
including translations into other languages
|
||||
and incorporation into compilations,
|
||||
provided that the copyright notice and this notice are preserved,
|
||||
and that any substantive changes or deletions from the original
|
||||
are clearly marked.
|
||||
Distribution of this document is unlimited.
|
||||
|
||||
### Version
|
||||
0.1.0 (11/04/17)
|
||||
|
||||
## Introduction
|
||||
This document defines a format for compressed data to be stored so that subranges of the data can be efficiently decompressed without requiring the entire document to be decompressed.
|
||||
This is done by splitting up the input data into frames,
|
||||
each of which are compressed independently,
|
||||
and so can be decompressed independently.
|
||||
Decompression then takes advantage of a provided 'seek table', which allows the decompressor to immediately jump to the desired data. This is done in a way that is compatible with the original Zstandard format by placing the seek table in a Zstandard skippable frame.
|
||||
|
||||
### Overall conventions
|
||||
In this document:
|
||||
- square brackets i.e. `[` and `]` are used to indicate optional fields or parameters.
|
||||
- the naming convention for identifiers is `Mixed_Case_With_Underscores`
|
||||
- All numeric fields are little-endian unless specified otherwise
|
||||
|
||||
## Format
|
||||
|
||||
The format consists of a number of frames (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table.
|
||||
|
||||
### Seek Table Format
|
||||
The structure of the seek table frame is as follows:
|
||||
|
||||
|`Skippable_Magic_Number`|`Frame_Size`|`[Seek_Table_Entries]`|`Seek_Table_Footer`|
|
||||
|------------------------|------------|----------------------|-------------------|
|
||||
| 4 bytes | 4 bytes | 8-12 bytes each | 9 bytes |
|
||||
|
||||
__`Skippable_Magic_Number`__
|
||||
|
||||
Value : 0x184D2A5E.
|
||||
This is for compatibility with [Zstandard skippable frames].
|
||||
Since it is legal for other Zstandard skippable frames to use the same
|
||||
magic number, it is not recommended for a decoder to recognize frames
|
||||
solely on this.
|
||||
|
||||
__`Frame_Size`__
|
||||
|
||||
The total size of the skippable frame, not including the `Skippable_Magic_Number` or `Frame_Size`.
|
||||
This is for compatibility with [Zstandard skippable frames].
|
||||
|
||||
[Zstandard skippable frames]: https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#skippable-frames
|
||||
|
||||
#### `Seek_Table_Footer`
|
||||
The seek table footer format is as follows:
|
||||
|
||||
|`Number_Of_Frames`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`|
|
||||
|------------------|-----------------------|-----------------------|
|
||||
| 4 bytes | 1 byte | 4 bytes |
|
||||
|
||||
__`Seekable_Magic_Number`__
|
||||
|
||||
Value : 0x8F92EAB1.
|
||||
This value must be the last bytes present in the compressed file so that decoders
|
||||
can efficiently find it and determine if there is an actual seek table present.
|
||||
|
||||
__`Number_Of_Frames`__
|
||||
|
||||
The number of stored frames in the data.
|
||||
|
||||
__`Seek_Table_Descriptor`__
|
||||
|
||||
A bitfield describing the format of the seek table.
|
||||
|
||||
| Bit number | Field name |
|
||||
| ---------- | ---------- |
|
||||
| 7 | `Checksum_Flag` |
|
||||
| 6-2 | `Reserved_Bits` |
|
||||
| 1-0 | `Unused_Bits` |
|
||||
|
||||
While only `Checksum_Flag` currently exists, there are 7 other bits in this field that can be used for future changes to the format,
|
||||
for example the addition of inline dictionaries.
|
||||
|
||||
__`Checksum_Flag`__
|
||||
|
||||
If the checksum flag is set, each of the seek table entries contains a 4 byte checksum of the uncompressed data contained in its frame.
|
||||
|
||||
`Reserved_Bits` are not currently used but may be used in the future for breaking changes, so a compliant decoder should ensure they are set to 0. `Unused_Bits` may be used in the future for non-breaking changes, so a compliant decoder should not interpret these bits.
|
||||
|
||||
#### __`Seek_Table_Entries`__
|
||||
|
||||
`Seek_Table_Entries` consists of `Number_Of_Frames` (one for each frame in the data, not including the seek table frame) entries of the following form, in sequence:
|
||||
|
||||
|`Compressed_Size`|`Decompressed_Size`|`[Checksum]`|
|
||||
|-----------------|-------------------|------------|
|
||||
| 4 bytes | 4 bytes | 4 bytes |
|
||||
|
||||
__`Compressed_Size`__
|
||||
|
||||
The compressed size of the frame.
|
||||
The cumulative sum of the `Compressed_Size` fields of frames `0` to `i` gives the offset in the compressed file of frame `i+1`.
|
||||
|
||||
__`Decompressed_Size`__
|
||||
|
||||
The size of the decompressed data contained in the frame. For skippable or otherwise empty frames, this value is 0.
|
||||
|
||||
__`Checksum`__
|
||||
|
||||
Only present if `Checksum_Flag` is set in the `Seek_Table_Descriptor`. Value : the least significant 32 bits of the XXH64 digest of the uncompressed data, stored in little-endian format.
|
||||
|
||||
## Version Changes
|
||||
- 0.1.0: initial version
|
366
contrib/seekable_format/zstdseek_compress.c
Normal file
366
contrib/seekable_format/zstdseek_compress.c
Normal file
@ -0,0 +1,366 @@
|
||||
/**
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the BSD-style license found in the
|
||||
* LICENSE file in the root directory of this source tree. An additional grant
|
||||
* of patent rights can be found in the PATENTS file in the same directory.
|
||||
*/
|
||||
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#define XXH_NAMESPACE ZSTD_
|
||||
#include "xxhash.h"
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "mem.h"
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; }
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(name) ((size_t)-ZSTD_error_##name)
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
typedef struct {
|
||||
U32 cSize;
|
||||
U32 dSize;
|
||||
U32 checksum;
|
||||
} framelogEntry_t;
|
||||
|
||||
struct ZSTD_frameLog_s {
|
||||
framelogEntry_t* entries;
|
||||
U32 size;
|
||||
U32 capacity;
|
||||
|
||||
int checksumFlag;
|
||||
|
||||
/* for use when streaming out the seek table */
|
||||
U32 seekTablePos;
|
||||
U32 seekTableIndex;
|
||||
} framelog_t;
|
||||
|
||||
struct ZSTD_seekable_CStream_s {
|
||||
ZSTD_CStream* cstream;
|
||||
ZSTD_frameLog framelog;
|
||||
|
||||
U32 frameCSize;
|
||||
U32 frameDSize;
|
||||
|
||||
XXH64_state_t xxhState;
|
||||
|
||||
U32 maxFrameSize;
|
||||
|
||||
int writingSeekTable;
|
||||
};
|
||||
|
||||
size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
|
||||
{
|
||||
/* allocate some initial space */
|
||||
size_t const FRAMELOG_STARTING_CAPACITY = 16;
|
||||
fl->entries = (framelogEntry_t*)malloc(
|
||||
sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY);
|
||||
if (fl->entries == NULL) return ERROR(memory_allocation);
|
||||
fl->capacity = FRAMELOG_STARTING_CAPACITY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
|
||||
{
|
||||
if (fl != NULL) free(fl->entries);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag)
|
||||
{
|
||||
ZSTD_frameLog* fl = malloc(sizeof(ZSTD_frameLog));
|
||||
if (fl == NULL) return NULL;
|
||||
|
||||
if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) {
|
||||
free(fl);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fl->checksumFlag = checksumFlag;
|
||||
fl->seekTablePos = 0;
|
||||
fl->seekTableIndex = 0;
|
||||
fl->size = 0;
|
||||
|
||||
return fl;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl)
|
||||
{
|
||||
ZSTD_seekable_frameLog_freeVec(fl);
|
||||
free(fl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ZSTD_seekable_CStream* ZSTD_seekable_createCStream()
|
||||
{
|
||||
ZSTD_seekable_CStream* zcs = malloc(sizeof(ZSTD_seekable_CStream));
|
||||
|
||||
if (zcs == NULL) return NULL;
|
||||
|
||||
memset(zcs, 0, sizeof(*zcs));
|
||||
|
||||
zcs->cstream = ZSTD_createCStream();
|
||||
if (zcs->cstream == NULL) goto failed1;
|
||||
|
||||
if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(&zcs->framelog))) goto failed2;
|
||||
|
||||
return zcs;
|
||||
|
||||
failed2:
|
||||
ZSTD_freeCStream(zcs->cstream);
|
||||
failed1:
|
||||
free(zcs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs)
|
||||
{
|
||||
if (zcs == NULL) return 0; /* support free on null */
|
||||
ZSTD_freeCStream(zcs->cstream);
|
||||
ZSTD_seekable_frameLog_freeVec(&zcs->framelog);
|
||||
free(zcs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
|
||||
int compressionLevel,
|
||||
int checksumFlag,
|
||||
U32 maxFrameSize)
|
||||
{
|
||||
zcs->framelog.size = 0;
|
||||
zcs->frameCSize = 0;
|
||||
zcs->frameDSize = 0;
|
||||
|
||||
/* make sure maxFrameSize has a reasonable value */
|
||||
if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) {
|
||||
return ERROR(compressionParameter_unsupported);
|
||||
}
|
||||
|
||||
zcs->maxFrameSize = maxFrameSize
|
||||
? maxFrameSize
|
||||
: ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
|
||||
|
||||
zcs->framelog.checksumFlag = checksumFlag;
|
||||
if (zcs->framelog.checksumFlag) {
|
||||
XXH64_reset(&zcs->xxhState, 0);
|
||||
}
|
||||
|
||||
zcs->framelog.seekTablePos = 0;
|
||||
zcs->framelog.seekTableIndex = 0;
|
||||
zcs->writingSeekTable = 0;
|
||||
|
||||
return ZSTD_initCStream(zcs->cstream, compressionLevel);
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl,
|
||||
unsigned compressedSize,
|
||||
unsigned decompressedSize,
|
||||
unsigned checksum)
|
||||
{
|
||||
if (fl->size == ZSTD_SEEKABLE_MAXFRAMES)
|
||||
return ERROR(frameIndex_tooLarge);
|
||||
|
||||
/* grow the buffer if required */
|
||||
if (fl->size == fl->capacity) {
|
||||
/* exponential size increase for constant amortized runtime */
|
||||
size_t const newCapacity = fl->capacity * 2;
|
||||
framelogEntry_t* const newEntries = realloc(fl->entries,
|
||||
sizeof(framelogEntry_t) * newCapacity);
|
||||
|
||||
if (newEntries == NULL) return ERROR(memory_allocation);
|
||||
|
||||
fl->entries = newEntries;
|
||||
fl->capacity = newCapacity;
|
||||
}
|
||||
|
||||
fl->entries[fl->size] = (framelogEntry_t){
|
||||
compressedSize, decompressedSize, checksum
|
||||
};
|
||||
fl->size++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
|
||||
{
|
||||
size_t const prevOutPos = output->pos;
|
||||
/* end the frame */
|
||||
size_t ret = ZSTD_endStream(zcs->cstream, output);
|
||||
|
||||
zcs->frameCSize += output->pos - prevOutPos;
|
||||
|
||||
/* need to flush before doing the rest */
|
||||
if (ret) return ret;
|
||||
|
||||
/* frame done */
|
||||
|
||||
/* store the frame data for later */
|
||||
ret = ZSTD_seekable_logFrame(
|
||||
&zcs->framelog, zcs->frameCSize, zcs->frameDSize,
|
||||
zcs->framelog.checksumFlag
|
||||
? XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU
|
||||
: 0);
|
||||
if (ret) return ret;
|
||||
|
||||
/* reset for the next frame */
|
||||
zcs->frameCSize = 0;
|
||||
zcs->frameDSize = 0;
|
||||
|
||||
ZSTD_resetCStream(zcs->cstream, 0);
|
||||
if (zcs->framelog.checksumFlag)
|
||||
XXH64_reset(&zcs->xxhState, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
||||
{
|
||||
const BYTE* const inBase = (const BYTE*) input->src + input->pos;
|
||||
size_t inLen = input->size - input->pos;
|
||||
|
||||
inLen = MIN(inLen, (size_t)(zcs->maxFrameSize - zcs->frameDSize));
|
||||
|
||||
/* if we haven't finished flushing the last frame, don't start writing a new one */
|
||||
if (inLen > 0) {
|
||||
ZSTD_inBuffer inTmp = { inBase, inLen, 0 };
|
||||
size_t const prevOutPos = output->pos;
|
||||
|
||||
size_t const ret = ZSTD_compressStream(zcs->cstream, output, &inTmp);
|
||||
|
||||
if (zcs->framelog.checksumFlag) {
|
||||
XXH64_update(&zcs->xxhState, inBase, inTmp.pos);
|
||||
}
|
||||
|
||||
zcs->frameCSize += output->pos - prevOutPos;
|
||||
zcs->frameDSize += inTmp.pos;
|
||||
|
||||
input->pos += inTmp.pos;
|
||||
|
||||
if (ZSTD_isError(ret)) return ret;
|
||||
}
|
||||
|
||||
if (zcs->maxFrameSize == zcs->frameDSize) {
|
||||
/* log the frame and start over */
|
||||
size_t const ret = ZSTD_seekable_endFrame(zcs, output);
|
||||
if (ZSTD_isError(ret)) return ret;
|
||||
|
||||
/* get the client ready for the next frame */
|
||||
return (size_t)zcs->maxFrameSize;
|
||||
}
|
||||
|
||||
return (size_t)(zcs->maxFrameSize - zcs->frameDSize);
|
||||
}
|
||||
|
||||
static inline size_t ZSTD_seekable_seekTableSize(const ZSTD_frameLog* fl)
|
||||
{
|
||||
size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0);
|
||||
size_t const seekTableLen = ZSTD_skippableHeaderSize +
|
||||
sizePerFrame * fl->size +
|
||||
ZSTD_seekTableFooterSize;
|
||||
|
||||
return seekTableLen;
|
||||
}
|
||||
|
||||
static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl,
|
||||
ZSTD_outBuffer* output, U32 const value,
|
||||
U32 const offset)
|
||||
{
|
||||
if (fl->seekTablePos < offset + 4) {
|
||||
BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
|
||||
size_t const lenWrite =
|
||||
MIN(output->size - output->pos, offset + 4 - fl->seekTablePos);
|
||||
MEM_writeLE32(tmp, value);
|
||||
memcpy((BYTE*)output->dst + output->pos,
|
||||
tmp + (fl->seekTablePos - offset), lenWrite);
|
||||
output->pos += lenWrite;
|
||||
fl->seekTablePos += lenWrite;
|
||||
|
||||
if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
|
||||
{
|
||||
/* seekTableIndex: the current index in the table and
|
||||
* seekTableSize: the amount of the table written so far
|
||||
*
|
||||
* This function is written this way so that if it has to return early
|
||||
* because of a small buffer, it can keep going where it left off.
|
||||
*/
|
||||
|
||||
size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0);
|
||||
size_t const seekTableLen = ZSTD_seekable_seekTableSize(fl);
|
||||
|
||||
CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0));
|
||||
CHECK_Z(ZSTD_stwrite32(fl, output, seekTableLen - ZSTD_skippableHeaderSize,
|
||||
4));
|
||||
|
||||
while (fl->seekTableIndex < fl->size) {
|
||||
CHECK_Z(ZSTD_stwrite32(fl, output,
|
||||
fl->entries[fl->seekTableIndex].cSize,
|
||||
ZSTD_skippableHeaderSize +
|
||||
sizePerFrame * fl->seekTableIndex + 0));
|
||||
|
||||
CHECK_Z(ZSTD_stwrite32(fl, output,
|
||||
fl->entries[fl->seekTableIndex].dSize,
|
||||
ZSTD_skippableHeaderSize +
|
||||
sizePerFrame * fl->seekTableIndex + 4));
|
||||
|
||||
if (fl->checksumFlag) {
|
||||
CHECK_Z(ZSTD_stwrite32(
|
||||
fl, output, fl->entries[fl->seekTableIndex].checksum,
|
||||
ZSTD_skippableHeaderSize +
|
||||
sizePerFrame * fl->seekTableIndex + 8));
|
||||
}
|
||||
|
||||
fl->seekTableIndex++;
|
||||
}
|
||||
|
||||
CHECK_Z(ZSTD_stwrite32(fl, output, fl->size,
|
||||
seekTableLen - ZSTD_seekTableFooterSize));
|
||||
|
||||
if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos;
|
||||
if (fl->seekTablePos < seekTableLen - 4) {
|
||||
BYTE sfd = 0;
|
||||
sfd |= (fl->checksumFlag) << 7;
|
||||
|
||||
((BYTE*)output->dst)[output->pos] = sfd;
|
||||
output->pos++;
|
||||
fl->seekTablePos++;
|
||||
}
|
||||
|
||||
CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER,
|
||||
seekTableLen - 4));
|
||||
|
||||
if (fl->seekTablePos != seekTableLen) return ERROR(GENERIC);
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
|
||||
{
|
||||
if (!zcs->writingSeekTable && zcs->frameDSize) {
|
||||
const size_t endFrame = ZSTD_seekable_endFrame(zcs, output);
|
||||
if (ZSTD_isError(endFrame)) return endFrame;
|
||||
/* return an accurate size hint */
|
||||
if (endFrame) return endFrame + ZSTD_seekable_seekTableSize(&zcs->framelog);
|
||||
}
|
||||
|
||||
zcs->writingSeekTable = 1;
|
||||
|
||||
return ZSTD_seekable_writeSeekTable(&zcs->framelog, output);
|
||||
}
|
461
contrib/seekable_format/zstdseek_decompress.c
Normal file
461
contrib/seekable_format/zstdseek_decompress.c
Normal file
@ -0,0 +1,461 @@
|
||||
/*
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the BSD-style license found in the
|
||||
* LICENSE file in the root directory of this source tree. An additional grant
|
||||
* of patent rights can be found in the PATENTS file in the same directory.
|
||||
*/
|
||||
|
||||
/* *********************************************************
|
||||
* Turn on Large Files support (>4GB) for 32-bit Linux/Unix
|
||||
***********************************************************/
|
||||
#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */
|
||||
# if !defined(_FILE_OFFSET_BITS)
|
||||
# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */
|
||||
# endif
|
||||
# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */
|
||||
# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */
|
||||
# endif
|
||||
# if defined(_AIX) || defined(__hpux)
|
||||
# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* ************************************************************
|
||||
* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
|
||||
***************************************************************/
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1400
|
||||
# define LONG_SEEK _fseeki64
|
||||
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
|
||||
# define LONG_SEEK fseeko
|
||||
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
|
||||
# define LONG_SEEK fseeko64
|
||||
#elif defined(_WIN32) && !defined(__DJGPP__)
|
||||
# include <windows.h>
|
||||
static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
|
||||
LARGE_INTEGER off;
|
||||
DWORD method;
|
||||
off.QuadPart = offset;
|
||||
if (origin == SEEK_END)
|
||||
method = FILE_END;
|
||||
else if (origin == SEEK_CUR)
|
||||
method = FILE_CURRENT;
|
||||
else
|
||||
method = FILE_BEGIN;
|
||||
|
||||
if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
|
||||
return 0;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
# define LONG_SEEK fseek
|
||||
#endif
|
||||
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <stdio.h> /* FILE* */
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#define XXH_NAMESPACE ZSTD_
|
||||
#include "xxhash.h"
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "mem.h"
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(name) ((size_t)-ZSTD_error_##name)
|
||||
|
||||
#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* Special-case callbacks for FILE* and in-memory modes, so that we can treat
|
||||
* them the same way as the advanced API */
|
||||
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
|
||||
{
|
||||
size_t const result = fread(buffer, 1, n, (FILE*)opaque);
|
||||
if (result != n) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ZSTD_seekable_seek_FILE(void* opaque, S64 offset, int origin)
|
||||
{
|
||||
int const ret = LONG_SEEK((FILE*)opaque, offset, origin);
|
||||
if (ret) return ret;
|
||||
return fflush((FILE*)opaque);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const void *ptr;
|
||||
size_t size;
|
||||
size_t pos;
|
||||
} buffWrapper_t;
|
||||
|
||||
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
|
||||
{
|
||||
buffWrapper_t* buff = (buffWrapper_t*) opaque;
|
||||
if (buff->size + n > buff->pos) return -1;
|
||||
memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
|
||||
buff->pos += n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ZSTD_seekable_seek_buff(void* opaque, S64 offset, int origin)
|
||||
{
|
||||
buffWrapper_t* buff = (buffWrapper_t*) opaque;
|
||||
unsigned long long newOffset;
|
||||
switch (origin) {
|
||||
case SEEK_SET:
|
||||
newOffset = offset;
|
||||
break;
|
||||
case SEEK_CUR:
|
||||
newOffset = (unsigned long long)buff->pos + offset;
|
||||
break;
|
||||
case SEEK_END:
|
||||
newOffset = (unsigned long long)buff->size - offset;
|
||||
break;
|
||||
}
|
||||
if (newOffset < 0 || newOffset > buff->size) {
|
||||
return -1;
|
||||
}
|
||||
buff->pos = newOffset;
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
U64 cOffset;
|
||||
U64 dOffset;
|
||||
U32 checksum;
|
||||
} seekEntry_t;
|
||||
|
||||
typedef struct {
|
||||
seekEntry_t* entries;
|
||||
size_t tableLen;
|
||||
|
||||
int checksumFlag;
|
||||
} seekTable_t;
|
||||
|
||||
#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_ABSOLUTEMAX
|
||||
|
||||
struct ZSTD_seekable_s {
|
||||
ZSTD_DStream* dstream;
|
||||
seekTable_t seekTable;
|
||||
ZSTD_seekable_customFile src;
|
||||
|
||||
U64 decompressedOffset;
|
||||
U32 curFrame;
|
||||
|
||||
BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
|
||||
BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
|
||||
starts of chunks before we get to the
|
||||
desired section */
|
||||
ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
|
||||
buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
|
||||
|
||||
XXH64_state_t xxhState;
|
||||
};
|
||||
|
||||
ZSTD_seekable* ZSTD_seekable_create(void)
|
||||
{
|
||||
ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
|
||||
|
||||
if (zs == NULL) return NULL;
|
||||
|
||||
/* also initializes stage to zsds_init */
|
||||
memset(zs, 0, sizeof(*zs));
|
||||
|
||||
zs->dstream = ZSTD_createDStream();
|
||||
if (zs->dstream == NULL) {
|
||||
free(zs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return zs;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_free(ZSTD_seekable* zs)
|
||||
{
|
||||
if (zs == NULL) return 0; /* support free on null */
|
||||
ZSTD_freeDStream(zs->dstream);
|
||||
free(zs->seekTable.entries);
|
||||
free(zs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** ZSTD_seekable_offsetToFrameIndex() :
|
||||
* Performs a binary search to find the last frame with a decompressed offset
|
||||
* <= pos
|
||||
* @return : the frame's index */
|
||||
U32 ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, U64 pos)
|
||||
{
|
||||
U32 lo = 0;
|
||||
U32 hi = zs->seekTable.tableLen;
|
||||
|
||||
if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
|
||||
return zs->seekTable.tableLen;
|
||||
}
|
||||
|
||||
while (lo + 1 < hi) {
|
||||
U32 const mid = lo + ((hi - lo) >> 1);
|
||||
if (zs->seekTable.entries[mid].dOffset <= pos) {
|
||||
lo = mid;
|
||||
} else {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
return lo;
|
||||
}
|
||||
|
||||
U32 ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
|
||||
{
|
||||
return zs->seekTable.tableLen;
|
||||
}
|
||||
|
||||
U64 ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
|
||||
{
|
||||
if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
|
||||
return zs->seekTable.entries[frameIndex].cOffset;
|
||||
}
|
||||
|
||||
U64 ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
|
||||
{
|
||||
if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
|
||||
return zs->seekTable.entries[frameIndex].dOffset;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
|
||||
{
|
||||
if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
|
||||
return zs->seekTable.entries[frameIndex + 1].cOffset -
|
||||
zs->seekTable.entries[frameIndex].cOffset;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
|
||||
{
|
||||
if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
|
||||
return zs->seekTable.entries[frameIndex + 1].dOffset -
|
||||
zs->seekTable.entries[frameIndex].dOffset;
|
||||
}
|
||||
|
||||
static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
|
||||
{
|
||||
int checksumFlag;
|
||||
ZSTD_seekable_customFile src = zs->src;
|
||||
/* read the footer, fixed size */
|
||||
CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
|
||||
CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
|
||||
|
||||
if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
|
||||
return ERROR(prefix_unknown);
|
||||
}
|
||||
|
||||
{ BYTE const sfd = zs->inBuff[4];
|
||||
checksumFlag = sfd >> 7;
|
||||
|
||||
/* check reserved bits */
|
||||
if ((checksumFlag >> 2) & 0x1f) {
|
||||
return ERROR(corruption_detected);
|
||||
}
|
||||
}
|
||||
|
||||
{ U32 const numFrames = MEM_readLE32(zs->inBuff);
|
||||
U32 const sizePerEntry = 8 + (checksumFlag?4:0);
|
||||
U32 const tableSize = sizePerEntry * numFrames;
|
||||
U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize;
|
||||
|
||||
U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
|
||||
{
|
||||
U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
|
||||
|
||||
CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
|
||||
CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
|
||||
|
||||
remaining -= toRead;
|
||||
}
|
||||
|
||||
if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
|
||||
return ERROR(prefix_unknown);
|
||||
}
|
||||
if (MEM_readLE32(zs->inBuff+4) + ZSTD_skippableHeaderSize != frameSize) {
|
||||
return ERROR(prefix_unknown);
|
||||
}
|
||||
|
||||
{ /* Allocate an extra entry at the end so that we can do size
|
||||
* computations on the last element without special case */
|
||||
seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
|
||||
const BYTE* tableBase = zs->inBuff + ZSTD_skippableHeaderSize;
|
||||
|
||||
U32 idx = 0;
|
||||
U32 pos = 8;
|
||||
|
||||
|
||||
U64 cOffset = 0;
|
||||
U64 dOffset = 0;
|
||||
|
||||
if (!entries) {
|
||||
free(entries);
|
||||
return ERROR(memory_allocation);
|
||||
}
|
||||
|
||||
/* compute cumulative positions */
|
||||
for (; idx < numFrames; idx++) {
|
||||
if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
|
||||
U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
|
||||
U32 const offset = SEEKABLE_BUFF_SIZE - pos;
|
||||
memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
|
||||
CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
|
||||
remaining -= toRead;
|
||||
pos = 0;
|
||||
}
|
||||
entries[idx].cOffset = cOffset;
|
||||
entries[idx].dOffset = dOffset;
|
||||
|
||||
cOffset += MEM_readLE32(zs->inBuff + pos);
|
||||
pos += 4;
|
||||
dOffset += MEM_readLE32(zs->inBuff + pos);
|
||||
pos += 4;
|
||||
if (checksumFlag) {
|
||||
entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
|
||||
pos += 4;
|
||||
}
|
||||
}
|
||||
entries[numFrames].cOffset = cOffset;
|
||||
entries[numFrames].dOffset = dOffset;
|
||||
|
||||
zs->seekTable.entries = entries;
|
||||
zs->seekTable.tableLen = numFrames;
|
||||
zs->seekTable.checksumFlag = checksumFlag;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
|
||||
{
|
||||
zs->buffWrapper = (buffWrapper_t){src, srcSize, 0};
|
||||
{ ZSTD_seekable_customFile srcFile = {&zs->buffWrapper,
|
||||
&ZSTD_seekable_read_buff,
|
||||
&ZSTD_seekable_seek_buff};
|
||||
return ZSTD_seekable_initAdvanced(zs, srcFile); }
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
|
||||
{
|
||||
ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE,
|
||||
&ZSTD_seekable_seek_FILE};
|
||||
return ZSTD_seekable_initAdvanced(zs, srcFile);
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
|
||||
{
|
||||
zs->src = src;
|
||||
|
||||
{ const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs);
|
||||
if (ZSTD_isError(seekTableInit)) return seekTableInit; }
|
||||
|
||||
zs->decompressedOffset = (U64)-1;
|
||||
zs->curFrame = (U32)-1;
|
||||
|
||||
{ const size_t dstreamInit = ZSTD_initDStream(zs->dstream);
|
||||
if (ZSTD_isError(dstreamInit)) return dstreamInit; }
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, U64 offset)
|
||||
{
|
||||
U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
|
||||
do {
|
||||
/* check if we can continue from a previous decompress job */
|
||||
if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
|
||||
zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
|
||||
zs->curFrame = targetFrame;
|
||||
|
||||
CHECK_IO(zs->src.seek(zs->src.opaque,
|
||||
zs->seekTable.entries[targetFrame].cOffset,
|
||||
SEEK_SET));
|
||||
zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
|
||||
XXH64_reset(&zs->xxhState, 0);
|
||||
ZSTD_resetDStream(zs->dstream);
|
||||
}
|
||||
|
||||
while (zs->decompressedOffset < offset + len) {
|
||||
size_t toRead;
|
||||
ZSTD_outBuffer outTmp;
|
||||
size_t prevOutPos;
|
||||
if (zs->decompressedOffset < offset) {
|
||||
/* dummy decompressions until we get to the target offset */
|
||||
outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
|
||||
} else {
|
||||
outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
|
||||
}
|
||||
|
||||
prevOutPos = outTmp.pos;
|
||||
toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
|
||||
if (ZSTD_isError(toRead)) {
|
||||
return toRead;
|
||||
}
|
||||
|
||||
if (zs->seekTable.checksumFlag) {
|
||||
XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
|
||||
outTmp.pos - prevOutPos);
|
||||
}
|
||||
zs->decompressedOffset += outTmp.pos - prevOutPos;
|
||||
|
||||
if (toRead == 0) {
|
||||
/* frame complete */
|
||||
|
||||
/* verify checksum */
|
||||
if (zs->seekTable.checksumFlag &&
|
||||
(XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
|
||||
zs->seekTable.entries[targetFrame].checksum) {
|
||||
return ERROR(corruption_detected);
|
||||
}
|
||||
|
||||
if (zs->decompressedOffset < offset + len) {
|
||||
/* go back to the start and force a reset of the stream */
|
||||
targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* read in more data if we're done with this buffer */
|
||||
if (zs->in.pos == zs->in.size) {
|
||||
toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
|
||||
CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
|
||||
zs->in.size = toRead;
|
||||
zs->in.pos = 0;
|
||||
}
|
||||
}
|
||||
} while (zs->decompressedOffset != offset + len);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, U32 frameIndex)
|
||||
{
|
||||
if (frameIndex >= zs->seekTable.tableLen) {
|
||||
return ERROR(frameIndex_tooLarge);
|
||||
}
|
||||
|
||||
{
|
||||
size_t const decompressedSize =
|
||||
zs->seekTable.entries[frameIndex + 1].dOffset -
|
||||
zs->seekTable.entries[frameIndex].dOffset;
|
||||
if (dstSize < decompressedSize) {
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
return ZSTD_seekable_decompress(
|
||||
zs, dst, decompressedSize,
|
||||
zs->seekTable.entries[frameIndex].dOffset);
|
||||
}
|
||||
}
|
@ -38,6 +38,8 @@ const char* ERR_getErrorString(ERR_enum code)
|
||||
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
|
||||
case PREFIX(dictionary_wrong): return "Dictionary mismatch";
|
||||
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
|
||||
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
|
||||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
|
||||
case PREFIX(maxCode):
|
||||
default: return notErrorCode;
|
||||
}
|
||||
|
@ -58,6 +58,8 @@ typedef enum {
|
||||
ZSTD_error_dictionary_corrupted,
|
||||
ZSTD_error_dictionary_wrong,
|
||||
ZSTD_error_dictionaryCreation_failed,
|
||||
ZSTD_error_frameIndex_tooLarge,
|
||||
ZSTD_error_seekableIO,
|
||||
ZSTD_error_maxCode
|
||||
} ZSTD_ErrorCode;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user