Merge pull request #403 from felixhandte/lz4-cli-dict-support-tests
Support Dictionaries on the Command Line
commit 34da12c6e6
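The diff below touches the CLI front end (programs/lz4cli.c: a new -D FILE option in the usage text and option parser), the I/O layer (programs/lz4io.c and programs/lz4io.h: dictionary loading, plus switching the frame calls to their _usingCDict / _usingDict variants), and tests/Makefile (a new test-lz4-dict target). The tests drive it as "$(LZ4) -D tmp-dict" on the compression side piped into "$(LZ4) -dD tmp-dict" on the decompression side; only the last 64 KB (LZ4_MAX_DICT_SIZE) of the dictionary file is used.

programs/lz4cli.c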
@@ -113,6 +113,7 @@ static int usage(const char* exeName)
     DISPLAY( " -9 : High compression \n");
     DISPLAY( " -d : decompression (default for %s extension)\n", LZ4_EXTENSION);
     DISPLAY( " -z : force compression \n");
+    DISPLAY( " -D FILE: use dictionary in FILE \n");
     DISPLAY( " -f : overwrite output without prompting \n");
     DISPLAY( " -k : preserve source files(s) (default) \n");
     DISPLAY( "--rm : remove source file(s) after successful de/compression \n");
@@ -290,6 +291,7 @@ int main(int argc, const char** argv)
     operationMode_e mode = om_auto;
     const char* input_filename = NULL;
     const char* output_filename= NULL;
+    const char* dictionary_filename = NULL;
    char* dynNameSpace = NULL;
     const char** inFileNames = (const char**) calloc(argc, sizeof(char*));
     unsigned ifnIdx=0;
@@ -399,6 +401,22 @@ int main(int argc, const char** argv)
             /* Compression (default) */
         case 'z': mode = om_compress; break;

+        case 'D':
+            if (argument[1] == '\0') {
+                /* path is next arg */
+                if (i + 1 == argc) {
+                    /* there is no next arg */
+                    badusage(exeName);
+                }
+                dictionary_filename = argv[++i];
+            } else {
+                /* path follows immediately */
+                dictionary_filename = argument + 1;
+            }
+            /* skip to end of argument so that we jump to parsing next argument */
+            argument += strlen(argument) - 1;
+            break;
+
             /* Use Legacy format (ex : Linux kernel compression) */
         case 'l': legacy_format = 1; blockSize = 8 MB; break;

@@ -560,6 +578,15 @@ int main(int argc, const char** argv)
         mode = om_decompress; /* defer to decompress */
     }

+    if (dictionary_filename) {
+        if (!strcmp(dictionary_filename, stdinmark) && IS_CONSOLE(stdin)) {
+            DISPLAYLEVEL(1, "refusing to read from a console\n");
+            exit(1);
+        }
+
+        LZ4IO_setDictionaryFilename(dictionary_filename);
+    }
+
     /* compress or decompress */
     if (!input_filename) input_filename = stdinmark;
     /* Check if input is defined as console; trigger an error in this case */

programs/lz4io.c
@@ -57,6 +57,7 @@
 #include "lz4.h" /* still required for legacy format */
 #include "lz4hc.h" /* still required for legacy format */
 #include "lz4frame.h"
+#include "lz4frame_static.h"


 /*****************************
@@ -82,6 +83,7 @@
 #define LEGACY_BLOCKSIZE (8 MB)
 #define MIN_STREAM_BUFSIZE (192 KB)
 #define LZ4IO_BLOCKSIZEID_DEFAULT 7
+#define LZ4_MAX_DICT_SIZE (64 KB)


 /**************************************
@@ -110,6 +112,8 @@ static int g_streamChecksum = 1;
 static int g_blockIndependence = 1;
 static int g_sparseFileSupport = 1;
 static int g_contentSizeFlag = 0;
+static int g_useDictionary = 0;
+static const char* g_dictionaryFilename = NULL;


 /**************************************
@@ -142,6 +146,12 @@ static int g_contentSizeFlag = 0;
 /* ****************** Parameters ******************** */
 /* ************************************************** */

+int LZ4IO_setDictionaryFilename(const char* dictionaryFilename) {
+    g_dictionaryFilename = dictionaryFilename;
+    g_useDictionary = dictionaryFilename != NULL;
+    return g_useDictionary;
+}
+
 /* Default setting : overwrite = 1; return : overwrite mode (0/1) */
 int LZ4IO_setOverwrite(int yes)
 {
@@ -395,8 +405,79 @@ typedef struct {
     void* dstBuffer;
     size_t dstBufferSize;
     LZ4F_compressionContext_t ctx;
+    LZ4F_CDict* cdict;
 } cRess_t;

+static void* LZ4IO_createDict(const char* dictFilename, size_t *dictSize) {
+    size_t readSize;
+    size_t dictEnd = 0;
+    size_t dictLen = 0;
+    size_t dictStart;
+    size_t circularBufSize = LZ4_MAX_DICT_SIZE;
+    char* circularBuf;
+    char* dictBuf;
+    FILE* dictFile;
+
+    if (!dictFilename) EXM_THROW(25, "Dictionary error : no filename provided");
+
+    circularBuf = (char *) malloc(circularBufSize);
+    if (!circularBuf) EXM_THROW(25, "Allocation error : not enough memory");
+
+    dictFile = LZ4IO_openSrcFile(dictFilename);
+    if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file");
+
+    /* opportunistically seek to the part of the file we care about. If this */
+    /* fails it's not a problem since we'll just read everything anyways. */
+    if (strcmp(dictFilename, stdinmark)) {
+        UTIL_fseek(dictFile, -LZ4_MAX_DICT_SIZE, SEEK_END);
+    }
+
+    do {
+        readSize = fread(circularBuf + dictEnd, 1, circularBufSize - dictEnd, dictFile);
+        dictEnd = (dictEnd + readSize) % circularBufSize;
+        dictLen += readSize;
+    } while (readSize>0);
+
+    if (dictLen > LZ4_MAX_DICT_SIZE) {
+        dictLen = LZ4_MAX_DICT_SIZE;
+    }
+
+    *dictSize = dictLen;
+
+    dictStart = (circularBufSize + dictEnd - dictLen) % circularBufSize;
+
+    if (dictStart == 0) {
+        /* We're in the simple case where the dict starts at the beginning of our circular buffer. */
+        dictBuf = circularBuf;
+        circularBuf = NULL;
+    } else {
+        /* Otherwise, we will alloc a new buffer and copy our dict into that. */
+        dictBuf = (char *) malloc(dictLen ? dictLen : 1);
+        if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory");
+
+        memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart);
+        memcpy(dictBuf + circularBufSize - dictStart, circularBuf, dictLen - (circularBufSize - dictStart));
+    }
+
+    free(circularBuf);
+
+    return dictBuf;
+}
+
+static LZ4F_CDict* LZ4IO_createCDict(void) {
+    size_t dictionarySize;
+    void* dictionaryBuffer;
+    LZ4F_CDict* cdict;
+    if (!g_useDictionary) {
+        return NULL;
+    }
+    dictionaryBuffer = LZ4IO_createDict(g_dictionaryFilename, &dictionarySize);
+    if (!dictionaryBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
+    cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize);
+    free(dictionaryBuffer);
+    return cdict;
+}
+
 static cRess_t LZ4IO_createCResources(void)
 {
     const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
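A note on the ring-buffer arithmetic in LZ4IO_createDict above: the function keeps only the newest LZ4_MAX_DICT_SIZE bytes of the dictionary file by reading through a circular buffer, then unwraps it with the two memcpy calls. The following toy program (not part of the patch; it shrinks the buffer to 8 bytes and hard-codes the state reached after reading an 11-byte input "ABCDEFGHIJK") illustrates what those copies recover.

    #include <assert.h>
    #include <string.h>

    int main(void)
    {
        /* Ring state after feeding "ABCDEFGHIJK" through an 8-byte buffer:
         * the last 3 bytes wrapped around and overwrote "ABC". */
        char const circularBuf[8 + 1] = "IJKDEFGH";
        size_t const circularBufSize = 8;   /* stand-in for LZ4_MAX_DICT_SIZE */
        size_t const dictEnd = 3;           /* 11 % 8 */
        size_t const dictLen = 8;           /* 11 bytes read, capped at the buffer size */
        size_t const dictStart = (circularBufSize + dictEnd - dictLen) % circularBufSize;  /* == 3 */
        char dictBuf[8 + 1] = { 0 };

        /* Same two copies as in LZ4IO_createDict: tail of the ring, then its head. */
        memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart);
        memcpy(dictBuf + (circularBufSize - dictStart), circularBuf,
               dictLen - (circularBufSize - dictStart));

        assert(strcmp(dictBuf, "DEFGHIJK") == 0);   /* the newest dictLen bytes of the input */
        return 0;
    }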
@@ -412,6 +493,8 @@ static cRess_t LZ4IO_createCResources(void)
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");

+    ress.cdict = LZ4IO_createCDict();
+
     return ress;
 }

@@ -419,6 +502,10 @@ static void LZ4IO_freeCResources(cRess_t ress)
 {
     free(ress.srcBuffer);
     free(ress.dstBuffer);
+
+    LZ4F_freeCDict(ress.cdict);
+    ress.cdict = NULL;
+
     { LZ4F_errorCode_t const errorCode = LZ4F_freeCompressionContext(ress.ctx);
       if (LZ4F_isError(errorCode)) EXM_THROW(38, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode)); }
 }
@@ -472,7 +559,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
         /* single-block file */
         if (readSize < blockSize) {
             /* Compress in single pass */
-            size_t const cSize = LZ4F_compressFrame(dstBuffer, dstBufferSize, srcBuffer, readSize, &prefs);
+            size_t cSize = LZ4F_compressFrame_usingCDict(dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
             if (LZ4F_isError(cSize)) EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize));
             compressedfilesize = cSize;
             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
@@ -488,7 +575,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
         /* multiple-blocks file */
         {
             /* Write Archive Header */
-            size_t headerSize = LZ4F_compressBegin(ctx, dstBuffer, dstBufferSize, &prefs);
+            size_t headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress.cdict, &prefs);
             if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
             { size_t const sizeCheck = fwrite(dstBuffer, 1, headerSize, dstFile);
               if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
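The two call-site changes above route compression through the dictionary-aware entry points declared in lz4frame_static.h. As a reference, here is a minimal standalone sketch of the same one-shot pattern (not part of the patch; the helper name and the return-0-on-error policy are invented): digest the dictionary once with LZ4F_createCDict, compress with LZ4F_compressFrame_usingCDict, then release the digest.

    #include <string.h>
    #include "lz4frame_static.h"   /* LZ4F_CDict, LZ4F_compressFrame_usingCDict */

    /* Hypothetical helper: compress src into dst using a raw dictionary buffer.
     * Returns the compressed size, or 0 on any error. */
    static size_t compressWithDict(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
                                   const void* dict, size_t dictSize)
    {
        LZ4F_preferences_t prefs;
        LZ4F_CDict* cdict;
        size_t cSize;

        memset(&prefs, 0, sizeof(prefs));          /* default frame parameters */

        cdict = LZ4F_createCDict(dict, dictSize);  /* digest once; reusable across frames */
        if (!cdict) return 0;

        /* dstCapacity should be at least LZ4F_compressBound(srcSize, &prefs) */
        cSize = LZ4F_compressFrame_usingCDict(dst, dstCapacity, src, srcSize, cdict, &prefs);

        LZ4F_freeCDict(cdict);
        return LZ4F_isError(cSize) ? 0 : cSize;
    }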
@@ -745,8 +832,21 @@ typedef struct {
     size_t dstBufferSize;
     FILE* dstFile;
     LZ4F_decompressionContext_t dCtx;
+    void* dictBuffer;
+    size_t dictBufferSize;
 } dRess_t;

+static void LZ4IO_loadDDict(dRess_t* ress) {
+    if (!g_useDictionary) {
+        ress->dictBuffer = NULL;
+        ress->dictBufferSize = 0;
+        return;
+    }
+
+    ress->dictBuffer = LZ4IO_createDict(g_dictionaryFilename, &ress->dictBufferSize);
+    if (!ress->dictBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
+}
+
 static const size_t LZ4IO_dBufferSize = 64 KB;
 static dRess_t LZ4IO_createDResources(void)
 {
@@ -763,6 +863,8 @@ static dRess_t LZ4IO_createDResources(void)
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");

+    LZ4IO_loadDDict(&ress);
+
     ress.dstFile = NULL;
     return ress;
 }
@@ -773,6 +875,7 @@ static void LZ4IO_freeDResources(dRess_t ress)
     if (LZ4F_isError(errorCode)) EXM_THROW(69, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode));
     free(ress.srcBuffer);
     free(ress.dstBuffer);
+    free(ress.dictBuffer);
 }


@@ -786,7 +889,7 @@ static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE
     { size_t inSize = MAGICNUMBER_SIZE;
       size_t outSize= 0;
       LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER);
-      nextToLoad = LZ4F_decompress(ress.dCtx, ress.dstBuffer, &outSize, ress.srcBuffer, &inSize, NULL);
+      nextToLoad = LZ4F_decompress_usingDict(ress.dCtx, ress.dstBuffer, &outSize, ress.srcBuffer, &inSize, ress.dictBuffer, ress.dictBufferSize, NULL);
       if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "Header error : %s", LZ4F_getErrorName(nextToLoad));
     }

@@ -805,7 +908,7 @@ static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE
         /* Decode Input (at least partially) */
         size_t remaining = readSize - pos;
         decodedBytes = ress.dstBufferSize;
-        nextToLoad = LZ4F_decompress(ress.dCtx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
+        nextToLoad = LZ4F_decompress_usingDict(ress.dCtx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, ress.dictBuffer, ress.dictBufferSize, NULL);
         if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
         pos += remaining;

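The decompression side mirrors this by passing the raw dictionary bytes to LZ4F_decompress_usingDict on every call. A minimal standalone sketch with the same argument order as the loop above (not part of the patch; it assumes the whole frame fits in a single input and output buffer, whereas the real loop feeds chunks until the function returns 0):

    #include "lz4frame_static.h"   /* LZ4F_decompress_usingDict */

    /* Hypothetical helper: decompress one complete frame held in src into dst,
     * using a raw dictionary buffer.  Returns the decompressed size, or 0 on error. */
    static size_t decompressWithDict(void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dict, size_t dictSize)
    {
        LZ4F_decompressionContext_t dctx;
        size_t dstSize = dstCapacity;
        size_t srcConsumed = srcSize;
        size_t ret;

        if (LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION)))
            return 0;

        /* A production loop calls this repeatedly until it returns 0 (end of frame);
         * here the buffers are assumed large enough for a single pass. */
        ret = LZ4F_decompress_usingDict(dctx, dst, &dstSize, src, &srcConsumed,
                                        dict, dictSize, NULL);

        LZ4F_freeDecompressionContext(dctx);
        return LZ4F_isError(ret) ? 0 : dstSize;
    }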
programs/lz4io.h

@@ -64,6 +64,8 @@ int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSiz
 /* ****************** Parameters ******************** */
 /* ************************************************** */

+int LZ4IO_setDictionaryFilename(const char* dictionaryFilename);
+
 /* Default setting : overwrite = 1;
    return : overwrite mode (0/1) */
 int LZ4IO_setOverwrite(int yes);

tests/Makefile

@@ -129,6 +129,8 @@ ifneq (,$(filter $(shell uname),SunOS))
 DIFF:=gdiff
 endif

+DD:=dd
+

 test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer

@@ -253,6 +255,31 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat
 	$(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum
 	@$(RM) tmp-tlb*

+test-lz4-dict: lz4 datagen
+	@echo "\n ---- test lz4 compression/decompression with dictionary ----"
+	./datagen -g16KB > tmp-dict
+	./datagen -g32KB > tmp-dict-sample-32k
+	< tmp-dict-sample-32k $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-32k
+	./datagen -g128MB > tmp-dict-sample-128m
+	< tmp-dict-sample-128m $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-128m
+	touch tmp-dict-sample-0
+	< tmp-dict-sample-0 $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | diff - tmp-dict-sample-0
+
+	< tmp-dict-sample-32k $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | diff - tmp-dict-sample-32k
+	< tmp-dict-sample-0 $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | diff - tmp-dict-sample-0
+
+	@echo "\n ---- test lz4 dictionary loading ----"
+	./datagen -g128KB > tmp-dict-data-128KB
+	set -e; \
+	for l in 0 1 4 128 32767 32768 32769 65535 65536 65537 98303 98304 98305 131071 131072 131073; do \
+		./datagen -g$$l > tmp-dict-$$l; \
+		$(DD) if=tmp-dict-$$l of=tmp-dict-$$l-tail bs=1 count=65536 skip=$$((l > 65536 ? l - 65536 : 0)); \
+		< tmp-dict-$$l $(LZ4) -D stdin tmp-dict-data-128KB | $(LZ4) -dD tmp-dict-$$l-tail | $(DIFF) - tmp-dict-data-128KB; \
+		< tmp-dict-$$l-tail $(LZ4) -D stdin tmp-dict-data-128KB | $(LZ4) -dD tmp-dict-$$l | $(DIFF) - tmp-dict-data-128KB; \
+	done
+
+	@$(RM) tmp-dict*
+
 test-lz4-hugefile: lz4 datagen
 	@echo "\n ---- test huge files compression/decompression ----"
 	./datagen -g6GB | $(LZ4) -vB5D | $(LZ4) -qt
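To spell out the intent of the dictionary-loading loop above: for sizes just below and above the 64 KB and 128 KB marks, dd carves out the last 65536 bytes of each generated dictionary, and the paired pipelines check that compressing with the full file and decompressing with only that tail (and vice versa) still round-trips; that is, only the trailing LZ4_MAX_DICT_SIZE bytes of the -D input affect the frame.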
@@ -292,7 +319,7 @@ test-lz4-opt-parser: lz4 datagen

 test-lz4: lz4 datagen test-lz4-basic test-lz4-opt-parser test-lz4-multiple \
           test-lz4-sparse test-lz4-frame-concatenation test-lz4-testmode \
-          test-lz4-contentSize test-lz4-hugefile
+          test-lz4-contentSize test-lz4-hugefile test-lz4-dict
 	@$(RM) tmp*

 test-lz4c: lz4c datagen