Merge pull request #586 from blezsan/custom_block_sizes

support custom block sizes
This commit is contained in:
Yann Collet 2018-10-02 10:37:20 -07:00 committed by GitHub
commit a963621eb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 417 additions and 13 deletions

View File

@ -134,7 +134,8 @@ static int usage_advanced(const char* exeName)
DISPLAY( " -r : operate recursively on directories (sets also -m) \n");
#endif
DISPLAY( " -l : compress using Legacy format (Linux kernel compression)\n");
DISPLAY( " -B# : Block size [4-7] (default : 7) \n");
DISPLAY( " -B# : cut file into blocks of size # bytes [32+] \n");
DISPLAY( " or predefined block size [4-7] (default: 7) \n");
DISPLAY( " -BD : Block dependency (improve compression ratio) \n");
DISPLAY( " -BX : enable block checksum (default:disabled) \n");
DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n");
@ -146,8 +147,6 @@ static int usage_advanced(const char* exeName)
DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n");
DISPLAY( " -e# : test all compression levels from -bX to # (default : 1)\n");
DISPLAY( " -i# : minimum evaluation time in seconds (default : 3s) \n");
DISPLAY( " -B# : cut file into independent blocks of size # bytes [32+] \n");
DISPLAY( " or predefined block size [4-7] (default: 7) \n");
if (g_lz4c_legacy_commands) {
DISPLAY( "Legacy arguments : \n");
DISPLAY( " -c0 : fast compression \n");
@ -497,11 +496,12 @@ int main(int argc, const char** argv)
DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
} else {
if (B < 32) badusage(exeName);
BMK_setBlockSize(B);
if (B >= 1024) {
DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10));
blockSize = LZ4IO_setBlockSize(B);
BMK_setBlockSize(blockSize);
if (blockSize >= 1024) {
DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
} else {
DISPLAYLEVEL(2, "bench: using blocks of size %u bytes \n", (U32)(B));
DISPLAYLEVEL(2, "using blocks of size %u bytes \n", (U32)(blockSize));
}
}
break;

View File

@ -110,6 +110,7 @@ static clock_t g_time = 0;
static int g_overwrite = 1;
static int g_testMode = 0;
static int g_blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
static size_t g_blockSize = 0;
static int g_blockChecksum = 0;
static int g_streamChecksum = 1;
static int g_blockIndependence = 1;
@ -178,7 +179,25 @@ size_t LZ4IO_setBlockSizeID(unsigned bsid)
static const unsigned maxBlockSizeID = 7;
if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0;
g_blockSizeId = bsid;
return blockSizeTable[g_blockSizeId-minBlockSizeID];
g_blockSize = blockSizeTable[g_blockSizeId-minBlockSizeID];
return g_blockSize;
}
/* LZ4IO_setBlockSize() :
 * Select a custom block size, in bytes.
 * The request is clamped into [32, 4 MB] and stored in g_blockSize.
 * g_blockSizeId is set to the smallest predefined frame block size
 * { 4: 64 KB, 5: 256 KB, 6: 1 MB, 7: 4 MB } able to hold one such block.
 * @return : the block size actually retained (after clamping). */
size_t LZ4IO_setBlockSize(size_t blockSize)
{
    static const size_t minBlockSize = 32;
    static const size_t maxBlockSize = 4 MB;
    unsigned log4 = 0;
    size_t scan;

    if (blockSize < minBlockSize) blockSize = minBlockSize;
    if (blockSize > maxBlockSize) blockSize = maxBlockSize;
    g_blockSize = blockSize;

    /* log4 = floor(log4(blockSize-1)), so 4^(log4+1) is the smallest power of 4 >= blockSize */
    for (scan = (blockSize - 1) >> 2; scan != 0; scan >>= 2)
        log4++;

    /* never go below 64 KB (4^8), which is block size ID 4 */
    if (log4 < 7) log4 = 7;
    g_blockSizeId = (int)(log4 - 3);
    return g_blockSize;
}
int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode)
@ -237,7 +256,6 @@ void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }
** ********************** LZ4 File / Pipe compression ********************* **
** ************************************************************************ */
/* Convert a frame block size ID into a size in bytes : 2^(8 + 2*id),
 * i.e. each ID step multiplies the size by 4 (id 4 -> 64 KB, id 7 -> 4 MB). */
static int LZ4IO_GetBlockSize_FromBlockId (int id)
{
    int const shift = 8 + (id * 2);
    return 1 << shift;
}
/* Tell whether `magic` matches LZ4IO_SKIPPABLE0 once filtered through LZ4IO_SKIPPABLEMASK.
 * @return : 1 if it matches, 0 otherwise. */
static int LZ4IO_isSkippableMagicNumber(unsigned int magic)
{
    if ((magic & LZ4IO_SKIPPABLEMASK) != LZ4IO_SKIPPABLE0)
        return 0;
    return 1;
}
@ -499,7 +517,7 @@ static LZ4F_CDict* LZ4IO_createCDict(void) {
static cRess_t LZ4IO_createCResources(void)
{
const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
const size_t blockSize = g_blockSize;
cRess_t ress;
LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION);
@ -543,7 +561,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
void* const srcBuffer = ress.srcBuffer;
void* const dstBuffer = ress.dstBuffer;
const size_t dstBufferSize = ress.dstBufferSize;
const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
const size_t blockSize = g_blockSize;
size_t readSize;
LZ4F_compressionContext_t ctx = ress.ctx; /* just a pointer */
LZ4F_preferences_t prefs;

View File

@ -78,6 +78,10 @@ int LZ4IO_setTestMode(int yes);
return : 0 if error, blockSize if OK */
size_t LZ4IO_setBlockSizeID(unsigned blockSizeID);
/* blockSize : requested block size, in bytes.
               Values below 32 are raised to 32, values above 4 MB are lowered to 4 MB.
   return : the block size actually selected (after clamping) */
size_t LZ4IO_setBlockSize(size_t blockSize);
/* Default setting : independent blocks */
typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t;
int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode);

View File

@ -63,7 +63,7 @@ NB_LOOPS ?= -i1
default: all
all: fullbench fuzzer frametest roundTripTest datagen
all: fullbench fuzzer frametest roundTripTest datagen checkFrame
all32: CFLAGS+=-m32
all32: all
@ -109,6 +109,9 @@ roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c
datagen : $(PRGDIR)/datagen.c datagencli.c
$(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT)
checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
clean:
@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
@ -118,7 +121,8 @@ clean:
fuzzer$(EXT) fuzzer32$(EXT) \
frametest$(EXT) frametest32$(EXT) \
fasttest$(EXT) roundTripTest$(EXT) \
datagen$(EXT) checkTag$(EXT)
datagen$(EXT) checkTag$(EXT) \
frameTest$(EXT)
@rm -fR $(TESTDIR)
@echo Cleaning completed

306
tests/checkFrame.c Normal file
View File

@ -0,0 +1,306 @@
/*
checkFrame - verify frame headers
Copyright (C) Yann Collet 2014-2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- LZ4 homepage : http://www.lz4.org
- LZ4 source repository : https://github.com/lz4/lz4
*/
/*-************************************
* Compiler specific
**************************************/
#ifdef _MSC_VER /* Visual Studio */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */
#endif
/*-************************************
* Includes
**************************************/
#include "util.h" /* U32 */
#include <stdlib.h> /* malloc, free */
#include <stdio.h> /* fprintf */
#include <string.h> /* strcmp */
#include <time.h> /* clock_t, clock(), CLOCKS_PER_SEC */
#include <assert.h>
#include "lz4frame.h" /* include multiple times to test correctness/safety */
#include "lz4frame.h"
#define LZ4F_STATIC_LINKING_ONLY
#include "lz4frame.h"
#include "lz4frame.h"
#include "lz4.h" /* LZ4_VERSION_STRING */
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h" /* XXH64 */
/*-************************************
* Constants
**************************************/
/* Postfix size multipliers : written right after a number,
 * e.g. `4 MB` expands to `4 *(1U<<20)`. The arithmetic is done in `unsigned`. */
#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
/*-************************************
* Macros
**************************************/
/* DISPLAY() : unconditional printf-style message on stderr */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : same as DISPLAY(), but only when `displayLevel` >= l.
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * including in front of an `else`. */
#define DISPLAYLEVEL(l, ...) do { if (displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)
/**************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
/* DEBUGOUTPUT() : message printed only when DEBUG is non-zero */
#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
/* EXM_THROW() : print an error message (at display level 1),
 * then make the *enclosing function* return `error`.
 * Must be followed by a semicolon, like a regular statement. */
#define EXM_THROW(error, ...) \
do { \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error); \
    DISPLAYLEVEL(1, __VA_ARGS__); \
    DISPLAYLEVEL(1, " \n"); \
    return(error); \
} while (0)
/*-***************************************
* Local Parameters
*****************************************/
/* set to 1 by the `--no-prompt` command line option */
static U32 no_prompt = 0;
/* verbosity threshold tested by DISPLAYLEVEL() :
 * incremented by `-v`, decremented by `-q`, forced to 1 by `--no-prompt` */
static U32 displayLevel = 2;
/* set to 1 by the `-p` command line option */
static U32 use_pause = 0;
/*-*******************************************************
* Fuzzer functions
*********************************************************/
/* Smallest / largest of two values.
 * Note : the selected argument is evaluated twice, so avoid arguments with side effects. */
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
/* Resources needed to check a file : allocated by createCResources(),
 * released by freeCResources(). */
typedef struct {
/* buffer receiving the bytes read from the checked file */
void* srcBuffer;
/* capacity of srcBuffer, in bytes */
size_t srcBufferSize;
/* buffer receiving the output of LZ4F_decompress() */
void* dstBuffer;
/* capacity of dstBuffer, in bytes */
size_t dstBufferSize;
/* LZ4F decompression context used to decode the frames */
LZ4F_decompressionContext_t ctx;
} cRess_t;
/* createCResources() :
 * Allocate the source and destination buffers (4 MB each)
 * and create the LZ4F decompression context stored into `ress`.
 * On failure, both buffers are released and an error message is displayed.
 * @return : 0 on success,
 *           20 if a buffer could not be allocated,
 *           21 if the decompression context could not be created. */
static int createCResources(cRess_t *ress)
{
    size_t const bufferSize = 4 MB;

    ress->srcBufferSize = bufferSize;
    ress->srcBuffer = malloc(bufferSize);
    ress->dstBufferSize = bufferSize;
    ress->dstBuffer = malloc(bufferSize);
    if ((ress->srcBuffer == NULL) || (ress->dstBuffer == NULL)) {
        free(ress->srcBuffer);
        free(ress->dstBuffer);
        EXM_THROW(20, "Allocation error : not enough memory");
    }

    {   LZ4F_errorCode_t const ctxStatus = LZ4F_createDecompressionContext(&(ress->ctx), LZ4F_VERSION);
        if (LZ4F_isError(ctxStatus)) {
            free(ress->srcBuffer);
            free(ress->dstBuffer);
            EXM_THROW(21, "Unable to create decompression context");
        }
    }

    return 0;
}
/* freeCResources() :
 * Release both buffers and the decompression context held by `ress`.
 * `ress` is received by value : the caller's copy keeps its (now stale) pointers. */
static void freeCResources(cRess_t ress)
{
    LZ4F_decompressionContext_t const dctx = ress.ctx;

    free(ress.srcBuffer);
    free(ress.dstBuffer);
    /* the status returned when releasing the context is deliberately ignored */
    (void) LZ4F_freeDecompressionContext(dctx);
}
/* frameCheck() :
 * Decode every LZ4 frame found in `srcFile` and verify its block layout.
 * The file is read by chunks of `ress.srcBufferSize` bytes; decoded data is discarded.
 * For each frame :
 *  - the block size ID announced in the frame header must be equal to `bsid`;
 *  - every block must decode to exactly `blockSize` bytes, except a block followed
 *    by a size hint <= 4 (last block of a frame, possibly followed by the checksum).
 * @param ress      : buffers and decompression context, see createCResources()
 * @param srcFile   : file to check, opened for reading
 * @param bsid      : expected block size ID
 * @param blockSize : expected decoded size of full blocks, in bytes
 * @return : 0 if the whole file complies; otherwise an error code in [22-27],
 *           after displaying the matching message (see EXM_THROW). */
int frameCheck(cRess_t ress, FILE* const srcFile, unsigned bsid, size_t blockSize)
{
    LZ4F_errorCode_t nextToLoad = 0;  /* size hint from the last LZ4F call; 0 means a new frame starts */
    size_t curblocksize = 0;          /* bytes decoded so far for the block in progress */
    int partialBlock = 0;             /* 1 when the block in progress extends beyond the bytes already read */

    /* Main Loop */
    for (;;) {
        size_t readSize;
        size_t pos = 0;
        size_t decodedBytes;
        size_t remaining;
        LZ4F_frameInfo_t frameInfo;

        /* Read input */
        readSize = fread(ress.srcBuffer, 1, ress.srcBufferSize, srcFile);
        if (!readSize) break;   /* reached end of file or stream */

        while (pos < readSize) {  /* still to read */
            /* Decode Input (at least partially) */
            if (!nextToLoad) {
                /* LZ4F_decompress returned 0 : starting new frame */
                curblocksize = 0;
                remaining = readSize - pos;
                nextToLoad = LZ4F_getFrameInfo(ress.ctx, &frameInfo, (char*)(ress.srcBuffer)+pos, &remaining);
                if (LZ4F_isError(nextToLoad)) EXM_THROW(22, "Error getting frame info: %s", LZ4F_getErrorName(nextToLoad)); /* XXX */
                if (frameInfo.blockSizeID != bsid) EXM_THROW(23, "Block size ID %u != expected %u", frameInfo.blockSizeID, bsid);
                pos += remaining;
                /* nextToLoad should be block header size.
                 * Never hand over more bytes than the buffer still holds :
                 * the frame header may end right at the end of the data read so far. */
                remaining = (nextToLoad < (readSize - pos)) ? nextToLoad : (readSize - pos);
                decodedBytes = ress.dstBufferSize;
                nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
                if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
                pos += remaining;
            }

            decodedBytes = ress.dstBufferSize;
            /* nextToLoad should be just enough to cover the next block */
            if (nextToLoad > (readSize - pos)) {
                /* block is not fully contained in current buffer */
                partialBlock = 1;
                remaining = readSize - pos;
            } else {
                partialBlock = 0;
                remaining = nextToLoad;
            }
            nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
            if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
            curblocksize += decodedBytes;
            pos += remaining;

            if (!partialBlock) {
                /* detect small block due to end of frame; the final 4-byte frame checksum could be left in the buffer */
                if ((curblocksize != 0) && (nextToLoad > 4)) {
                    if (curblocksize != blockSize)
                        EXM_THROW(25, "Block size %zu != expected %zu, pos %zu\n", curblocksize, blockSize, pos);
                }
                curblocksize = 0;
            }
        }
    }

    /* can be out because readSize == 0, which could be an fread() error */
    if (ferror(srcFile)) EXM_THROW(26, "Read error");
    if (nextToLoad!=0) EXM_THROW(27, "Unfinished stream");
    return 0;
}
/* FUZ_usage() :
 * Print the command line help on stderr.
 * @return : always 0 */
int FUZ_usage(const char* programName)
{
    /* one entry per supported option, printed verbatim */
    static const char* const optionHelp[] = {
        " -b# : expected blocksizeID [4-7] (required)\n",
        " -B# : expected blocksize [32-4194304] (required)\n",
        " -v : verbose\n",
        " -h : display help and exit\n"
    };
    size_t const nbOptions = sizeof(optionHelp) / sizeof(optionHelp[0]);
    size_t n;

    DISPLAY( "Usage :\n");
    DISPLAY( " %s [args] filename\n", programName);
    DISPLAY( "\n");
    DISPLAY( "Arguments :\n");
    for (n = 0; n < nbOptions; n++) {
        DISPLAY( "%s", optionHelp[n]);
    }
    return 0;
}
/* main() :
 * Parse the command line, then check the first file name encountered with frameCheck().
 * Options must precede the file name; both `-b#` and `-B#` are required.
 * @return : 0 if the file complies (or after `-h`),
 *           1 on usage error or if the file cannot be opened,
 *           otherwise the error code of createCResources() or frameCheck(). */
int main(int argc, const char** argv)
{
    int argNb;
    int bsid=0;
    size_t blockSize=0;
    const char* const programName = argv[0];

    /* Check command line */
    for (argNb=1; argNb<argc; argNb++) {
        const char* argument = argv[argNb];

        if(!argument) continue;   /* Protection if argument empty */

        /* Decode command (note : aggregated short commands are allowed) */
        if (argument[0]=='-') {
            if (!strcmp(argument, "--no-prompt")) {
                no_prompt=1;
                displayLevel=1;
                continue;
            }
            argument++;

            while (*argument!=0) {
                switch(*argument)
                {
                case 'h':
                    return FUZ_usage(programName);
                case 'v':
                    argument++;
                    displayLevel++;
                    break;
                case 'q':
                    argument++;
                    /* displayLevel is unsigned : do not wrap around below 0 */
                    if (displayLevel > 0) displayLevel--;
                    break;
                case 'p': /* pause at the end */
                    argument++;
                    use_pause = 1;
                    break;

                case 'b':
                    argument++;
                    bsid=0;
                    while ((*argument>='0') && (*argument<='9')) {
                        bsid *= 10;
                        bsid += *argument - '0';
                        argument++;
                    }
                    break;

                case 'B':
                    argument++;
                    blockSize=0;
                    while ((*argument>='0') && (*argument<='9')) {
                        blockSize *= 10;
                        blockSize += *argument - '0';
                        argument++;
                    }
                    break;

                default:
                    /* unknown option : this is an error, do not report success */
                    (void)FUZ_usage(programName);
                    return 1;
                }
            }
        } else {
            int err;
            FILE *srcFile;
            cRess_t ress;

            if (bsid == 0 || blockSize == 0) {
                /* a required option is missing : nothing can be checked */
                (void)FUZ_usage(programName);
                return 1;
            }
            DISPLAY("Starting frame checker (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION_STRING);
            err = createCResources(&ress);
            if (err) return (err);
            srcFile = fopen(argument, "rb");
            if ( srcFile==NULL ) {
                /* save errno first : releasing the resources may overwrite it */
                int const openErrno = errno;
                freeCResources(ress);
                EXM_THROW(1, "%s: %s \n", argument, strerror(openErrno));
            }
            err = frameCheck(ress, srcFile, bsid, blockSize);
            freeCResources(ress);
            fclose(srcFile);
            return (err);
        }
    }

    /* no file name on the command line : nothing was checked */
    (void)FUZ_usage(programName);
    return 1;
}

View File

@ -0,0 +1,72 @@
#!/usr/bin/env sh
# Check that `lz4 -B#` honours custom block sizes : two files are compressed
# with the same -B value, the frames are concatenated, and checkFrame verifies
# the block size ID and the block size of every frame.
set -e

LZ4=../lz4
CHECKFRAME=./checkFrame
DATAGEN=./datagen

failures=""

TMPFILE=/tmp/test_custom_block_sizes.$$
TMPFILE1=/tmp/test_custom_block_sizes1.$$
TMPFILE2=/tmp/test_custom_block_sizes2.$$

# Remove the temporary files whenever the script exits,
# including when `set -e` aborts it half-way through.
cleanup() {
    rm -f "$TMPFILE.lz4" "$TMPFILE1" "$TMPFILE1.lz4" "$TMPFILE2" "$TMPFILE2.lz4"
}
trap cleanup EXIT

# check_blocksize REQUESTED BSID EXPECTED
#   Compress both samples with -B<REQUESTED>, then verify that every frame
#   uses block size ID <BSID> and blocks of <EXPECTED> bytes.
#   A checkFrame failure is recorded in $failures; an lz4 failure aborts the script.
check_blocksize() {
    requested=$1
    bsid=$2
    expected=$3
    echo Testing -B$requested
    $LZ4 -f -B"$requested" "$TMPFILE1"
    $LZ4 -f -B"$requested" "$TMPFILE2"
    cat "$TMPFILE1.lz4" "$TMPFILE2.lz4" > "$TMPFILE.lz4"
    $CHECKFRAME -B"$expected" -b"$bsid" "$TMPFILE.lz4" || failures="$failures $requested "
}

$DATAGEN -g12345678 > "$TMPFILE1"
$DATAGEN -g12345678 > "$TMPFILE2"

# below the minimum : lz4 must refuse
echo Testing -B31
$LZ4 -f -B31 "$TMPFILE1" && failures="31 (should fail) "

# up to 64 KB : block size ID 4
for blocksize in 32 65535 65536
do
    check_blocksize $blocksize 4 $blocksize
done

# up to 256 KB : block size ID 5
for blocksize in 65537 262143 262144
do
    check_blocksize $blocksize 5 $blocksize
done

# up to 1 MB : block size ID 6
for blocksize in 262145 1048575 1048576
do
    check_blocksize $blocksize 6 $blocksize
done

# up to 4 MB : block size ID 7
for blocksize in 1048577 4194303 4194304
do
    check_blocksize $blocksize 7 $blocksize
done

# above the maximum : the block size is capped at 4 MB
for blocksize in 4194305 10485760
do
    check_blocksize $blocksize 7 4194304
done

if [ -z "$failures" ]
then
    echo ---- All tests passed
    exit 0
else
    echo ---- The following tests had failures: $failures
    exit 1
fi