Merge remote-tracking branch 'refs/remotes/lz4/dev' into dev

This commit is contained in:
Przemyslaw Skibinski 2016-11-06 11:11:55 +01:00
commit 112499a73a
9 changed files with 226 additions and 43 deletions

5
NEWS
View File

@ -1,10 +1,13 @@
v1.7.2
Changed : moved to versioning; package, cli and library have same version number
Improved: Small decompression speed boost (+4%)
Improved: Small decompression speed boost
Improved: Small compression speed improvement on 64-bits systems
Improved: Performance on ARMv6 and ARMv7
Added : Debianization, by Evgeniy Polyakov
Makefile: Generates object files (*.o) for faster (re)compilation on low power systems
Fix : cli : crash on some invalid inputs
Fix : cli : -t correctly validates lz4-compressed files, by Nick Terrell
Fix : better ratio on 64-bits big-endian targets
r131
New : Dos/DJGPP target, thanks to Louis Santillan (#114)

View File

@ -415,27 +415,32 @@ int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType)
{
if (tableType == byU16)
return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
else
return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
}
static const U64 prime5bytes = 889523592379ULL;
static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType)
static U32 LZ4_hashSequence64(U64 sequence, tableType_t const tableType)
{
static const U64 prime5bytes = 889523592379ULL;
static const U64 prime8bytes = 11400714785074694791ULL;
const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
const U32 hashMask = (1<<hashLog) - 1;
return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask;
if (LZ4_isLittleEndian())
return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
else
return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
}
static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType)
{
if (LZ4_64bits())
return LZ4_hashSequence64(sequence, tableType);
if (LZ4_64bits()) return LZ4_hashSequence64(sequence, tableType);
return LZ4_hashSequence((U32)sequence, tableType);
}
static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); }
static U32 LZ4_hashPosition(const void* p, tableType_t tableType)
{
return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType);
}
static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
{
@ -1361,7 +1366,7 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
lz4sd->prefixEnd += originalSize;
} else {
lz4sd->extDictSize = lz4sd->prefixSize;
lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
result = LZ4_decompress_generic(source, dest, 0, originalSize,
endOnOutputSize, full, 0,
usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);

View File

@ -45,13 +45,11 @@
extern "C" {
#endif
/*-************************************
* Dependency
**************************************/
/* --- Dependency --- */
#include <stddef.h> /* size_t */
/*-***************************************************************
* Export parameters
* Compiler specifics
*****************************************************************/
/*!
* LZ4_DLL_EXPORT :
@ -67,6 +65,15 @@ extern "C" {
# define LZ4FLIB_API
#endif
#if defined(_MSC_VER)
# define LZ4F_DEPRECATE(x) __declspec(deprecated) x
#elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
# define LZ4F_DEPRECATE(x) x __attribute__((deprecated))
#else
# define LZ4F_DEPRECATE(x) x /* no deprecation warning for this compiler */
#endif
/*-************************************
* Error management
**************************************/
@ -81,7 +88,7 @@ LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /* return er
**************************************/
/* #define LZ4F_DISABLE_OBSOLETE_ENUMS */ /* uncomment to disable obsolete enums */
#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
# define LZ4F_OBSOLETE_ENUM(x) ,x
# define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x
#else
# define LZ4F_OBSOLETE_ENUM(x)
#endif
@ -92,30 +99,30 @@ typedef enum {
LZ4F_max256KB=5,
LZ4F_max1MB=6,
LZ4F_max4MB=7
LZ4F_OBSOLETE_ENUM(max64KB = LZ4F_max64KB)
LZ4F_OBSOLETE_ENUM(max256KB = LZ4F_max256KB)
LZ4F_OBSOLETE_ENUM(max1MB = LZ4F_max1MB)
LZ4F_OBSOLETE_ENUM(max4MB = LZ4F_max4MB)
LZ4F_OBSOLETE_ENUM(max64KB)
LZ4F_OBSOLETE_ENUM(max256KB)
LZ4F_OBSOLETE_ENUM(max1MB)
LZ4F_OBSOLETE_ENUM(max4MB)
} LZ4F_blockSizeID_t;
typedef enum {
LZ4F_blockLinked=0,
LZ4F_blockIndependent
LZ4F_OBSOLETE_ENUM(blockLinked = LZ4F_blockLinked)
LZ4F_OBSOLETE_ENUM(blockIndependent = LZ4F_blockIndependent)
LZ4F_OBSOLETE_ENUM(blockLinked)
LZ4F_OBSOLETE_ENUM(blockIndependent)
} LZ4F_blockMode_t;
typedef enum {
LZ4F_noContentChecksum=0,
LZ4F_contentChecksumEnabled
LZ4F_OBSOLETE_ENUM(noContentChecksum = LZ4F_noContentChecksum)
LZ4F_OBSOLETE_ENUM(contentChecksumEnabled = LZ4F_contentChecksumEnabled)
LZ4F_OBSOLETE_ENUM(noContentChecksum)
LZ4F_OBSOLETE_ENUM(contentChecksumEnabled)
} LZ4F_contentChecksum_t;
typedef enum {
LZ4F_frame=0,
LZ4F_skippableFrame
LZ4F_OBSOLETE_ENUM(skippableFrame = LZ4F_skippableFrame)
LZ4F_OBSOLETE_ENUM(skippableFrame)
} LZ4F_frameType_t;
#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS

View File

@ -64,7 +64,7 @@ extern "C" {
ITEM(ERROR_headerChecksum_invalid) ITEM(ERROR_contentChecksum_invalid) \
ITEM(ERROR_maxCode)
//#define LZ4F_DISABLE_OLD_ENUMS /* uncomment to disable deprecated enums */
#define LZ4F_DISABLE_OLD_ENUMS /* comment to enable deprecated enums */
#ifndef LZ4F_DISABLE_OLD_ENUMS
# define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, ENUM = LZ4F_##ENUM,
#else

View File

@ -2,20 +2,20 @@ Command Line Interface for LZ4 library
============================================
Command Line Interface (CLI) can be created using the `make` command without any additional parameters.
There are however other Makefile targets that create different variations of CLI:
- `lz4` : default CLI supporting gzip-like arguments
- `lz4c` : Same as `lz4` with additional support for decompression of legacy lz4 versions
There are also multiple targets that create different variations of CLI:
- `lz4` : default CLI, with a command line syntax close to gzip
- `lz4c` : Same as `lz4` with additional support for legacy lz4 commands (incompatible with gzip)
- `lz4c32` : Same as `lz4c`, but forced to compile in 32-bits mode
#### Aggregation of parameters
CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
#### Benchmark in Command Line Interface
CLI includes in-memory compression benchmark module for lz4.
The benchmark is conducted using a given filename.
The benchmark is conducted using a given filename.
The file is read into memory.
It makes benchmark more precise as it eliminates I/O overhead.
@ -26,7 +26,7 @@ The `-i` parameter selects a number of iterations used for each of tested levels
#### Usage of Command Line Interface
The full list of options can be obtained with `-h` or `-H` parameter:
The full list of commands can be obtained with `-h` or `-H` parameter:
```
Usage :
lz4 [arg] [input] [output]

View File

@ -50,12 +50,16 @@ The native file format is the
format.
.B lz4
supports a command line syntax similar but not identical to
supports a command line syntax similar \fIbut not identical\fR to
.BR gzip (1).
Differences are :
\fBlz4\fR preserve original files ;
\fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2 ;
\fBlz4 file\fR shows real-time statistics during compression .
\fBlz4\fR preserves original files
\fBlz4\fR compresses a single file by default (use \fB-m\fR for multiple files)
\fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2
When no destination name is provided, compressed file name receives a \fB.lz4\fR suffix
When no destination name is provided, if \fBstdout\fR is \fInot\fR the console, it becomes the output (like a silent \fB-c\fR)
Therefore \fBlz4 file > /dev/null\fR will not create \fBfile.lz4\fR
\fBlz4 file\fR shows real-time statistics during compression (use \fB-q\fR to silence them)
Default behaviors can be modified by opt-in commands, described below.
\fBlz4 --quiet --multiple\fR more closely mimics \fBgzip\fR behavior.
@ -121,7 +125,7 @@ Decompress.
.B --decompress
is also the default operation when the input filename has an
.B .lz4
extensionq
extension.
.TP
.BR \-t ", " \-\-test
Test the integrity of compressed
@ -129,6 +133,11 @@ Test the integrity of compressed
files.
The decompressed data is discarded.
No files are created nor removed.
.TP
.BR \-b#
Benchmark mode, using # compression level.
.
.SS "Operation modifiers"
.TP
@ -215,12 +224,22 @@ hence for a file. It won't work with unknown source size, such as stdin or pipe.
.BR \-k ", " \--keep
Don't delete source file.
This is default behavior anyway, so this option is just for compatibility with gzip/xz.
.
.SS "Benchmark mode"
.TP
.B \-b
benchmark file(s)
.B \-b#
benchmark file(s), using # compression level
.TP
.B \-e#
benchmark multiple compression levels, from b# to e# (included)
.TP
.B \-i#
iteration loops [1-9](default : 3), benchmark mode only
minimum evaluation in seconds [1-9] (default : 3)
.TP
.B \-r
operate recursively on directories
.SH BUGS
Report bugs at: https://github.com/Cyan4973/lz4/issues

View File

@ -569,7 +569,7 @@ int main(int argc, const char** argv)
if (multiple_inputs)
operationResult = LZ4IO_decompressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION);
else
DEFAULT_DECOMPRESSOR(input_filename, output_filename);
operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename);
} else {
/* compression is default action */
if (legacy_format) {
@ -579,7 +579,7 @@ int main(int argc, const char** argv)
if (multiple_inputs)
operationResult = LZ4IO_compressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION, cLevel);
else
DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel);
operationResult = DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel);
}
}

View File

@ -97,6 +97,9 @@ frametest: $(LZ4DIR)/lz4frame.o $(LZ4DIR)/lz4.o $(LZ4DIR)/lz4hc.o $(LZ4DIR)/xxha
frametest32: $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/xxhash.c frametest.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
fasttest: $(LZ4DIR)/lz4.o fasttest.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
datagen : $(PRGDIR)/datagen.c datagencli.c
$(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT)
@ -119,7 +122,7 @@ versionsTest:
#FreeBSD targets
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD))
test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-mem
test: test-lz4 test-lz4c test-fasttest test-frametest test-fullbench test-fuzzer test-mem
test32: test-lz4c32 test-frametest32 test-fullbench32 test-fuzzer32 test-mem32
@ -220,6 +223,11 @@ test-lz4: lz4 datagen test-lz4-basic test-lz4-multiple test-lz4-sparse test-lz4-
./datagen | $(PRGDIR)/lz4 -tf && false || true
./datagen | $(PRGDIR)/lz4 -d > $(VOID) && false || true
./datagen | $(PRGDIR)/lz4 -df > $(VOID)
@echo "\n ---- test cli ----"
$(PRGDIR)/lz4 file-does-not-exist && false || true
$(PRGDIR)/lz4 -f file-does-not-exist && false || true
$(PRGDIR)/lz4 -fm file1-dne file2-dne && false || true
$(PRGDIR)/lz4 -fm file1-dne file2-dne && false || true
test-lz4c: lz4c datagen
@echo "\n ---- test lz4c version ----"
@ -262,6 +270,9 @@ test-frametest: frametest
test-frametest32: frametest32
./frametest32 $(FUZZER_TIME)
test-fasttest: fasttest
./fasttest
test-mem: lz4 datagen fuzzer frametest fullbench
@echo "\n ---- valgrind tests : memory analyzer ----"
valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)

138
tests/fasttest.c Normal file
View File

@ -0,0 +1,138 @@
/**************************************
* Compiler Options
**************************************/
#ifdef _MSC_VER /* Visual Studio */
# define _CRT_SECURE_NO_WARNINGS // for MSVC
# define snprintf sprintf_s
#endif
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
#endif
/**************************************
* Includes
**************************************/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "lz4.h"
/* Compresses `input` in chunks of (inSize / 4) bytes through one shared
 * LZ4_stream_t. Each chunk is written to `output` as an 8-byte header
 * (4 bytes : chunk length, 4 bytes : compressed length) followed by the
 * compressed bytes. 8 zero bytes are appended after the last chunk.
 * Returns 0 on success, or a negative code :
 *   -1 : inSize is not a multiple of 4
 *   -2 : inSize is larger than 1024
 *   -3 : remaining room in `output` is below LZ4_compressBound() for a chunk
 *   -4 : compression of a chunk failed
 *   -5 : no room left in `output` for the 8-byte terminator
 */
int test_compress(const char *input, int inSize, char *output, int outSize)
{
    LZ4_stream_t streamState = { 0 };
    int consumed = 0;   /* bytes of `input` already compressed */
    int produced = 0;   /* bytes of `output` already written */

    if ((inSize & 3) != 0) return -1;
    if (inSize > 1024) return -2;

    while (consumed < inSize) {
        const int chunkSize = inSize >> 2;
        const int roomAfterHeader = outSize - (produced + 8);
        int compressedSize;

        if (roomAfterHeader < LZ4_compressBound(chunkSize)) return -3;

        /* compressed payload goes right after the 8-byte header slot */
        compressedSize = LZ4_compress_continue(
            &streamState, input + consumed, output + produced + 8, chunkSize);
        if (compressedSize <= 0) return -4;

        memcpy(output + produced, &chunkSize, 4);          /* input length */
        memcpy(output + produced + 4, &compressedSize, 4); /* output length */

        consumed += chunkSize;
        produced += compressedSize + 8;
    }

    /* end-of-stream marker : both header fields set to zero */
    if (produced + 8 > outSize) return -5;
    memset(output + produced, 0, 8);
    return 0;
}
/* Exchanges the two pointers stored at `a` and `b`. */
void swap(void **a, void **b) {
    void *const first = *a;
    void *const second = *b;
    *a = second;
    *b = first;
}
/* Decodes the block sequence found in `compressed` and compares every decoded
 * block against the matching part of `uncompressed`.
 * Each block is expected as an 8-byte header (4 bytes : uncompressed size,
 * 4 bytes : compressed size) followed by the compressed bytes; a header with
 * both fields at zero ends the sequence.
 * Returns :
 *   0 : end-of-sequence header reached, all blocks matched
 *   1 : invalid header (non-positive size, or uncompressed size > 1024)
 *   2 : LZ4_decompress_fast_continue() returned a value <= 0
 *   3 : decoded block differs from `uncompressed`
 * Not a safe function : `compressed` and `uncompressed` carry no length,
 * so reads from them are not bounds-checked.
 */
int test_decompress(const char *uncompressed, const char *compressed)
{
/* Two output buffers used alternately : one receives the current block,
 * the other still holds the previous block and is used as dictionary. */
char outBufferA[1024];
char spacing; /* So prefixEnd != dest : intended to keep the two buffers from being contiguous (actual stack layout is up to the compiler) */
char outBufferB[1024];
char *output = outBufferA;
char *lastOutput = outBufferB;
LZ4_streamDecode_t lz4StreamDecode_body = { 0 };
LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;
int offset = 0;    /* read position in `compressed` */
int unOffset = 0;  /* read position in `uncompressed` */
int lastBytes = 0; /* size of the previous decoded block (0 for the first block) */
(void)spacing; /* `spacing` is never accessed; silences the unused-variable warning */
for(;;) {
int32_t bytes;
int32_t unBytes;
/* Read uncompressed size and compressed size */
memcpy(&unBytes, compressed + offset, 4);
memcpy(&bytes, compressed + offset + 4, 4);
offset += 8;
/* Check if we reached end of stream or error */
if(bytes == 0 && unBytes == 0) return 0;
if(bytes <= 0 || unBytes <= 0 || unBytes > 1024) return 1;
/* Put the last output in the dictionary */
LZ4_setStreamDecode(lz4StreamDecode, lastOutput, lastBytes);
/* Decompress */
/* `bytes` is overwritten with the decoder's return value, which is then
 * used to advance `offset` (presumably the number of compressed bytes
 * consumed -- confirm against lz4.h) */
bytes = LZ4_decompress_fast_continue(
lz4StreamDecode, compressed + offset, output, unBytes);
if(bytes <= 0) return 2;
/* Check result */
{
int r = memcmp(uncompressed + unOffset, output, unBytes);
if (r) return 3;
}
/* The block just decoded becomes the dictionary of the next iteration */
swap((void**)&output, (void**)&lastOutput);
offset += bytes;
unOffset += unBytes;
lastBytes = unBytes;
}
}
/* Round-trip check : compresses a fixed text with test_compress(), then
 * decodes and verifies it with test_decompress().
 * Exit status is 0 on success, otherwise the non-zero code returned by the
 * failing step. */
int main(int argc, char **argv)
{
    /* sizeof(input) counts the terminating NUL : 15*48 + 47 + 1 = 768 bytes,
     * a multiple of 4 that stays within test_compress()'s 1024-byte limit */
    char input[] =
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello!"
        "Hello Hello Hello Hello Hello Hello Hello Hello";
    char compressedBuf[LZ4_COMPRESSBOUND(4096)];
    int status;

    (void)argc;
    (void)argv;

    status = test_compress(input, sizeof(input), compressedBuf, sizeof(compressedBuf));
    if (status != 0) return status;

    status = test_decompress(input, compressedBuf);
    return status;
}