commit
2d4fed5ed2
@ -3,19 +3,21 @@ compiler: gcc
|
||||
script: make test-travis
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gcc-arm-linux-gnueabi
|
||||
- sudo apt-get install -qq clang
|
||||
- sudo apt-get install -qq g++-multilib
|
||||
- sudo apt-get install -qq gcc-multilib
|
||||
- sudo apt-get install -qq valgrind
|
||||
|
||||
env:
|
||||
- LZ4_TRAVIS_CI_ENV=dist
|
||||
- LZ4_TRAVIS_CI_ENV=travis-install
|
||||
- LZ4_TRAVIS_CI_ENV=streaming-examples
|
||||
- LZ4_TRAVIS_CI_ENV=cmake
|
||||
- LZ4_TRAVIS_CI_ENV=gpptest
|
||||
- LZ4_TRAVIS_CI_ENV=clangtest
|
||||
- LZ4_TRAVIS_CI_ENV=staticAnalyze
|
||||
- LZ4_TRAVIS_CI_ENV=dist
|
||||
- LZ4_TRAVIS_CI_ENV=gpptest
|
||||
- LZ4_TRAVIS_CI_ENV=armtest
|
||||
- LZ4_TRAVIS_CI_ENV=test-lz4
|
||||
- LZ4_TRAVIS_CI_ENV=test-lz4c
|
||||
- LZ4_TRAVIS_CI_ENV=test-lz4c32
|
||||
|
27
Makefile
27
Makefile
@ -53,7 +53,7 @@ TEXT = $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4.h $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4hc.h \
|
||||
$(PRGDIR)/datagen.c $(PRGDIR)/datagen.h $(PRGDIR)/datagencli.c $(PRGDIR)/fuzzer.c \
|
||||
$(PRGDIR)/lz4io.c $(PRGDIR)/lz4io.h \
|
||||
$(PRGDIR)/bench.c $(PRGDIR)/bench.h \
|
||||
$(PRGDIR)/lz4.1 $(PRGDIR)/lz4c.1 $(PRGDIR)/lz4cat.1 \
|
||||
$(PRGDIR)/lz4.1 \
|
||||
$(PRGDIR)/Makefile $(PRGDIR)/COPYING
|
||||
NONTEXT = images/image00.png images/image01.png images/image02.png \
|
||||
images/image03.png images/image04.png images/image05.png \
|
||||
@ -68,6 +68,13 @@ else
|
||||
TRAVIS_TARGET=$(LZ4_TRAVIS_CI_ENV)
|
||||
endif
|
||||
|
||||
# Define nul output
|
||||
ifneq (,$(filter Windows%,$(OS)))
|
||||
VOID = nul
|
||||
else
|
||||
VOID = /dev/null
|
||||
endif
|
||||
|
||||
|
||||
default: lz4programs
|
||||
|
||||
@ -79,10 +86,10 @@ lz4programs:
|
||||
@cd $(PRGDIR); $(MAKE) -e
|
||||
|
||||
clean:
|
||||
@rm -f $(DISTRIBNAME) *.sha1
|
||||
@cd $(PRGDIR); $(MAKE) clean
|
||||
@cd $(LZ4DIR); $(MAKE) clean
|
||||
@cd examples; $(MAKE) clean
|
||||
@rm -f $(DISTRIBNAME) *.sha1 > $(VOID)
|
||||
@cd $(PRGDIR); $(MAKE) clean > $(VOID)
|
||||
@cd $(LZ4DIR); $(MAKE) clean > $(VOID)
|
||||
@cd examples; $(MAKE) clean > $(VOID)
|
||||
@echo Cleaning completed
|
||||
|
||||
|
||||
@ -127,15 +134,19 @@ test-travis: $(TRAVIS_TARGET)
|
||||
cmake:
|
||||
@cd cmake_unofficial; cmake CMakeLists.txt; $(MAKE)
|
||||
|
||||
gpptest: clean
|
||||
export CC=g++; export CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align"; $(MAKE) -e all
|
||||
|
||||
clangtest: clean
|
||||
export CC=clang; $(MAKE) all
|
||||
|
||||
staticAnalyze: clean
|
||||
export CFLAGS=-g; scan-build -v $(MAKE) all
|
||||
|
||||
gpptest: clean
|
||||
export CC=g++; export CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align"; $(MAKE) -e all
|
||||
|
||||
armtest: clean
|
||||
export CC=arm-linux-gnueabi-gcc; cd lib; $(MAKE) -e all
|
||||
export CC=arm-linux-gnueabi-gcc; cd programs; $(MAKE) -e bins
|
||||
|
||||
streaming-examples:
|
||||
cd examples; $(MAKE) -e test
|
||||
|
||||
|
3
NEWS
3
NEWS
@ -7,9 +7,10 @@ New : lz4frame & lz4cli frame content size support
|
||||
New : lz4frame supports skippable frames
|
||||
Changed:Default "make install" directory is /usr/local
|
||||
New : lz4 cli supports "pass-through" mode
|
||||
New : g++ compatibility tests
|
||||
New : datagen can generate sparse files
|
||||
New : scan-build tests
|
||||
New : g++ compatibility tests
|
||||
New : arm cross-compilation test
|
||||
Fixed : Fuzzer + frametest compatibility with NetBSD (issue #48)
|
||||
Added : Visual project directory
|
||||
Updated:Man page & Specification
|
||||
|
@ -54,3 +54,7 @@ Benchmark evaluates the compression of reference [Silesia Corpus](http://sun.aei
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
The LZ4 block compression format is detailed within [lz4_block_format.txt](lz4_block_format.txt).
|
||||
|
||||
For streaming an unknown amount of data and compressing files of any size, a frame format has been published, and can be consulted within the file LZ4_Frame_Format.html.
|
||||
|
||||
|
8
examples/README.md
Normal file
8
examples/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
# LZ4 examples
|
||||
|
||||
## Documents
|
||||
|
||||
- [Streaming API Basics](streaming_api_basics.md)
|
||||
- Examples
|
||||
- [Double Buffer](blockStreaming_doubleBuffer.md)
|
||||
- [Line by Line Text Compression](blockStreaming_lineByLine.md)
|
100
examples/blockStreaming_doubleBuffer.md
Normal file
100
examples/blockStreaming_doubleBuffer.md
Normal file
@ -0,0 +1,100 @@
|
||||
# LZ4 Streaming API Example : Double Buffer
|
||||
by *Takayuki Matsuoka*
|
||||
|
||||
`blockStreaming_doubleBuffer.c` is an LZ4 Streaming API example which implements double buffer (de)compression.
|
||||
|
||||
Please note :
|
||||
|
||||
- Firstly, read "LZ4 Streaming API Basics".
|
||||
- This is relatively advanced application example.
|
||||
- Output file is not compatible with lz4frame and platform dependent.
|
||||
|
||||
|
||||
## What's the point of this example ?
|
||||
|
||||
- Handle huge file in small amount of memory
|
||||
- Always better compression ratio than Block API
|
||||
- Uniform block size
|
||||
|
||||
|
||||
## How the compression works
|
||||
|
||||
First of all, allocate "Double Buffer" for input and LZ4 compressed data buffer for output.
|
||||
Double buffer has two pages, "first" page (Page#1) and "second" page (Page#2).
|
||||
|
||||
```
|
||||
Double Buffer
|
||||
|
||||
Page#1 Page#2
|
||||
+---------+---------+
|
||||
| Block#1 | |
|
||||
+----+----+---------+
|
||||
|
|
||||
v
|
||||
{Out#1}
|
||||
|
||||
|
||||
Prefix Dependency
|
||||
+---------+
|
||||
| |
|
||||
v |
|
||||
+---------+----+----+
|
||||
| Block#1 | Block#2 |
|
||||
+---------+----+----+
|
||||
|
|
||||
v
|
||||
{Out#2}
|
||||
|
||||
|
||||
External Dictionary Mode
|
||||
+---------+
|
||||
| |
|
||||
| v
|
||||
+----+----+---------+
|
||||
| Block#3 | Block#2 |
|
||||
+----+----+---------+
|
||||
|
|
||||
v
|
||||
{Out#3}
|
||||
|
||||
|
||||
Prefix Dependency
|
||||
+---------+
|
||||
| |
|
||||
v |
|
||||
+---------+----+----+
|
||||
| Block#3 | Block#4 |
|
||||
+---------+----+----+
|
||||
|
|
||||
v
|
||||
{Out#4}
|
||||
```
|
||||
|
||||
Next, read first block to double buffer's first page. And compress it by `LZ4_compress_continue()`.
|
||||
For the first time, LZ4 doesn't know any previous dependencies,
|
||||
so it just compresses the block without dependencies and generates compressed block {Out#1} to LZ4 compressed data buffer.
|
||||
After that, write {Out#1} to the file.
|
||||
|
||||
Next, read second block to double buffer's second page. And compress it.
|
||||
This time, LZ4 can use a dependency on Block#1 to improve the compression ratio.
|
||||
This dependency is called "Prefix mode".
|
||||
|
||||
Next, read third block to double buffer's *first* page. And compress it.
|
||||
Also this time, LZ4 can use dependency to Block#2.
|
||||
This dependency is called "External Dictionary mode".
|
||||
|
||||
Continue this procedure to the end of the file.
|
||||
|
||||
|
||||
## How the decompression works
|
||||
|
||||
Decompression will do reverse order.
|
||||
|
||||
- Read first compressed block.
|
||||
- Decompress it to the first page and write that page to the file.
|
||||
- Read second compressed block.
|
||||
- Decompress it to the second page and write that page to the file.
|
||||
- Read third compressed block.
|
||||
- Decompress it to the *first* page and write that page to the file.
|
||||
|
||||
Continue this procedure to the end of the compressed file.
|
122
examples/blockStreaming_lineByLine.md
Normal file
122
examples/blockStreaming_lineByLine.md
Normal file
@ -0,0 +1,122 @@
|
||||
# LZ4 Streaming API Example : Line by Line Text Compression
|
||||
by *Takayuki Matsuoka*
|
||||
|
||||
`blockStreaming_lineByLine.c` is an LZ4 Streaming API example which implements line by line incremental (de)compression.
|
||||
|
||||
Please note the following restrictions :
|
||||
|
||||
- Firstly, read "LZ4 Streaming API Basics".
|
||||
- This is relatively advanced application example.
|
||||
- Output file is not compatible with lz4frame and platform dependent.
|
||||
|
||||
|
||||
## What's the point of this example ?
|
||||
|
||||
- Line by line incremental (de)compression.
|
||||
- Handle huge file in small amount of memory
|
||||
- Generally better compression ratio than Block API
|
||||
- Non-uniform block size
|
||||
|
||||
|
||||
## How the compression works
|
||||
|
||||
First of all, allocate "Ring Buffer" for input and LZ4 compressed data buffer for output.
|
||||
|
||||
```
|
||||
(1)
|
||||
Ring Buffer
|
||||
|
||||
+--------+
|
||||
| Line#1 |
|
||||
+---+----+
|
||||
|
|
||||
v
|
||||
{Out#1}
|
||||
|
||||
|
||||
(2)
|
||||
Prefix Mode Dependency
|
||||
+----+
|
||||
| |
|
||||
v |
|
||||
+--------+-+------+
|
||||
| Line#1 | Line#2 |
|
||||
+--------+---+----+
|
||||
|
|
||||
v
|
||||
{Out#2}
|
||||
|
||||
|
||||
(3)
|
||||
Prefix Prefix
|
||||
+----+ +----+
|
||||
| | | |
|
||||
v | v |
|
||||
+--------+-+------+-+------+
|
||||
| Line#1 | Line#2 | Line#3 |
|
||||
+--------+--------+---+----+
|
||||
|
|
||||
v
|
||||
{Out#3}
|
||||
|
||||
|
||||
(4)
|
||||
External Dictionary Mode
|
||||
+----+ +----+
|
||||
| | | |
|
||||
v | v |
|
||||
------+--------+-+------+-+--------+
|
||||
| .... | Line#X | Line#X+1 |
|
||||
------+--------+--------+-----+----+
|
||||
^ |
|
||||
| v
|
||||
| {Out#X+1}
|
||||
|
|
||||
Reset
|
||||
|
||||
|
||||
(5)
|
||||
Prefix
|
||||
+-----+
|
||||
| |
|
||||
v |
|
||||
------+--------+--------+----------+--+-------+
|
||||
| .... | Line#X | Line#X+1 | Line#X+2 |
|
||||
------+--------+--------+----------+-----+----+
|
||||
^ |
|
||||
| v
|
||||
| {Out#X+2}
|
||||
|
|
||||
Reset
|
||||
```
|
||||
|
||||
Next (see (1)), read first line to ringbuffer and compress it by `LZ4_compress_continue()`.
|
||||
For the first time, LZ4 doesn't know any previous dependencies,
|
||||
so it just compresses the line without dependencies and generates compressed line {Out#1} to LZ4 compressed data buffer.
|
||||
After that, write {Out#1} to the file and forward ringbuffer offset.
|
||||
|
||||
Do the same things to second line (see (2)).
|
||||
But this time, LZ4 can use a dependency on Line#1 to improve the compression ratio.
|
||||
This dependency is called "Prefix mode".
|
||||
|
||||
Eventually, we'll reach end of ringbuffer at Line#X (see (4)).
|
||||
This time, we should reset ringbuffer offset.
|
||||
After resetting, at Line#X+1 the pointer is not adjacent, but LZ4 still maintains its memory.
|
||||
This is called "External Dictionary Mode".
|
||||
|
||||
In Line#X+2 (see (5)), LZ4 finally forgets almost all of its memory but still retains Line#X+1.
|
||||
This is the same situation as Line#2.
|
||||
|
||||
Continue this procedure to the end of the text file.
|
||||
|
||||
|
||||
## How the decompression works
|
||||
|
||||
Decompression will do reverse order.
|
||||
|
||||
- Read compressed line from the file to buffer.
|
||||
- Decompress it to the ringbuffer.
|
||||
- Output decompressed plain text line to the file.
|
||||
- Forward the ringbuffer offset. If the offset exceeds the end of the ringbuffer, reset it.
|
||||
|
||||
Continue this procedure to the end of the compressed file.
|
87
examples/streaming_api_basics.md
Normal file
87
examples/streaming_api_basics.md
Normal file
@ -0,0 +1,87 @@
|
||||
# LZ4 Streaming API Basics
|
||||
by *Takayuki Matsuoka*
|
||||
## LZ4 API sets
|
||||
|
||||
LZ4 has the following API sets :
|
||||
|
||||
- "Auto Framing" API (lz4frame.h) :
|
||||
This is most recommended API for usual application.
|
||||
It guarantees interoperability with other LZ4 framing format compliant tools/libraries
|
||||
such as LZ4 command line utility, node-lz4, etc.
|
||||
- "Block" API : This is recommended for simple purpose.
|
||||
It compresses a single raw memory block to an LZ4 memory block and vice versa.
|
||||
- "Streaming" API : This is designed for complex thing.
|
||||
For example, compress huge stream data in restricted memory environment.
|
||||
|
||||
Basically, you should use "Auto Framing" API.
|
||||
But if you want to write advanced application, it's time to use Block or Streaming APIs.
|
||||
|
||||
|
||||
## What is difference between Block and Streaming API ?
|
||||
|
||||
Block API (de)compresses single contiguous memory block.
|
||||
In other words, the LZ4 library finds redundancy within a single contiguous memory block.
|
||||
Streaming API does the same thing but (de)compresses multiple adjacent contiguous memory blocks.
|
||||
So LZ4 library could find more redundancy than Block API.
|
||||
|
||||
The following figure shows difference between API and block sizes.
|
||||
In these figures, the original data is split into 4KiBytes contiguous chunks.
|
||||
|
||||
```
|
||||
Original Data
|
||||
+---------------+---------------+----+----+----+
|
||||
| 4KiB Chunk A | 4KiB Chunk B | C | D |... |
|
||||
+---------------+---------------+----+----+----+
|
||||
|
||||
Example (1) : Block API, 4KiB Block
|
||||
+---------------+---------------+----+----+----+
|
||||
| 4KiB Chunk A | 4KiB Chunk B | C | D |... |
|
||||
+---------------+---------------+----+----+----+
|
||||
| Block #1 | Block #2 | #3 | #4 |... |
|
||||
+---------------+---------------+----+----+----+
|
||||
|
||||
(No Dependency)
|
||||
|
||||
|
||||
Example (2) : Block API, 8KiB Block
|
||||
+---------------+---------------+----+----+----+
|
||||
| 4KiB Chunk A | 4KiB Chunk B | C | D |... |
|
||||
+---------------+---------------+----+----+----+
|
||||
| Block #1 |Block #2 |... |
|
||||
+--------------------+----------+-------+-+----+
|
||||
^ | ^ |
|
||||
| | | |
|
||||
+--------------+ +----+
|
||||
Internal Dependency Internal Dependency
|
||||
|
||||
|
||||
Example (3) : Streaming API, 4KiB Block
|
||||
+---------------+---------------+-----+----+----+
|
||||
| 4KiB Chunk A | 4KiB Chunk B | C | D |... |
|
||||
+---------------+---------------+-----+----+----+
|
||||
| Block #1 | Block #2 | #3 | #4 |... |
|
||||
+---------------+----+----------+-+---+-+--+----+
|
||||
^ | ^ | ^ |
|
||||
| | | | | |
|
||||
+--------------+ +--------+ +---+
|
||||
Dependency Dependency Dependency
|
||||
```
|
||||
|
||||
- In example (1), there is no dependency.
|
||||
All blocks are compressed independently.
|
||||
- In example (2), naturally 8KiBytes block has internal dependency.
|
||||
But still block #1 and #2 are compressed independently.
|
||||
- In example (3), block #2 has dependency to #1,
|
||||
also #3 has dependency to #2 and #1, #4 has #3, #2 and #1, and so on.
|
||||
|
||||
Here, we can observe difference between example (2) and (3).
|
||||
In (2), there's no dependency between chunk B and C, but (3) has dependency between B and C.
|
||||
This dependency improves compression ratio.
|
||||
|
||||
|
||||
## Restriction of Streaming API
|
||||
|
||||
For the efficiency, Streaming API doesn't keep mirror copy of dependent (de)compressed memory.
|
||||
This means users should keep this dependent (de)compressed memory explicitly.
|
||||
Usually, "Dependent memory" is previous adjacent contiguous memory up to 64KiBytes.
|
||||
LZ4 will not access further memories.
|
@ -1321,7 +1321,7 @@ void* LZ4_create (const char* inputBuffer)
|
||||
char* LZ4_slideInputBuffer (void* LZ4_Data)
|
||||
{
|
||||
LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data;
|
||||
int dictSize = LZ4_saveDict((LZ4_stream_t*)ctx, (char*)ctx->bufferStart, 64 KB);
|
||||
int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);
|
||||
return (char*)(ctx->bufferStart + dictSize);
|
||||
}
|
||||
|
||||
|
@ -39,8 +39,8 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* lz4.h provides raw compression format functions, for optimal performance and integration into programs.
|
||||
* If you need to generate data using an inter-operable format (respecting the framing specification),
|
||||
* lz4.h provides block compression functions, for optimal performance.
|
||||
* If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
|
||||
* please use lz4frame.h instead.
|
||||
*/
|
||||
|
||||
|
@ -594,7 +594,7 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr);
|
||||
/* initialization */
|
||||
void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
|
||||
{
|
||||
LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= LZ4_STREAMHCSIZE); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
||||
LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t)); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
||||
((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
|
||||
((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
|
||||
}
|
||||
|
16
lib/lz4hc.h
16
lib/lz4hc.h
@ -79,7 +79,7 @@ int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize
|
||||
|
||||
|
||||
/**************************************
|
||||
Using an external allocation
|
||||
* Using an external allocation
|
||||
**************************************/
|
||||
int LZ4_sizeofStateHC(void);
|
||||
int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
|
||||
@ -102,12 +102,18 @@ They just use the externally allocated memory for state instead of allocating th
|
||||
|
||||
|
||||
|
||||
/*****************************
|
||||
* Includes
|
||||
*****************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/**************************************
|
||||
Experimental Streaming Functions
|
||||
* Experimental Streaming Functions
|
||||
**************************************/
|
||||
#define LZ4_STREAMHCSIZE_U64 32774
|
||||
#define LZ4_STREAMHCSIZE (LZ4_STREAMHCSIZE_U64 * sizeof(unsigned long long))
|
||||
typedef struct { unsigned long long table[LZ4_STREAMHCSIZE_U64]; } LZ4_streamHC_t;
|
||||
#define LZ4_STREAMHCSIZE 262192
|
||||
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
||||
typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
|
||||
/*
|
||||
LZ4_streamHC_t
|
||||
This structure allows static allocation of LZ4 HC streaming state.
|
||||
|
18
lib/xxhash.h
18
lib/xxhash.h
@ -56,6 +56,12 @@ SHA1-32 0.28 GB/s 10
|
||||
Q.Score is a measure of quality of the hash function.
|
||||
It depends on successfully passing SMHasher test set.
|
||||
10 is a perfect score.
|
||||
|
||||
A new 64-bits version, named XXH64, is available since r35.
|
||||
It offers better speed for 64-bits applications.
|
||||
Name Speed on 64 bits Speed on 32 bits
|
||||
XXH64 13.8 GB/s 1.9 GB/s
|
||||
XXH32 6.8 GB/s 6.0 GB/s
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
@ -66,20 +72,15 @@ extern "C" {
|
||||
|
||||
|
||||
/*****************************
|
||||
Includes
|
||||
* Definitions
|
||||
*****************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/*****************************
|
||||
Type
|
||||
*****************************/
|
||||
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
|
||||
|
||||
|
||||
|
||||
/*****************************
|
||||
Simple Hash Functions
|
||||
* Simple Hash Functions
|
||||
*****************************/
|
||||
|
||||
unsigned int XXH32 (const void* input, size_t length, unsigned seed);
|
||||
@ -94,12 +95,13 @@ XXH32() :
|
||||
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
|
||||
XXH64() :
|
||||
Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
|
||||
Faster on 64-bits systems. Slower on 32-bits systems.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*****************************
|
||||
Advanced Hash Functions
|
||||
* Advanced Hash Functions
|
||||
*****************************/
|
||||
typedef struct { long long ll[ 6]; } XXH32_state_t;
|
||||
typedef struct { long long ll[11]; } XXH64_state_t;
|
||||
|
@ -1,6 +1,7 @@
|
||||
LZ4 Format Description
|
||||
Last revised: 2012-02-27
|
||||
Author : Y. Collet
|
||||
LZ4 Block Format Description
|
||||
============================
|
||||
Last revised: 2015-03-26;
|
||||
Author : Yann Collet
|
||||
|
||||
|
||||
|
||||
@ -11,19 +12,19 @@ using any programming language.
|
||||
LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
|
||||
The most important design principle behind LZ4 is simplicity.
|
||||
It helps to create an easy to read and maintain source code.
|
||||
It also helps later on for optimisations, compactness, and speed.
|
||||
There is no entropy encoder backend nor framing layer.
|
||||
It also helps later on for optimizations, compactness, and speed.
|
||||
There is no entropy encoder back-end nor framing layer.
|
||||
The latter is assumed to be handled by other parts of the system.
|
||||
|
||||
This document only describes the format,
|
||||
This document only describes the block format,
|
||||
not how the LZ4 compressor nor decompressor actually work.
|
||||
The correctness of the decompressor should not depend
|
||||
on implementation details of the compressor, and vice versa.
|
||||
|
||||
|
||||
|
||||
-- Compressed block format --
|
||||
|
||||
Compressed block format
|
||||
-----------------------
|
||||
An LZ4 compressed block is composed of sequences.
|
||||
Schematically, a sequence is a suite of literals, followed by a match copy.
|
||||
|
||||
@ -90,8 +91,8 @@ On decoding the matchlength, we reach the end of the compressed sequence,
|
||||
and therefore start another one.
|
||||
|
||||
|
||||
-- Parsing restrictions --
|
||||
|
||||
Parsing restrictions
|
||||
-----------------------
|
||||
There are specific parsing rules to respect in order to remain compatible
|
||||
with assumptions made by the decoder :
|
||||
1) The last 5 bytes are always literals
|
||||
@ -104,8 +105,8 @@ Note that the last sequence is also incomplete,
|
||||
and stops right after literals.
|
||||
|
||||
|
||||
-- Additional notes --
|
||||
|
||||
Additional notes
|
||||
-----------------------
|
||||
There is no assumption nor limits to the way the compressor
|
||||
searches and selects matches within the source data block.
|
||||
It could be a fast scan, a multi-probe, a full search using BST,
|
||||
|
@ -61,9 +61,13 @@ endif
|
||||
TRAVIS_TARGET=$(LZ4_TRAVIS_CI_ENV)
|
||||
|
||||
|
||||
default: lz4 lz4c
|
||||
default: lz4
|
||||
|
||||
all: lz4 lz4c lz4c32 fullbench fullbench32 fuzzer fuzzer32 frametest frametest32 datagen
|
||||
m32: lz4c32 fullbench32 fuzzer32 frametest32
|
||||
|
||||
bins: lz4 lz4c fullbench fuzzer frametest datagen
|
||||
|
||||
all: bins m32
|
||||
|
||||
lz4: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c $(LZ4DIR)/xxhash.c bench.c lz4io.c lz4cli.c
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT)
|
||||
@ -144,16 +148,16 @@ test-travis: $(TRAVIS_TARGET)
|
||||
|
||||
test-lz4-sparse: lz4 datagen
|
||||
@echo ---- test sparse file support ----
|
||||
./datagen -g50M -P100 | ./lz4 -B4D | ./lz4 -dv --sparse-support > tmpB4
|
||||
./datagen -g50M -P100 | ./lz4 -B5D | ./lz4 -dv --sparse-support > tmpB5
|
||||
./datagen -g50M -P100 | ./lz4 -B6D | ./lz4 -dv --sparse-support > tmpB6
|
||||
./datagen -g50M -P100 | ./lz4 -B7D | ./lz4 -dv --sparse-support > tmpB7
|
||||
./datagen -g50M -P100 | ./lz4 -B4D | ./lz4 -dv --sparse > tmpB4
|
||||
./datagen -g50M -P100 | ./lz4 -B5D | ./lz4 -dv --sparse > tmpB5
|
||||
./datagen -g50M -P100 | ./lz4 -B6D | ./lz4 -dv --sparse > tmpB6
|
||||
./datagen -g50M -P100 | ./lz4 -B7D | ./lz4 -dv --sparse > tmpB7
|
||||
ls -ls tmp*
|
||||
./datagen -g50M -P100 | diff -s - tmpB4
|
||||
./datagen -g50M -P100 | diff -s - tmpB5
|
||||
./datagen -g50M -P100 | diff -s - tmpB6
|
||||
./datagen -g50M -P100 | diff -s - tmpB7
|
||||
./datagen -s1 -g1200007 -P100 | ./lz4 | ./lz4 -dv --sparse-support > tmpOdd # Odd size file (to not finish on an exact nb of blocks)
|
||||
./datagen -s1 -g1200007 -P100 | ./lz4 | ./lz4 -dv --sparse > tmpOdd # Odd size file (to not finish on an exact nb of blocks)
|
||||
./datagen -s1 -g1200007 -P100 | diff -s - tmpOdd
|
||||
ls -ls tmpOdd
|
||||
@rm tmp*
|
||||
@ -162,7 +166,7 @@ test-lz4-contentSize: lz4 datagen
|
||||
@echo ---- test original size support ----
|
||||
./datagen -g15M > tmp
|
||||
./lz4 -v tmp | ./lz4 -t
|
||||
./lz4 -v --frame-content-size tmp | ./lz4 -d > tmp2
|
||||
./lz4 -v --content-size tmp | ./lz4 -d > tmp2
|
||||
diff -s tmp tmp2
|
||||
@rm tmp*
|
||||
|
||||
@ -237,8 +241,8 @@ test-mem: lz4 datagen fuzzer frametest
|
||||
valgrind --leak-check=yes ./lz4 -9 -BD -f tmp $(VOID)
|
||||
./datagen -g16MB > tmp
|
||||
valgrind --leak-check=yes ./lz4 -9 -B5D -f tmp tmp2
|
||||
./datagen -g256MB > tmp
|
||||
valgrind --leak-check=yes ./lz4 -t tmp2
|
||||
./datagen -g256MB > tmp
|
||||
valgrind --leak-check=yes ./lz4 -B4D -f -vq tmp $(VOID)
|
||||
rm tmp*
|
||||
valgrind --leak-check=yes ./fuzzer -i64 -t1
|
||||
|
@ -59,12 +59,19 @@ files as is.
|
||||
.B lz4
|
||||
will decompress such files as if they were a single
|
||||
.B .lz4
|
||||
file.
|
||||
file. For example:
|
||||
lz4 file1 > foo.lz4
|
||||
lz4 file2 >> foo.lz4
|
||||
then
|
||||
lz4cat foo.lz4
|
||||
is equivalent to :
|
||||
cat file1 file2
|
||||
|
||||
.PP
|
||||
|
||||
.SH OPTIONS
|
||||
.
|
||||
.SS "Short command concatenation"
|
||||
.SS "Short commands concatenation"
|
||||
In some cases, some options can be expressed using short command
|
||||
.B "-x"
|
||||
or long command
|
||||
@ -75,10 +82,11 @@ is equivalent to
|
||||
.B "-dc" .
|
||||
Long commands cannot be concatenated.
|
||||
They must be clearly separated by a space.
|
||||
.SS "Multiple commands"
|
||||
When multiple contradictory commands are issued on a same command line,
|
||||
only the latest one will be applied.
|
||||
.
|
||||
.SS "Operation mode"
|
||||
If multiple operation mode options are given,
|
||||
the last one takes effect.
|
||||
.TP
|
||||
.BR \-z ", " \-\-compress
|
||||
Compress.
|
||||
@ -112,7 +120,7 @@ Test the integrity of compressed
|
||||
.B .lz4
|
||||
files.
|
||||
The decompressed data is discarded.
|
||||
No files are created or removed.
|
||||
No files are created nor removed.
|
||||
.
|
||||
.SS "Operation modifiers"
|
||||
.TP
|
||||
@ -123,7 +131,7 @@ No files are created or removed.
|
||||
high compression
|
||||
|
||||
.TP
|
||||
.BR \-f ", " --force
|
||||
.BR \-f ", " --[no-]force
|
||||
This option has several effects:
|
||||
.RS
|
||||
.IP \(bu 3
|
||||
@ -168,13 +176,15 @@ with the resulting compressed file named
|
||||
.B \-BD
|
||||
block dependency (improve compression ratio)
|
||||
.TP
|
||||
.B \--no-frame-crc
|
||||
.B \--[no-]frame-crc
|
||||
disable stream checksum (default:enabled)
|
||||
.TP
|
||||
.B \--frame-content-size
|
||||
compressed frame includes original size (default:not present)
|
||||
.B \--[no-]content-size
|
||||
compressed file includes original size (default:not present)
|
||||
Note : this option can only be activated when the original size can be determined,
|
||||
hence for a file. It won't work with unknown source size, such as stdin pipe.
|
||||
.TP
|
||||
.B \--sparse-support
|
||||
.B \--[no-]sparse
|
||||
enable sparse file (default:disabled)(experimental)
|
||||
.TP
|
||||
.B \-l
|
||||
|
@ -173,9 +173,9 @@ static int usage_advanced(void)
|
||||
DISPLAY( " -B# : Block size [4-7](default : 7)\n");
|
||||
DISPLAY( " -BD : Block dependency (improve compression ratio)\n");
|
||||
/* DISPLAY( " -BX : enable block checksum (default:disabled)\n"); *//* Option currently inactive */
|
||||
DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled)\n");
|
||||
DISPLAY( "--frame-content-size : compressed frame includes original size (default:not present)\n");
|
||||
DISPLAY( "--sparse-support : enable sparse file (default:disabled)(experimental)\n");
|
||||
DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled)\n");
|
||||
DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n");
|
||||
DISPLAY( "--sparse : enable sparse file (default:disabled)(experimental)\n");
|
||||
DISPLAY( "Benchmark arguments :\n");
|
||||
DISPLAY( " -b : benchmark file(s)\n");
|
||||
DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n");
|
||||
@ -293,19 +293,23 @@ int main(int argc, char** argv)
|
||||
|
||||
/* long commands (--long-word) */
|
||||
if (!strcmp(argument, "--compress")) { forceCompress = 1; continue; }
|
||||
if (!strcmp(argument, "--decompress")) { decode = 1; continue; }
|
||||
if (!strcmp(argument, "--uncompress")) { decode = 1; continue; }
|
||||
if ((!strcmp(argument, "--decompress"))
|
||||
|| (!strcmp(argument, "--uncompress"))) { decode = 1; continue; }
|
||||
if (!strcmp(argument, "--test")) { decode = 1; LZ4IO_setOverwrite(1); output_filename=nulmark; continue; }
|
||||
if (!strcmp(argument, "--force")) { LZ4IO_setOverwrite(1); continue; }
|
||||
if (!strcmp(argument, "--stdout")) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; }
|
||||
if (!strcmp(argument, "--to-stdout")) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; }
|
||||
if (!strcmp(argument, "--no-force")) { LZ4IO_setOverwrite(0); continue; }
|
||||
if ((!strcmp(argument, "--stdout"))
|
||||
|| (!strcmp(argument, "--to-stdout"))) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; }
|
||||
if (!strcmp(argument, "--frame-crc")) { LZ4IO_setStreamChecksumMode(1); continue; }
|
||||
if (!strcmp(argument, "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(0); continue; }
|
||||
if (!strcmp(argument, "--frame-content-size")) { LZ4IO_setContentSize(1); continue; }
|
||||
if (!strcmp(argument, "--sparse-support")) { LZ4IO_setSparseFile(1); continue; }
|
||||
if (!strcmp(argument, "--content-size")) { LZ4IO_setContentSize(1); continue; }
|
||||
if (!strcmp(argument, "--no-content-size")) { LZ4IO_setContentSize(0); continue; }
|
||||
if (!strcmp(argument, "--sparse")) { LZ4IO_setSparseFile(1); continue; }
|
||||
if (!strcmp(argument, "--no-sparse")) { LZ4IO_setSparseFile(0); continue; }
|
||||
if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
|
||||
if (!strcmp(argument, "--quiet")) { if (displayLevel) displayLevel--; continue; }
|
||||
if (!strcmp(argument, "--version")) { DISPLAY(WELCOME_MESSAGE); return 0; }
|
||||
if (!strcmp(argument, "--keep")) { continue; } /* keep source file (default anyway, so useless) (for xz/lzma compatibility) */
|
||||
if (!strcmp(argument, "--keep")) { continue; } /* keep source file (default anyway; just for xz/lzma compatibility) */
|
||||
|
||||
/* Short commands (note : aggregated short commands are allowed) */
|
||||
if (argument[0]=='-')
|
||||
|
Loading…
Reference in New Issue
Block a user