Initial release

This commit is contained in:
Yann Collet 2015-01-24 01:58:16 +01:00
commit 4856a00164
29 changed files with 8780 additions and 0 deletions

14
.gitattributes vendored Normal file
View File

@ -0,0 +1,14 @@
# Set the default behavior
* text eol=lf
# Explicitly declare source files
*.c text eol=lf
*.h text eol=lf
# Denote files that should not be modified.
*.odt binary
# Visual Studio
*.sln binary
*.suo binary
*.vcxproj* binary

18
.gitignore vendored Normal file
View File

@ -0,0 +1,18 @@
# Object files
*.o
*.ko
# Libraries
*.lib
*.a
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app

14
.travis.yml Normal file
View File

@ -0,0 +1,14 @@
# Travis CI configuration: builds with gcc and runs `make test-travis`.
language: c
compiler: gcc
script: make test-travis
# Extra packages installed before the build starts.
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq gcc-multilib
- sudo apt-get install -qq valgrind
# Each entry sets ZSTD_TRAVIS_CI_ENV, which the top-level Makefile reads to
# choose the target that `make test-travis` depends on.
env:
- ZSTD_TRAVIS_CI_ENV=travis-install
- ZSTD_TRAVIS_CI_ENV=test-all
matrix:
fast_finish: true

93
Makefile Normal file
View File

@ -0,0 +1,93 @@
# ################################################################
# zstd - Makefile
# Copyright (C) Yann Collet 2014-2015
# All rights reserved.
#
# BSD license
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# You can contact the author at :
# - zstd source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# ################################################################
# Version number
# VERSION and RELEASE are exported, so sub-makes started from this file
# receive them through the environment.
export VERSION=0
export RELEASE=r$(VERSION)
# Installation layout. The ?= assignments only take effect when the variable
# is not already set (environment or command line); INCLUDEDIR is always
# derived from PREFIX unless overridden on the command line.
DESTDIR?=
PREFIX ?= /usr
LIBDIR ?= $(PREFIX)/lib
INCLUDEDIR=$(PREFIX)/include
# Sub-directories this Makefile recurses into.
PRGDIR = programs
ZSTDDIR = lib
# Select test target for Travis CI's Build Matrix
# When ZSTD_TRAVIS_CI_ENV contains a word starting with "test-", the
# test-travis target depends on prg-travis; otherwise the value of
# ZSTD_TRAVIS_CI_ENV itself is used as the prerequisite of test-travis
# (empty when the variable is unset).
ifneq (,$(filter test-%,$(ZSTD_TRAVIS_CI_ENV)))
TRAVIS_TARGET=prg-travis
else
TRAVIS_TARGET=$(ZSTD_TRAVIS_CI_ENV)
endif
# Top-level entry points. Each one only delegates to the Makefile of a
# sub-directory; none of them names a real file, hence .PHONY.
.PHONY: default all zstdprograms clean

# Default goal: build the programs only.
default: zstdprograms

# Build the library, then the programs.
# `cd dir && $(MAKE)` is used instead of `cd dir; $(MAKE)` so that a failed cd
# aborts the recipe rather than re-running make in the current directory.
# -e makes environment variables (including the ones exported by this file)
# take precedence over assignments made in the sub-makefiles.
all:
	@cd $(ZSTDDIR) && $(MAKE) -e all
	@cd $(PRGDIR) && $(MAKE) -e all

# Build the default goal of the programs Makefile.
zstdprograms:
	@cd $(PRGDIR) && $(MAKE) -e

# Remove build products of both sub-directories.
clean:
	@cd $(PRGDIR) && $(MAKE) clean
	@cd $(ZSTDDIR) && $(MAKE) clean
# @cd examples; $(MAKE) clean
	@echo Cleaning completed
#------------------------------------------------------------------------
#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))

# None of these targets produces a file of the same name.
.PHONY: install uninstall travis-install test test-travis prg-travis

# Install the library first, then the programs.
# `&&` (not `;`) so that a failed cd stops the recipe instead of running
# make in the current directory.
install:
	@cd $(ZSTDDIR) && $(MAKE) -e install
	@cd $(PRGDIR) && $(MAKE) -e install

uninstall:
	@cd $(ZSTDDIR) && $(MAKE) uninstall
	@cd $(PRGDIR) && $(MAKE) uninstall

# Used by the Travis job whose ZSTD_TRAVIS_CI_ENV is "travis-install".
travis-install:
	sudo $(MAKE) install

test:
	@cd $(PRGDIR) && $(MAKE) -e test

# Entry point of the Travis script; the actual work is done by whichever
# target TRAVIS_TARGET names.
test-travis: $(TRAVIS_TARGET)

# Forward the selected target to the programs Makefile.
prg-travis:
	@cd $(PRGDIR) && $(MAKE) -e $(TRAVIS_TARGET)

endif

3
NEWS Normal file
View File

@ -0,0 +1,3 @@
r0
initial release

44
README.md Executable file
View File

@ -0,0 +1,44 @@
**ZSTD**, short for Z-Standard, is a new lossless compression algorithm, which provides both good compression ratio _and_ speed for your standard compression needs. "Standard" translates into everyday situations which neither look for highest possible ratio (which LZMA and ZPAQ cover) nor extreme speeds (which LZ4 covers).
It is provided as a BSD-license package, hosted on Github.
|Branch |Status |
|------------|---------|
|master | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) |
|dev | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) |
For a taste of its performance, here are a few benchmark numbers, completed on a Core i5-4300U @ 1.9 GHz, using [fsbench 0.14.3](http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029), an open-source benchmark program by m^2.
|Name | Ratio | C.speed | D.speed |
|---------------|-------|---------|---------|
| | | MB/s | MB/s |
| zlib 1.2.8 -6 | 3.099 | 18 | 275 |
| **ZSTD** |**2.872**|**201**|**498** |
| zlib 1.2.8 -1 | 2.730 | 58 | 250 |
| LZ4 HC r127 | 2.720 | 26 | 1720 |
| QuickLZ 1.5.1b6|2.237 | 323 | 373 |
| LZO 2.06 | 2.106 | 351 | 510 |
| Snappy 1.1.0 | 2.091 | 238 | 964 |
| LZ4 r127 | 2.084 | 370 | 1590 |
| LZF 3.6 | 2.077 | 220 | 502 |
An interesting feature of ZSTD is that it can qualify as both a reasonably strong compressor and a fast one.
ZSTD delivers high decompression speed, at around ~500 MB/s per core.
Obviously, your exact mileage will vary depending on your target system.
ZSTD compression speed, on the other hand, can be configured to fit different situations.
The first, fast, derivative offers ~200 MB/s per core, which is suitable for a few real-time scenarios.
But similar to LZ4, ZSTD can offer derivatives trading compression time for compression ratio, while keeping decompression properties intact. "Offline compression", where compression time is of little importance because the content is only compressed once and decompressed many times, is therefore within the scope.
Note that high compression derivatives still have to be developed.
It's a complex area which will certainly benefit from the contributions of a few experts.
Another property ZSTD is developed for is configurable memory requirement, with the objective to fit into low-memory configurations, or servers handling many connections in parallel.
ZSTD development is starting. So consider current results merely as early ones. The implementation will gradually evolve and improve over time, especially during this first year. This is a phase which will depend a lot on user feedback, since this feedback will be key in deciding next priorities or features to add.
The "master" branch is reserved for stable release and betas.
The "dev" branch is the one where all contributions will be merged. If you plan to propose a patch, please commit into the "dev" branch. Direct commits to "master" are not permitted.
Feature branches will also exist, typically to introduce new requirements, and be temporarily available for testing before merge into "dev" branch.

26
lib/LICENSE Normal file
View File

@ -0,0 +1,26 @@
ZSTD Library
Copyright (c) 2014-2015, Yann Collet
All rights reserved.
BSD License
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

114
lib/Makefile Normal file
View File

@ -0,0 +1,114 @@
# ################################################################
# ZSTD library - Makefile
# Copyright (C) Yann Collet 2015
# All rights reserved.
#
# BSD license
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# You can contact the author at :
# - ZSTD source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# ################################################################
# Version numbers
VERSION?= 0
# The LIBVER_* values are not computed by make itself: each one holds a
# backquoted sed command, which the shell runs when a recipe line containing
# the variable is executed. Each command prints the number captured from the
# matching "#define ZSTD_VERSION_*" line of zstd.h.
# Consequently these variables are only meaningful inside recipes.
LIBVER_MAJOR=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
LIBVER_MINOR=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
LIBVER_PATCH=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
LIBVER = $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
# Installation layout; ?= keeps any value already provided by the caller.
DESTDIR?=
PREFIX ?= /usr
# -O3 is only a default; the include path, language standard and warning
# flags are appended to whatever CFLAGS the environment provides.
CFLAGS ?= -O3
CFLAGS += -I. -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
LIBDIR ?= $(PREFIX)/lib
INCLUDEDIR=$(PREFIX)/include
# OS X linker doesn't support -soname, and use different extension
# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
#
# SHARED_EXT       : extension of the unversioned development link
# SHARED_EXT_MAJOR : suffix of the link carrying the major version only
# SHARED_EXT_VER   : suffix of the real file carrying the full version
# SONAME_FLAGS     : flags recording the library's run-time name
#
# All assignments are recursive (=), so the order in which they appear inside
# each branch does not matter.
ifeq ($(shell uname), Darwin)
SHARED_EXT = dylib
SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
# The install name must be the directory the library is installed into,
# i.e. $(LIBDIR) (which defaults to $(PREFIX)/lib), so that overriding LIBDIR
# keeps the recorded path and the installed path in agreement.
SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
else
SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
SHARED_EXT = so
SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
endif
# No file named default, all or libzstd is ever created, so declare the
# targets phony: a stray file with one of these names must not stop the build.
.PHONY: default all libzstd

default: libzstd

all: libzstd

# Build both flavours of the library from zstd.c:
#   - libzstd.a, from an object compiled with the regular flags;
#   - the versioned shared library, compiled separately with -fPIC,
#     plus the two symbolic links pointing at it.
# $@ is "libzstd", so $@.$(SHARED_EXT_VER) is the versioned file name.
libzstd: zstd.c
	@echo compiling static library
	@$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
	@$(AR) rcs libzstd.a zstd.o
	@echo compiling dynamic library $(LIBVER)
	@$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
	@echo creating versioned links
	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
# Remove objects, static and shared libraries, versioned links and the
# generated pkg-config file. Phony, so a file named "clean" cannot disable it.
.PHONY: clean
clean:
	@rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
	@echo Cleaning library completed
#------------------------------------------------------------------------
#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))

# install and uninstall are commands, not files.
.PHONY: install uninstall

# Generate the pkg-config file by substituting the @...@ placeholders of
# libzstd.pc.in. Regenerated whenever the template or this Makefile changes.
libzstd.pc: libzstd.pc.in Makefile
	@echo creating pkgconfig
	@sed -e 's|@PREFIX@|$(PREFIX)|' \
	     -e 's|@LIBDIR@|$(LIBDIR)|' \
	     -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
	     -e 's|@VERSION@|$(VERSION)|' \
	     $< >$@

# Install the shared library (real file plus its two links, copied with
# cp -a so they stay links), the pkg-config file, the static library and
# the public header under $(DESTDIR).
install: libzstd libzstd.pc
	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
	@install -m 755 libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
	@cp -a libzstd.$(SHARED_EXT_MAJOR) $(DESTDIR)$(LIBDIR)
	@cp -a libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
	@echo zstd static and shared library installed

# Remove every installed file.
# Plain `rm -f` is used throughout: it already succeeds when the file is
# absent, whereas a `[ -f file ] && rm -f file` guard returns non-zero for a
# missing file and would abort the whole uninstall.
uninstall:
	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/libzstd.pc
	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
	@rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
	@echo zstd libraries successfully uninstalled

endif

1573
lib/fse.c Executable file

File diff suppressed because it is too large Load Diff

380
lib/fse.h Executable file
View File

@ -0,0 +1,380 @@
/* ******************************************************************
FSE : Finite State Entropy coder
header file
Copyright (C) 2013-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/******************************************
* Includes
******************************************/
#include <stddef.h> // size_t, ptrdiff_t
/******************************************
* FSE simple functions
******************************************/
size_t FSE_compress(void* dst, size_t maxDstSize,
const void* src, size_t srcSize);
size_t FSE_decompress(void* dst, size_t maxDstSize,
const void* cSrc, size_t cSrcSize);
/*
FSE_compress():
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated, and sized to handle worst case situations.
Worst case size evaluation is provided by FSE_compressBound().
return : size of compressed data
Special values : if result == 0, data is not compressible => Nothing is stored within 'dst' !!
if result == 1, data is one constant element x srcSize times. Use RLE compression.
if FSE_isError(result), it's an error code.
FSE_decompress():
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'maxDstSize'.
** Important ** : This function doesn't decompress uncompressed nor RLE data !
return : size of regenerated data (<= maxDstSize)
or an error code, which can be tested using FSE_isError()
*/
size_t FSE_decompressRLE(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize);
/*
FSE_decompressRLE():
Decompress specific RLE corner case (equivalent to memset()).
cSrcSize must be == 1. originalSize must be exact.
return : size of regenerated data (==originalSize)
or an error code, which can be tested using FSE_isError()
Note : there is no function provided for uncompressed data, as it's just a simple memcpy()
*/
/******************************************
* Tool functions
******************************************/
size_t FSE_compressBound(size_t size); /* maximum compressed size */
/* Error Management */
unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
/******************************************
* FSE advanced functions
******************************************/
/*
FSE_compress2():
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
Both parameters can be defined as '0' to mean : use default value
return : size of compressed data
or an error code, which can be tested using FSE_isError()
*/
size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/******************************************
FSE detailed API
******************************************/
/*
FSE_compress() does the following:
1. count symbol occurrence from source[] into table count[]
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
3. save normalized counters to memory buffer using writeHeader()
4. build encoding table 'CTable' from normalized counters
5. encode the data stream using encoding table
FSE_decompress() performs:
1. read normalized counters with readHeader()
2. build decoding table 'DTable' from normalized counters
3. decode the data stream using decoding table
The following API allows triggering specific sub-functions.
*/
/* *** COMPRESSION *** */
size_t FSE_count(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t total, unsigned maxSymbolValue);
size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog);
size_t FSE_writeHeader (void* headerBuffer, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
void* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
void FSE_freeCTable (void* CTable);
size_t FSE_buildCTable(void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
size_t FSE_compress_usingCTable (void* dst, size_t dstSize, const void* src, size_t srcSize, const void* CTable);
/*
The first step is to count all symbols. FSE_count() provides one quick way to do this job.
Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have '*maxSymbolValuePtr+1' cells.
'source' is a table of char of size 'sourceSize'. All values within 'src' MUST be <= *maxSymbolValuePtr
*maxSymbolValuePtr will be updated, with its real value (necessarily <= original value)
FSE_count() will return the number of occurrence of the most frequent symbol.
If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
The next step is to normalize the frequencies.
FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
It also guarantees a minimum of 1 to any Symbol which frequency is >= 1.
You can use input 'tableLog'==0 to mean "use default tableLog value".
If you are unsure of which tableLog value to use, you can optionally call FSE_optimalTableLog(),
which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
The result of FSE_normalizeCount() will be saved into a table,
called 'normalizedCounter', which is a table of signed short.
'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
The return value is tableLog if everything proceeded as expected.
It is 0 if there is a single symbol within distribution.
If there is an error(typically, invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeHeader().
'header' buffer must be already allocated.
For guaranteed success, buffer size must be at least FSE_headerBound().
The result of the function is the number of bytes written into 'header'.
If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()) (for example, buffer size too small).
'normalizedCounter' can then be used to create the compression tables 'CTable'.
The space required by 'CTable' must be already allocated. Its size is provided by FSE_sizeof_CTable().
'CTable' must be aligned of 4 bytes boundaries.
You can then use FSE_buildCTable() to fill 'CTable'.
In both cases, if there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
'CTable' can then be used to compress 'source', with FSE_compress_usingCTable().
Similar to FSE_count(), the convention is that 'source' is assumed to be a table of char of size 'sourceSize'
The function returns the size of compressed data (without header), or -1 if failed.
*/
/* *** DECOMPRESSION *** */
size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize);
void* FSE_createDTable(unsigned tableLog);
void FSE_freeDTable(void* DTable);
size_t FSE_buildDTable (void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const void* DTable, size_t fastMode);
/*
If the block is RLE compressed, or uncompressed, use the relevant specific functions.
The first step is to obtain the normalized frequencies of symbols.
This can be performed by reading a header with FSE_readHeader().
'normalizedCounter' must be already allocated, and have at least '*maxSymbolValuePtr+1' cells of short.
In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
or size the table to handle worst case situations (typically 256).
FSE_readHeader will provide 'tableLog' and 'maxSymbolValue' stored into the header.
The result of FSE_readHeader() is the number of bytes read from 'header'.
The following values have special meaning :
return 2 : there is only a single symbol value. The value is provided into the second byte of header.
return 1 : data is uncompressed
If there is an error, the function will return an error code, which can be tested using FSE_isError().
The next step is to create the decompression tables 'DTable' from 'normalizedCounter'.
This is performed by the function FSE_buildDTable().
The space required by 'DTable' must be already allocated and properly aligned.
One can create a DTable using FSE_createDTable().
The function will return 1 if DTable is compatible with fastMode, 0 otherwise.
If there is an error, the function will return an error code, which can be tested using FSE_isError().
'DTable' can then be used to decompress 'compressed', with FSE_decompress_usingDTable().
Only trigger fastMode if it was authorized by result of FSE_buildDTable(), otherwise decompression will fail.
cSrcSize must be correct, otherwise decompression will fail.
FSE_decompress_usingDTable() result will tell how many bytes were regenerated.
If there is an error, the function will return an error code, which can be tested using FSE_isError().
*/
/******************************************
* FSE streaming compression API
******************************************/
/* Bit-stream writer context, passed to FSE_initCStream(), FSE_addBits(),
   FSE_flushBits() and FSE_closeCStream().
   NOTE(review): field roles below are inferred from names and from the usage
   notes that follow; confirm against fse.c. */
typedef struct
{
size_t bitContainer; /* presumably the "local register" in which bits accumulate before being flushed */
int bitPos; /* presumably the number of bits currently held in bitContainer */
char* startPtr; /* presumably the beginning of the destination buffer */
char* ptr; /* presumably the current write position in the destination buffer */
} FSE_CStream_t;
/* Encoder state, initialised from a CTable by FSE_initCState() and updated by
   FSE_encodeByte().
   NOTE(review): field roles below are inferred from names only; confirm
   against fse.c. */
typedef struct
{
ptrdiff_t value; /* presumably the current state value */
const void* stateTable; /* presumably points into the CTable (next-state table) */
const void* symbolTT; /* presumably points into the CTable (per-symbol transform table) */
unsigned stateLog; /* presumably the tableLog of the CTable in use */
} FSE_CState_t;
void FSE_initCStream(FSE_CStream_t* bitC, void* dstBuffer);
void FSE_initCState(FSE_CState_t* CStatePtr, const void* CTable);
void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned char symbol);
void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits);
void FSE_flushBits(FSE_CStream_t* bitC);
void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* CStatePtr);
size_t FSE_closeCStream(FSE_CStream_t* bitC);
/*
These functions are inner components of FSE_compress_usingCTable().
They allow creation of custom streams, mixing multiple tables and bit sources.
A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
So the first symbol you will encode is the last you will decode, like a lifo stack.
You will need a few variables to track your CStream. They are :
void* CTable; // Provided by FSE_buildCTable()
FSE_CStream_t bitC; // bitStream tracking structure
FSE_CState_t state; // State tracking structure
The first thing to do is to init the bitStream, and the state.
FSE_initCStream(&bitC, dstBuffer);
FSE_initCState(&state, CTable);
You can then encode your input data, byte after byte.
FSE_encodeByte() outputs a maximum of 'tableLog' bits at a time.
Remember decoding will be done in reverse direction.
FSE_encodeByte(&bitStream, &state, symbol);
At any time, you can add any bit sequence.
Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
FSE_addBits(&bitStream, bitField, nbBits);
The above methods don't commit data to memory, they just store it into local register, for speed.
Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
Writing data to memory is a manual operation, performed by the flushBits function.
FSE_flushBits(&bitStream);
Your last FSE encoding operation shall be to flush your last state value(s).
FSE_flushCState(&bitStream, &state);
You must then close the bitStream if you opened it with FSE_initCStream().
Note : this text used to mention embedding some user-info into the header, as an optionalId [0-31]; FSE_closeCStream() as declared above takes no such parameter.
The function returns the size in bytes of CStream.
If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
size_t size = FSE_closeCStream(&bitStream);
*/
/******************************************
* FSE streaming decompression API
******************************************/
/* bitD_t : type of the decoder's bit container. It is currently size_t;
   the unsigned int alternative is kept commented out below. */
//typedef unsigned int bitD_t;
typedef size_t bitD_t;
/* Bit-stream reader context, passed to FSE_initDStream(), FSE_readBits(),
   FSE_reloadDStream() and FSE_endOfDStream().
   NOTE(review): field roles below are inferred from names only; confirm
   against fse.c. */
typedef struct
{
bitD_t bitContainer; /* presumably the local register bits are read from */
unsigned bitsConsumed; /* presumably the number of bits of bitContainer already used */
const char* ptr; /* presumably the current read position in the source buffer */
const char* start; /* presumably the beginning of the source buffer */
} FSE_DStream_t;
/* Decoder state, initialised by FSE_initDState() from a bit stream and a
   DTable, then advanced by FSE_decodeSymbol().
   NOTE(review): field roles below are inferred from names only; confirm
   against fse.c. */
typedef struct
{
bitD_t state; /* presumably the current state value */
const void* table; /* presumably the decoding table associated with this state */
} FSE_DState_t;
size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable);
unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
bitD_t FSE_readBits(FSE_DStream_t* bitD, unsigned nbBits);
unsigned int FSE_reloadDStream(FSE_DStream_t* bitD);
unsigned FSE_endOfDStream(const FSE_DStream_t* bitD);
unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
/*
Let's now decompose FSE_decompress_usingDTable() into its unitary elements.
You will decode FSE-encoded symbols from the bitStream,
and also any other bitFields you put in, **in reverse order**.
You will need a few variables to track your bitStream. They are :
FSE_DStream_t DStream; // Stream context
FSE_DState_t DState; // State context. Multiple ones are possible
const void* DTable; // Decoding table, provided by FSE_buildDTable()
U32 tableLog; // Provided by FSE_readHeader()
The first thing to do is to init the bitStream.
errorCode = FSE_initDStream(&DStream, srcBuffer, srcSize);
You should then retrieve your initial state(s) (multiple ones are possible) :
FSE_initDState(&DState, &DStream, DTable);
You can then decode your data, symbol after symbol.
For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
Keep in mind that symbols are decoded in reverse order, like a lifo stack (last in, first out).
unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
Note : maximum allowed nbBits is 25
unsigned int bitField = FSE_readBits(&DStream, nbBits);
All above operations only read from local register (whose size is that of bitD_t, currently size_t).
Reading data from memory is manually performed by the reload method.
endSignal = FSE_reloadDStream(&DStream);
FSE_reloadDStream() result tells if there is still some more data to read from DStream.
0 : there is still some data left into the DStream.
1 : DStream reached end of buffer, but is not yet fully extracted. It will not load data from memory any more.
2 : DStream reached its exact end, corresponding in general to decompression completed.
3 : DStream went too far. Decompression result is corrupted.
When reaching end of buffer(1), progress slowly if you decode multiple symbols per loop,
to properly detect the exact end of stream.
After each decoded symbol, check if DStream is fully consumed using this simple test :
FSE_reloadDStream(&DStream) >= 2
When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
Checking if DStream has reached its end is performed by :
FSE_endOfDStream(&DStream);
Check also the states. There might be some entropy left there, still able to decode some high probability symbol.
FSE_endOfDState(&DState);
*/
#if defined (__cplusplus)
}
#endif

108
lib/fse_static.h Executable file
View File

@ -0,0 +1,108 @@
/* ******************************************************************
FSE : Finite State Entropy coder
header file for static linking (only)
Copyright (C) 2013-2015, Yann Collet
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/******************************************
* Tool functions
******************************************/
/* Upper bound, in bytes, reserved for the FSE header */
#define FSE_MAX_HEADERSIZE 512
/* Worst-case compressed size for an input of 'size' bytes : size + size/128 + header.
   Every macro argument is parenthesized so that expression arguments
   (e.g. `a | b`, `cond ? x : y`) are evaluated as a whole. */
#define FSE_COMPRESSBOUND(size) ((size) + ((size)>>7) + FSE_MAX_HEADERSIZE) /* Macro can be useful for static allocation */
/******************************************
* Static allocation
******************************************/
/* You can statically allocate a CTable or a DTable as a table of U32 using below macros.
   Both macros give a number of U32 elements (not bytes). */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) ((1<<(maxTableLog))+1)
/******************************************
* FSE supported API for DLL
******************************************/
#include "fse.h"
/******************************************
* Error Management
******************************************/
/* FSE_LIST_ERRORS is an X-macro : it applies ITEM() once to each error name, in the order written below.
   The list is therefore defined in a single place and can be re-expanded with another ITEM
   wherever a per-error table is needed.
   FSE_ERROR_maxCode is the last entry of the list
   (presumably an upper bound for valid error codes - TODO confirm against fse.c). */
#define FSE_LIST_ERRORS(ITEM) \
ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) \
ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
ITEM(FSE_ERROR_corruptionDetected) \
ITEM(FSE_ERROR_maxCode)
/* Turns one error name into one enumerator followed by a comma */
#define FSE_GENERATE_ENUM(ENUM) ENUM,
/* Enumerators take consecutive values starting at 0, in list order (FSE_OK_NoError == 0) */
typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
/******************************************
* FSE advanced API
******************************************/
/* Note : in this section, each comment documents the prototype located just above it. */
size_t FSE_countFast(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
/* Same as FSE_count(), but does not verify that every value within src is <= *maxSymbolValuePtr.
   The caller is responsible for that guarantee. */
size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits);
/* Creates a fake CTable, designed to not compress an input where each element uses nbBits */
size_t FSE_buildCTable_rle (void* CTable, unsigned char symbolValue);
/* Creates a fake CTable, designed to compress a single identical value (symbolValue) */
size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits);
/* Creates a fake DTable, designed to read an uncompressed bitstream where each element uses nbBits */
size_t FSE_buildDTable_rle (void* DTable, unsigned char symbolValue);
/* Creates a fake DTable, designed to always generate the same symbolValue */
/******************************************
* FSE streaming API
******************************************/
/* Note : bitD_t, FSE_DStream_t and FSE_DState_t are not declared in this file; they come from "fse.h".
   Each comment below documents the prototype located just above it. */
bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
/* Faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
/* Faster, but works only if nbBits >= 1 (otherwise, result will be corrupted).
   NOTE(review) : this function has no nbBits parameter; the constraint presumably applies to
   the number of bits read internally for each symbol - TODO confirm against fse.c */
#if defined (__cplusplus)
}
#endif

14
lib/libzstd.pc.in Normal file
View File

@ -0,0 +1,14 @@
# ZSTD - standard compression algorithm
# Copyright (C) 2014-2015, Yann Collet.
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
# The @...@ placeholders are replaced with real values when the .pc file is generated.
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@

Name: zstd
Description: lossless compression algorithm library
URL: https://github.com/Cyan4973/zstd
Version: @VERSION@
# Reference the variables declared above rather than repeating the raw paths,
# so that `pkg-config --define-variable=libdir=...` (and relocation tools) take effect.
Libs: -L${libdir} -lzstd
Cflags: -I${includedir}

1719
lib/zstd.c Normal file

File diff suppressed because it is too large Load Diff

93
lib/zstd.h Normal file
View File

@ -0,0 +1,93 @@
/*
zstd - standard compression library
Header File
Copyright (C) 2014-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/**************************************
* Includes
**************************************/
#include <stddef.h> /* size_t */
/**************************************
* Version
**************************************/
#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
#define ZSTD_VERSION_MINOR 0 /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
/* Single comparable integer : MAJOR*10000 + MINOR*100 + RELEASE (with the values above : 1) */
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
/* Presumably returns ZSTD_VERSION_NUMBER as compiled into the library, which allows comparing
   the linked library with this header - TODO confirm against zstd.c */
unsigned ZSTD_versionNumber (void);
/**************************************
* Simple one-step functions
**************************************/
size_t ZSTD_compress( void* dst, size_t maxDstSize,
const void* src, size_t srcSize);
size_t ZSTD_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize);
/*
ZSTD_compress() :
    Compresses 'srcSize' bytes from buffer 'src' into buffer 'dst', of maximum size 'maxDstSize'.
    Destination buffer should be sized to handle worst case situations (input data not compressible).
    Worst case size evaluation is provided by function ZSTD_compressBound().
    return : the number of bytes written into buffer 'dst'
             or an error code if it fails (which can be tested using ZSTD_isError())
ZSTD_decompress() :
    compressedSize : is the size of the compressed data provided in 'src'
    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
                      It must be equal or larger than originalSize, otherwise decompression will fail.
    return : the number of bytes decompressed into destination buffer (originalSize)
             or an error code if it fails (which can be tested using ZSTD_isError())
*/
/**************************************
* Tool functions
**************************************/
size_t ZSTD_compressBound(size_t srcSize); /* maximum compressed size for an input of 'srcSize' bytes; use it to size the 'dst' buffer of ZSTD_compress() */
/* Error Management */
/* Functions of this library which return a size_t report failures through that same return value;
   the two helpers below are the way to interpret it. */
unsigned ZSTD_isError(size_t code); /* tells if a return value is an error code */
const char* ZSTD_getErrorName(size_t code); /* provides error code string (useful for debugging) */
#if defined (__cplusplus)
}
#endif

80
lib/zstd_static.h Executable file
View File

@ -0,0 +1,80 @@
/*
zstd - standard compression library
Header File for static linking only
Copyright (C) 2014-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/**************************************
* Includes
**************************************/
#include "zstd.h"
/**************************************
* Streaming functions
**************************************/
/* Compression context : exposed as an opaque handle (void*), its content is not visible from this header */
typedef void* ZSTD_cctx_t;
ZSTD_cctx_t ZSTD_createCCtx(void);
size_t ZSTD_freeCCtx(ZSTD_cctx_t cctx);
/* The three functions below all write into 'dst', which can receive up to 'maxDstSize' bytes.
   Presumably called in the order Begin, Continue (once per chunk of 'src'), End,
   each result following the zstd.h convention (byte count, or error code testable with ZSTD_isError())
   - TODO confirm against zstd.c */
size_t ZSTD_compressBegin(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize);
size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
size_t ZSTD_compressEnd(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize);
/* Decompression context : exposed as an opaque handle (void*), same approach as ZSTD_cctx_t */
typedef void* ZSTD_dctx_t;
ZSTD_dctx_t ZSTD_createDCtx(void);
size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx);
/* NOTE(review) : presumably tells how many source bytes the next ZSTD_decompressContinue() call expects
   - TODO confirm against zstd.c */
size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx);
size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
/**************************************
* Error management
**************************************/
/* ZSTD_LIST_ERRORS is an X-macro : it applies ITEM() once to each error name, in the order written below.
   The list is therefore defined in a single place and can be re-expanded with another ITEM
   wherever a per-error table is needed.
   ZSTD_ERROR_maxCode is the last entry of the list
   (presumably an upper bound for valid error codes - TODO confirm against zstd.c). */
#define ZSTD_LIST_ERRORS(ITEM) \
ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \
ITEM(ZSTD_ERROR_wrongMagicNumber) \
ITEM(ZSTD_ERROR_wrongSrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \
ITEM(ZSTD_ERROR_wrongLBlockSize) \
ITEM(ZSTD_ERROR_maxCode)
/* Turns one error name into one enumerator followed by a comma */
#define ZSTD_GENERATE_ENUM(ENUM) ENUM,
/* Enumerators take consecutive values starting at 0, in list order (ZSTD_OK_NoError == 0) */
typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes; /* exposed list of errors; static linking only */
#if defined (__cplusplus)
}
#endif

339
programs/COPYING Normal file
View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

151
programs/Makefile Normal file
View File

@ -0,0 +1,151 @@
# ##########################################################################
# ZSTD programs - Makefile
# Copyright (C) Yann Collet 2015
#
# GPL v2 License
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# You can contact the author at :
# - ZSTD source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# ##########################################################################
# zstd : Command Line Utility, supporting gzip-like arguments
# datagen : Synthetic and parametrable data generator, for tests
# fuzzer : Test tool, to check zstd integrity on target platform
# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode
# fullbench : Precisely measure speed for each zstd inner function
# fullbench32: Same as fullbench, but forced to compile in 32-bits mode
# ##########################################################################
# ---- User-tunable knobs (command line or environment) ------------------
RELEASE ?= r0
DESTDIR ?=
PREFIX  ?= /usr
CFLAGS  ?= -O3
# Warning set, appended to whichever optimisation level is selected above.
CFLAGS  += -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes

# ---- Derived values ----------------------------------------------------
ZSTDDIR  = ../lib
BINDIR   = $(PREFIX)/bin
MANDIR   = $(PREFIX)/share/man/man1

# Complete flag set handed to every $(CC) invocation of this Makefile.
# The language standard and the version string are listed here rather than in
# CFLAGS : a command-line `make CFLAGS=...` disables every `CFLAGS +=` of the
# file and would otherwise drop them silently. $(CFLAGS) comes afterwards, so
# an explicit user -std= still takes precedence.
FLAGS    = -I$(ZSTDDIR) -std=c99 -DZSTD_VERSION=\"$(RELEASE)\" $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)

TEST_FILES   = COPYING
TEST_TARGETS = test-native
# Platform-dependent executable suffix (EXT) and null device (VOID).
# Windows is recognised through an $(OS) value starting with "Windows";
# every other host gets no suffix and /dev/null.
ifeq ($(filter Windows%,$(OS)),)
EXT  =
VOID = /dev/null
else
EXT  = .exe
VOID = nul
endif
# `default` and `all` are commands, not files : declare them phony so that a
# stray file with the same name can never make them look up to date.
.PHONY: default all

default: zstd

# Builds every program of this directory, datagen included (the test targets depend on it).
all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 datagen

# Each program is compiled and linked in a single $(CC) invocation, straight
# from its sources. The *32 variants only differ by -m32.
# Note : the file actually produced is $@$(EXT) ($@ itself when EXT is empty).
zstd: $(ZSTDDIR)/zstd.c xxhash.c bench.c fileio.c zstdcli.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

zstd32: $(ZSTDDIR)/zstd.c xxhash.c bench.c fileio.c zstdcli.c
	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)

fullbench : $(ZSTDDIR)/zstd.c fullbench.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

fullbench32: $(ZSTDDIR)/zstd.c fullbench.c
	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)

fuzzer : $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

fuzzer32: $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)

datagen : datagen.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)
# Removes build products and the temporary file used by the tests.
# Declared phony : a file named `clean` must not turn this target into a no-op.
.PHONY: clean
clean:
	@$(RM) core *.o tmp \
	zstd$(EXT) zstd32$(EXT) \
	fullbench$(EXT) fullbench32$(EXT) \
	fuzzer$(EXT) fuzzer32$(EXT) \
	datagen$(EXT)
	@echo Cleaning completed
#------------------------------------------------------------------------
#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
install: zstd
@echo Installing binaries
@install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/
@install -m 755 zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT)
@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat
@echo Installing man pages
@install -m 644 zstd.1 $(DESTDIR)$(MANDIR)/zstd.1
@install -m 644 zstdcat.1 $(DESTDIR)$(MANDIR)/zstdcat.1
@echo zstd installation completed
# Remove everything 'install' put in place.
# Plain 'rm -f' is used on purpose: the former '[ -f file ] && rm -f file'
# form returned a non-zero status when the file was already gone, which made
# the whole recipe fail on a partial or repeated uninstall.
.PHONY: uninstall
uninstall:
	rm -f $(DESTDIR)$(BINDIR)/zstdcat
	rm -f $(DESTDIR)$(BINDIR)/zstd$(EXT)
	rm -f $(DESTDIR)$(MANDIR)/zstd.1
	rm -f $(DESTDIR)$(MANDIR)/zstdcat.1
	@echo zstd programs successfully uninstalled
# Test entry points. None of them produces a file, so all are phony.
.PHONY: test test32 test-all \
        test-zstd test-zstd32 \
        test-fullbench test-fullbench32 \
        test-fuzzer test-fuzzer32

test: test-zstd test-fullbench test-fuzzer test-mem

test32: test-zstd32 test-fullbench32 test-fuzzer32

test-all: test test32

# Round-trip generated data through compression and decompression,
# discarding the result: default size, 256 MB, then 6 GB.
test-zstd: zstd datagen
	./datagen | ./zstd -v | ./zstd -d > $(VOID)
	./datagen -g256MB | ./zstd -v | ./zstd -d > $(VOID)
	./datagen -g6GB | ./zstd -vq | ./zstd -d > $(VOID)

# Same round trips with the -m32 build.
test-zstd32: zstd32 datagen
	./datagen | ./zstd32 -v | ./zstd32 -d > $(VOID)
	./datagen -g256MB | ./zstd32 -v | ./zstd32 -d > $(VOID)
	./datagen -g6GB | ./zstd32 -vq | ./zstd32 -d > $(VOID)

test-fullbench: fullbench
	./fullbench -i1

test-fullbench32: fullbench32
	./fullbench32 -i1

test-fuzzer: fuzzer
	./fuzzer

test-fuzzer32: fuzzer32
	./fuzzer32
# Run the tools under valgrind.
# --error-exitcode=1 makes valgrind return a failure status when it reports
# an error; without it the recipe succeeded whatever valgrind found.
# Output goes to $(VOID), like the other test targets, and the scratch file
# is removed with -f so a missing 'tmp' cannot fail the run.
.PHONY: test-mem
test-mem: zstd datagen fuzzer fullbench
	./datagen -g16KB > tmp
	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
	./datagen -g128MB > tmp
	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
	rm -f tmp
	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i128 -t1
	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1
endif

494
programs/bench.c Executable file
View File

@ -0,0 +1,494 @@
/*
bench.c - Demo module to benchmark open-source compression algorithms
Copyright (C) Yann Collet 2012-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/***************************************
* Compiler Options
***************************************/
/* These feature-selection macros are defined ahead of the #include lines
 * that follow in this file. */
/* Disable some Visual warning messages */
#define _CRT_SECURE_NO_WARNINGS /* fopen */
// Unix Large Files support (>4GB)
#define _FILE_OFFSET_BITS 64
#if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions
# define _LARGEFILE_SOURCE
#elif ! defined(__LP64__) // No point defining Large file for 64 bit
# define _LARGEFILE64_SOURCE
#endif
// S_ISREG & gettimeofday() are not supported by MSVC
/* BMK_LEGACY_TIMER selects the ftime()-based BMK_GetMilliStart() below
 * instead of the gettimeofday()-based one. */
#if defined(_MSC_VER) || defined(_WIN32)
# define BMK_LEGACY_TIMER 1
#endif
/**************************************
* Includes
**************************************/
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
#include <stdio.h> // fprintf, fopen, ftello64
#include <sys/types.h> // stat64
#include <sys/stat.h> // stat64
// Use ftime() if gettimeofday() is not available on your target
#if defined(BMK_LEGACY_TIMER)
# include <sys/timeb.h> // timeb, ftime
#else
# include <sys/time.h> // gettimeofday
#endif
#include "zstd.h"
#include "xxhash.h"
/**************************************
* Compiler specifics
**************************************/
#if !defined(S_ISREG)
# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#endif
/**************************************
* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
/**************************************
* Constants
**************************************/
#define NBLOOPS 3
#define TIMELOOP 2500
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define MAX_MEM (2 GB - 64 MB)
#define DEFAULT_CHUNKSIZE (4 MB)
static U32 g_compressibilityDefault = 50;
static U32 prime1 = 2654435761U;
static U32 prime2 = 2246822519U;
/**************************************
* Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/**************************************
* Benchmark Parameters
**************************************/
/* Number of measurement rounds performed by BMK_benchMem(). */
static int nbIterations = NBLOOPS;

/* BMK_SetNbIterations :
 * Overrides the number of benchmark rounds and reports the new value on stderr. */
void BMK_SetNbIterations(int nbLoops)
{
    DISPLAY("- %i iterations -\n", nbLoops);
    nbIterations = nbLoops;
}
/*********************************************************
* Private functions
*********************************************************/
/* BMK_GetMilliStart :
 * Returns a millisecond time stamp built from the low 20 bits of the current
 * second count plus the sub-second part.
 * The value rolls over every ~ 12.1 days (0x100000/24/60/60) ;
 * BMK_GetMilliSpan() corrects for that rollover. */
#if defined(BMK_LEGACY_TIMER)
static int BMK_GetMilliStart(void)
{
    /* Legacy ftime() variant */
    struct timeb now;
    ftime(&now);
    return (int) (now.millitm + (now.time & 0xfffff) * 1000);
}
#else
static int BMK_GetMilliStart(void)
{
    /* gettimeofday() variant */
    struct timeval now;
    gettimeofday(&now, NULL);
    return (int) (now.tv_usec/1000 + (now.tv_sec & 0xfffff) * 1000);
}
#endif
/* BMK_GetMilliSpan :
 * Milliseconds elapsed since the time stamp `nTimeStart` obtained from
 * BMK_GetMilliStart(). A negative difference means the stamp wrapped, in
 * which case one full period (0x100000 seconds) is added back. */
static int BMK_GetMilliSpan( int nTimeStart )
{
    int const elapsed = BMK_GetMilliStart() - nTimeStart;
    return (elapsed < 0) ? elapsed + 0x100000 * 1000 : elapsed;
}
/*********************************************************
* Data generator
*********************************************************/
/* BMK_rotl32 :
 * 32-bit left rotation ; compilers are expected to turn it into a single ROL. */
static U32 BMK_rotl32(unsigned val32, unsigned nbBits)
{
    unsigned const upper = val32 << nbBits;
    unsigned const lower = val32 >> (32 - nbBits);
    return (upper | lower);
}

/* BMK_rand :
 * Advances the generator state stored in `*src` (multiply, add, rotate)
 * and returns the new state shifted right by 9 bits. */
static U32 BMK_rand(U32* src)
{
    U32 state = *src;
    state = (state * prime1) + prime2;
    state = BMK_rotl32(state, 13);
    *src = state;
    return state >> 9;
}
/* Random helpers ; all of them read and update the local variable `seed`. */
#define BMK_RAND15BITS ( BMK_rand(&seed) & 0x7FFF)
#define BMK_RANDLENGTH ((BMK_rand(&seed) & 3) ? (BMK_rand(&seed) % 15) : (BMK_rand(&seed) % 510) + 15)
#define BMK_RANDCHAR (BYTE)((BMK_rand(&seed) & 63) + '0')

/* BMK_datagen :
 * Fills `buffer` with `bufferSize` bytes of pseudo-random data made of
 * literal runs and copies of earlier content.
 *   proba : probability (0.0 - 1.0) that a sequence is a copy rather than literals
 *   seed  : initial generator state ; the same seed produces the same content
 * An empty buffer is left untouched (the first byte used to be written
 * unconditionally). */
static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
{
    BYTE* BBuffer = (BYTE*)buffer;
    size_t pos = 0;   /* size_t, so that it can always reach bufferSize */
    U32 P32 = (U32)(32768 * proba);

    if (bufferSize == 0) return;

    /* First Byte */
    BBuffer[pos++] = BMK_RANDCHAR;

    while (pos < bufferSize)
    {
        /* Select : Literal (noise) or copy (offset <= 32768) */
        if (BMK_RAND15BITS < P32)
        {
            /* Match */
            size_t match, end;
            unsigned length = BMK_RANDLENGTH + 4;
            unsigned offset = BMK_RAND15BITS + 1;
            if (offset > pos) offset = (unsigned)pos;   /* cannot reach before the buffer start */
            match = pos - offset;
            end = pos + length;
            if (end > bufferSize) end = bufferSize;
            while (pos < end) BBuffer[pos++] = BBuffer[match++];
        }
        else
        {
            /* Literal */
            size_t end;
            unsigned length = BMK_RANDLENGTH;
            end = pos + length;
            if (end > bufferSize) end = bufferSize;
            while (pos < end) BBuffer[pos++] = BMK_RANDCHAR;
        }
    }
}
/*********************************************************
* Bench functions
*********************************************************/
/* BMK_benchMem :
 * Benchmarks compression then decompression of `srcBuffer` (`srcSize` bytes)
 * for `nbIterations` rounds, keeping the fastest time of each, and verifies
 * the round trip with an XXH64 checksum.
 *   fileName : label printed in front of the results
 *   cLevel   : currently unused
 * @return : 12 if working buffers cannot be allocated, 0 otherwise
 *           (including when a checksum mismatch was reported).
 * All messages go to stderr through DISPLAY ; the "Decoding error" message
 * used to be the only one sent to stdout. */
static int BMK_benchMem(void* srcBuffer, size_t srcSize, char* fileName, int cLevel)
{
    size_t maxCompressedSize = ZSTD_compressBound(srcSize);
    void* compressedBuffer = malloc(maxCompressedSize);
    void* resultBuffer = malloc(srcSize);
    U64 crcOrig;

    /* Init */
    (void)cLevel;

    /* Memory allocation & restrictions */
    if (!compressedBuffer || !resultBuffer)
    {
        DISPLAY("\nError: not enough memory!\n");
        free(compressedBuffer);
        free(resultBuffer);
        return 12;
    }

    /* Calculating input Checksum */
    crcOrig = XXH64(srcBuffer, srcSize, 0);

    /* warming up memory */
    BMK_datagen(compressedBuffer, maxCompressedSize, 0.10, 1);

    /* Bench */
    {
        int loopNb;
        size_t cSize = 0;
        double fastestC = 100000000., fastestD = 100000000.;
        double ratio = 0.;
        U64 crcCheck = 0;

        DISPLAY("\r%79s\r", "");
        for (loopNb = 1; loopNb <= nbIterations; loopNb++)
        {
            int nbLoops;
            int milliTime;

            /* Compression */
            DISPLAY("%1i-%-14.14s : %9u ->\r", loopNb, fileName, (U32)srcSize);
            memset(compressedBuffer, 0xE5, maxCompressedSize);

            nbLoops = 0;
            /* wait for the next millisecond tick before starting the measurement */
            milliTime = BMK_GetMilliStart();
            while (BMK_GetMilliStart() == milliTime);
            milliTime = BMK_GetMilliStart();
            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
            {
                cSize = ZSTD_compress(compressedBuffer, maxCompressedSize, srcBuffer, srcSize);
                nbLoops++;
            }
            milliTime = BMK_GetMilliSpan(milliTime);

            /* keep the best average time per call */
            if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
            ratio = (double)cSize / (double)srcSize*100.;
            DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.);

#if 1
            /* Decompression */
            memset(resultBuffer, 0xD6, srcSize);

            nbLoops = 0;
            milliTime = BMK_GetMilliStart();
            while (BMK_GetMilliStart() == milliTime);
            milliTime = BMK_GetMilliStart();
            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
            {
                ZSTD_decompress(resultBuffer, srcSize, compressedBuffer, cSize);
                nbLoops++;
            }
            milliTime = BMK_GetMilliSpan(milliTime);

            if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
            DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
#endif

            /* CRC Checking */
            crcCheck = XXH64(resultBuffer, srcSize, 0);
            if (crcOrig!=crcCheck)
            {
                unsigned i = 0;
                DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", fileName, (unsigned)crcOrig, (unsigned)crcCheck);
                /* locate the first differing byte */
                while (i<srcSize)
                {
                    if (((BYTE*)srcBuffer)[i] != ((BYTE*)resultBuffer)[i])
                    {
                        DISPLAY("\nDecoding error at pos %u \n", i);
                        break;
                    }
                    i++;
                }
                break;
            }
        }

        if (crcOrig == crcCheck)
        {
            if (ratio<100.)
                DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
            else
                DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
        }
    }

    /* End cleaning */
    free(compressedBuffer);
    free(resultBuffer);
    return 0;
}
static U64 BMK_GetFileSize(char* infilename)
{
int r;
#if defined(_MSC_VER)
struct _stat64 statbuf;
r = _stat64(infilename, &statbuf);
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
#endif
if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
return (U64)statbuf.st_size;
}
/* BMK_findMaxMem :
 * Probes how much memory can actually be allocated, starting from
 * `requiredMem` rounded up to a multiple of 64 MB (capped at MAX_MEM) and
 * going down by steps of 64 MB until malloc() succeeds.
 * @return : the size that could be allocated minus one 64 MB step,
 *           or 0 when no probe above 0 succeeded.
 * The search stops before reaching 0 : it used to fall through to malloc(0)
 * and then underflow the unsigned size. */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t step = 64 MB;
    BYTE* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2 * step;
    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;

    while (!testmem)
    {
        if (requiredMem <= step) return 0;   /* nothing left to try */
        requiredMem -= step;
        testmem = (BYTE*)malloc((size_t)requiredMem);
    }

    free(testmem);
    return (size_t)(requiredMem - step);
}
static int BMK_benchOneFile(char* inFileName, int cLevel)
{
FILE* inFile;
U64 inFileSize;
size_t benchedSize, readSize;
void* srcBuffer;
int result;
/* Init */
(void)cLevel;
// Check file existence
inFile = fopen(inFileName, "rb");
if (inFile == NULL)
{
DISPLAY("Pb opening %s\n", inFileName);
return 11;
}
// Memory allocation & restrictions
inFileSize = BMK_GetFileSize(inFileName);
benchedSize = BMK_findMaxMem(inFileSize * 3) / 3;
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
if (benchedSize < inFileSize)
DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize >> 20));
// Alloc
srcBuffer = malloc(benchedSize);
if (!srcBuffer)
{
DISPLAY("\nError: not enough memory!\n");
free(srcBuffer);
fclose(inFile);
return 12;
}
// Fill input buffer
DISPLAY("Loading %s... \r", inFileName);
readSize = fread(srcBuffer, 1, benchedSize, inFile);
fclose(inFile);
if (readSize != benchedSize)
{
DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
free(srcBuffer);
return 13;
}
// Bench
result = BMK_benchMem(srcBuffer, benchedSize, inFileName, cLevel);
// End
free(srcBuffer);
DISPLAY("\n");
return result;
}
/* BMK_syntheticTest :
 * Benchmarks a 10,000,000-byte sample generated by BMK_datagen() with the
 * requested `compressibility` (0.0 - 1.0) and seed 0.
 * @return : 12 if the sample cannot be allocated, else the result of BMK_benchMem(). */
static int BMK_syntheticTest(int cLevel, double compressibility)
{
    char name[20] = {0};
    size_t const sampleSize = 10000000;
    void* const sample = malloc(sampleSize);
    int status;

    (void)cLevel;

    /* Memory allocation */
    if (sample == NULL)
    {
        DISPLAY("\nError: not enough memory!\n");
        return 12;
    }

    /* Fill input buffer */
    BMK_datagen(sample, sampleSize, compressibility, 0);

    /* Label shown in front of the results */
#ifdef _MSC_VER
    sprintf_s(name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
#else
    snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
#endif

    /* Bench */
    status = BMK_benchMem(sample, sampleSize, name, cLevel);

    /* End */
    free(sample);
    DISPLAY("\n");
    return status;
}
/* BMK_bench :
 * Benchmarks each of the `nbFiles` files named in `fileNamesTable`, in order.
 * With no file, a synthetic sample of default compressibility is used instead.
 * Individual benchmark results are not propagated.
 * @return : always 0 */
int BMK_bench(char** fileNamesTable, unsigned nbFiles, unsigned cLevel)
{
    unsigned n;

    if (nbFiles == 0)
    {
        BMK_syntheticTest(cLevel, (double)g_compressibilityDefault / 100);
        return 0;
    }

    for (n = 0; n < nbFiles; n++)
        BMK_benchOneFile(fileNamesTable[n], cLevel);

    return 0;
}

42
programs/bench.h Executable file
View File

@ -0,0 +1,42 @@
/*
bench.h - Demo program to benchmark open-source compression algorithm
Copyright (C) Yann Collet 2012-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- LZ4 source repository : http://code.google.com/p/lz4/
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#pragma once
/* Give the declarations C linkage when included from C++. */
#if defined (__cplusplus)
extern "C" {
#endif
/* Main function :
 * benchmarks the nbFiles files listed in fileNamesTable ;
 * when nbFiles is 0, a synthetic sample is benchmarked instead.
 * cLevel is forwarded to the benchmark helpers. */
int BMK_bench(char** fileNamesTable, unsigned nbFiles, unsigned cLevel);
/* Set Parameters :
 * number of benchmark rounds per input. */
void BMK_SetNbIterations(int nbLoops);
#if defined (__cplusplus)
}
#endif

320
programs/datagen.c Normal file
View File

@ -0,0 +1,320 @@
/*
datagen.c - compressible data generator test tool
Copyright (C) Yann Collet 2012-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- ZSTD source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/**************************************
* Remove Visual warning messages
**************************************/
#define _CRT_SECURE_NO_WARNINGS /* fgets */
/**************************************
* Includes
**************************************/
#include <stdlib.h> /* malloc */
#include <stdio.h> /* fgets, sscanf */
#include <string.h> /* strcmp */
/**************************************
* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
/**************************************
* Constants
**************************************/
#ifndef ZSTD_VERSION
# define ZSTD_VERSION "r0"
#endif
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define CDG_SIZE_DEFAULT (64 KB)
#define CDG_SEED_DEFAULT 0
#define CDG_COMPRESSIBILITY_DEFAULT 50
#define PRIME1 2654435761U
#define PRIME2 2246822519U
/**************************************
* Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
/**************************************
* Local Parameters
**************************************/
static unsigned no_prompt = 0;
static char* programName;
static unsigned displayLevel = 2;
/*********************************************************
* Local Functions
*********************************************************/
/* 32-bit left rotation. Arguments are not parenthesized : pass plain identifiers or literals only. */
#define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))

/* CDG_rand :
 * Advances the generator state stored in `*src` (multiply, add, rotate)
 * and returns the new state. */
static unsigned int CDG_rand(U32* src)
{
    U32 state = (*src * PRIME1) + PRIME2;
    state = CDG_rotl32(state, 13);
    *src = state;
    return state;
}
#define LTSIZE 8192
#define LTMASK (LTSIZE-1)
static const char firstChar = '(';
static const char lastChar = '}';
static void* CDG_createLiteralDistrib(double ld)
{
char* lt = malloc(LTSIZE);
U32 i = 0;
char character = '0';
while (i<LTSIZE)
{
U32 weight = (U32)((double)(LTSIZE - i) * ld) + 1;
U32 end;
if (weight + i > LTSIZE) weight = LTSIZE-i;
end = i + weight;
while (i < end) lt[i++] = character;
character++;
if (character > lastChar) character = firstChar;
}
return lt;
}
/* CDG_genChar :
 * Draws one character from the literal table `ltctx`, using the next random
 * value of `seed` masked to the table size as index. */
static char CDG_genChar(U32* seed, const void* ltctx)
{
    U32 const slot = CDG_rand(seed) & LTMASK;
    const BYTE* const table = ltctx;
    return table[slot];
}
/* Random helpers ; both read and update the generator state through the local pointer `seed`. */
#define CDG_RAND15BITS ((CDG_rand(seed) >> 3) & 32767)
#define CDG_RANDLENGTH ( ((CDG_rand(seed) >> 7) & 7) ? (CDG_rand(seed) & 15) : (CDG_rand(seed) & 511) + 15)
/* Size of the prefix kept in front of each generated block, so that copies can reach back into previous data. */
#define CDG_DICTSIZE (32 KB)

/* CDG_generate :
 * Writes `size` bytes of pseudo-random data to stdout, generated by blocks
 * of up to 128 KB made of literal runs and copies of earlier data.
 *   seed       : generator state, updated in place
 *   matchProba : probability (0.0 - 1.0) that a sequence is a copy
 * Changes from the previous version, output unchanged :
 *  - each block is written with one fwrite() instead of printf() per
 *    512 bytes / per byte ;
 *  - the literal table is released before returning. */
static void CDG_generate(U64 size, U32* seed, double matchProba)
{
    BYTE fullbuff[CDG_DICTSIZE + 128 KB + 1];
    BYTE* buff = fullbuff + CDG_DICTSIZE;
    U64 total=0;
    U32 P32 = (U32)(32768 * matchProba);
    U32 pos=1;
    U32 genBlockSize = 128 KB;
    double literalDistrib = 0.13;
    void* ldctx = CDG_createLiteralDistrib(literalDistrib);

    /* Build initial prefix */
    fullbuff[0] = CDG_genChar(seed, ldctx);
    while (pos<32 KB)
    {
        /* Select : Literal (char) or Match (within 32K) */
        if (CDG_RAND15BITS < P32)
        {
            /* Copy (within 32K) */
            U32 d;
            int ref;
            int length = CDG_RANDLENGTH + 4;
            U32 offset = CDG_RAND15BITS + 1;
            if (offset > pos) offset = pos;   /* cannot reach before the buffer start */
            ref = pos - offset;
            d = pos + length;
            while (pos < d) fullbuff[pos++] = fullbuff[ref++];
        }
        else
        {
            /* Literal (noise) */
            U32 d = pos + CDG_RANDLENGTH;
            while (pos < d) fullbuff[pos++] = CDG_genChar(seed, ldctx);
        }
    }

    /* Generate compressible data */
    pos = 0;
    while (total < size)
    {
        if (size-total < 128 KB) genBlockSize = (U32)(size-total);
        total += genBlockSize;
        pos = 0;
        while (pos<genBlockSize)
        {
            /* Select : Literal (char) or Match (within 32K) */
            if (CDG_RAND15BITS < P32)
            {
                /* Copy (within 32K) */
                int ref;
                U32 d;
                int length = CDG_RANDLENGTH + 4;
                U32 offset = CDG_RAND15BITS + 1;
                if (pos + length > genBlockSize ) length = genBlockSize - pos;
                ref = pos - offset;   /* may be negative : the copy then starts inside the prefix */
                d = pos + length;
                while (pos < d) buff[pos++] = buff[ref++];
            }
            else
            {
                /* Literal (noise) */
                U32 d;
                int length = CDG_RANDLENGTH;
                if (pos + length > genBlockSize) length = genBlockSize - pos;
                d = pos + length;
                while (pos < d) buff[pos++] = CDG_genChar(seed, ldctx);
            }
        }

        /* output datagen */
        fwrite(buff, 1, genBlockSize, stdout);

        /* Regenerate prefix : the last 32 KB of a full block become the new prefix */
        memcpy(fullbuff, buff + 96 KB, 32 KB);
    }

    free(ldctx);
}
/*********************************************************
* Command line
*********************************************************/
/* CDG_usage :
 * Prints the command-line help on stderr (through DISPLAY).
 * @return : always 0 */
static int CDG_usage(void)
{
DISPLAY( "Compressible data generator\n");
DISPLAY( "Usage :\n");
DISPLAY( " %s [size] [args]\n", programName);
DISPLAY( "\n");
DISPLAY( "Arguments :\n");
DISPLAY( " -g# : generate # data (default:%i)\n", CDG_SIZE_DEFAULT);
DISPLAY( " -s# : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
DISPLAY( " -p# : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
DISPLAY( " -h : display help and exit\n");
return 0;
}
/* main :
 * Parses the command line, then writes the generated data to stdout.
 *   -g# : size to generate, with optional K / M / G (and trailing B) suffix
 *   -s# : seed
 *   -p# : compressibility in percent, clamped to 0-100
 *   -v  : verbose
 *   -h  : help
 * Options can be aggregated after a single '-'.
 * An unknown option letter prints the usage text and returns 1 ; it used to
 * leave `argument` unchanged and loop forever.
 * @return : 0 on success (and after -h), 1 on bad option. */
int main(int argc, char** argv)
{
    int argNb;
    int proba = CDG_COMPRESSIBILITY_DEFAULT;
    U64 size = CDG_SIZE_DEFAULT;
    U32 seed = CDG_SEED_DEFAULT;

    /* Check command line */
    programName = argv[0];
    for(argNb=1; argNb<argc; argNb++)
    {
        char* argument = argv[argNb];

        if(!argument) continue;   /* Protection if argument empty */

        /* Handle commands. Aggregated commands are allowed */
        if (*argument=='-')
        {
            if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }

            argument++;
            while (*argument!=0)
            {
                switch(*argument)
                {
                case 'h':
                    return CDG_usage();
                case 'g':
                    argument++;
                    size=0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        size *= 10;
                        size += *argument - '0';
                        argument++;
                    }
                    if (*argument=='K') { size <<= 10; argument++; }
                    if (*argument=='M') { size <<= 20; argument++; }
                    if (*argument=='G') { size <<= 30; argument++; }
                    if (*argument=='B') { argument++; }
                    break;
                case 's':
                    argument++;
                    seed=0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        seed *= 10;
                        seed += *argument - '0';
                        argument++;
                    }
                    break;
                case 'p':
                    argument++;
                    proba=0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        proba *= 10;
                        proba += *argument - '0';
                        argument++;
                    }
                    if (proba<0) proba=0;
                    if (proba>100) proba=100;
                    break;
                case 'v':
                    displayLevel = 4;
                    argument++;
                    break;
                default:
                    /* unknown option : report it instead of spinning on the same character */
                    CDG_usage();
                    return 1;
                }
            }
        }
    }

    DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION);
    DISPLAYLEVEL(3, "Seed = %u \n", seed);
    if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba);

    CDG_generate(size, &seed, ((double)proba) / 100);

    return 0;
}

389
programs/fileio.c Normal file
View File

@ -0,0 +1,389 @@
/*
fileio.c - File i/o handler
Copyright (C) Yann Collet 2013-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/*
Note : this is a stand-alone program.
It is not part of ZSTD compression library, it is a user program of ZSTD library.
The license of ZSTD library is BSD.
The license of this file is GPLv2.
*/
/**************************************
* Compiler Options
**************************************/
/* Disable some Visual warning messages */
/* Visual Studio only : silence CRT deprecation warnings and warning C4127. */
#ifdef _MSC_VER
# define _CRT_SECURE_NO_WARNINGS
# define _CRT_SECURE_NO_DEPRECATE /* VS2005 */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/* Compiler version as major*100 + minor, e.g. 408 for gcc 4.8.
 * NOTE(review): not referenced in the visible part of this file. */
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
/* The two macros below are defined ahead of the #include lines that follow. */
#define _FILE_OFFSET_BITS 64 /* Large file support on 32-bits unix */
#define _POSIX_SOURCE 1 /* enable fileno() within <stdio.h> on unix */
/**************************************
* Includes
**************************************/
#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* strcmp, strlen */
#include <time.h> /* clock */
#include "fileio.h"
#include "zstd_static.h"
/**************************************
* OS-specific Includes
**************************************/
/* Portability layer for the standard streams :
 *   SET_BINARY_MODE(file)  : switches `file` to binary mode where the platform
 *                            distinguishes text and binary ; expands to nothing elsewhere.
 *   IS_CONSOLE(stdStream)  : non-zero when the stream is attached to a terminal. */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _isatty */
# ifdef __MINGW32__
int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
# endif
# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#else
# include <unistd.h> /* isatty */
# define SET_BINARY_MODE(file)
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#endif
/**************************************
* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
/**************************************
* Constants
**************************************/
#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
#define _1BIT 0x01
#define _2BITS 0x03
#define _3BITS 0x07
#define _4BITS 0x0F
#define _6BITS 0x3F
#define _8BITS 0xFF
#define BIT6 0x40
#define BIT7 0x80
static const unsigned FIO_magicNumber = 0x183E2308;
static const unsigned FIO_maxBlockSizeID = 0xB; /* => 2MB block */
static const unsigned FIO_blockHeaderSize = 3;
#define FIO_FRAMEHEADERSIZE 5 /* as a define, because needed to allocated table on stack */
#define FSE_CHECKSUM_SEED 0
#define CACHELINE 64
/**************************************
* Complex types
**************************************/
typedef enum { bt_compressed, bt_raw, bt_rle, bt_crc } bType_t;
/**************************************
* Macros
**************************************/
/* All messages are written to stderr ; stdout may carry data. */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* Prints only when the notification level is at least l.
 * Expands to a bare if-statement : take care when using it before an else. */
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */

/* Progress display : like DISPLAYLEVEL, but rate-limited to one message per
 * `refreshRate` milliseconds unless the level is 4 or more.
 * At level >= 4 the stream is flushed after each message ; the stream that
 * receives the text is stderr, so that is the one flushed (it used to be stdout). */
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
            if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
            { g_time = clock(); DISPLAY(__VA_ARGS__); \
            if (g_displayLevel>=4) fflush(stderr); } }
static const unsigned refreshRate = 150;   /* in milliseconds */
static clock_t g_time = 0;                 /* clock() value at the last progress message */
/**************************************
* Local Parameters
**************************************/
/* When non-zero, an existing destination file is overwritten without asking. */
static U32 g_overwrite = 0;

/* FIO_overwriteMode :
 * Allows existing destination files to be overwritten silently. */
void FIO_overwriteMode(void)
{
    g_overwrite = 1;
}

/* FIO_setNotificationLevel :
 * Sets the verbosity used by the DISPLAYLEVEL / DISPLAYUPDATE macros. */
void FIO_setNotificationLevel(unsigned level)
{
    g_displayLevel = level;
}
/**************************************
* Exceptions
**************************************/
/* DEBUG : set to non-zero to also print the source location of each error. */
#define DEBUG 0
/* Note : the expansion already ends with ';'. */
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* EXM_THROW(error, fmt, ...) :
 * Prints "Error <error> : <message>" at notification level 1, then terminates
 * the program with exit(error). Expands to a brace-enclosed block. */
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", error); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, "\n"); \
exit(error); \
}
/**************************************
* Functions
**************************************/
/* FIO_GetMilliSpan :
 * Converts the clock() ticks elapsed since `nPrevious` into milliseconds. */
static unsigned FIO_GetMilliSpan(clock_t nPrevious)
{
    clock_t const elapsedTicks = clock() - nPrevious;
    return (unsigned)((elapsedTicks * 1000) / CLOCKS_PER_SEC);
}
/* FIO_getFileHandles :
 * Opens the source and destination streams.
 *   input_filename  : file to read, or stdinmark for stdin
 *   output_filename : file to write, or stdoutmark for stdout
 * If the destination already exists and overwrite mode is off, the user is
 * asked for confirmation (or the operation is aborted when the notification
 * level does not allow interaction).
 * Never returns on failure : terminates through EXM_THROW with
 *   11 : destination exists and was not allowed to be overwritten
 *   12 : source cannot be opened
 *   13 : destination cannot be opened
 * The source is validated before the destination is touched : previously a
 * missing source was only detected after the destination had already been
 * created or truncated. */
static void FIO_getFileHandles(FILE** pfinput, FILE** pfoutput, const char* input_filename, const char* output_filename)
{
    if (!strcmp (input_filename, stdinmark))
    {
        DISPLAYLEVEL(4,"Using stdin for input\n");
        *pfinput = stdin;
        SET_BINARY_MODE(stdin);
    }
    else
    {
        *pfinput = fopen(input_filename, "rb");
    }

    if ( *pfinput==0 ) EXM_THROW(12, "Pb opening %s", input_filename);

    if (!strcmp (output_filename, stdoutmark))
    {
        DISPLAYLEVEL(4,"Using stdout for output\n");
        *pfoutput = stdout;
        SET_BINARY_MODE(stdout);
    }
    else
    {
        /* Check if destination file already exists (skipped for the null device) */
        *pfoutput=0;
        if (strcmp(output_filename,nulmark)) *pfoutput = fopen( output_filename, "rb" );
        if (*pfoutput!=0)
        {
            fclose(*pfoutput);
            if (!g_overwrite)
            {
                char ch;
                if (g_displayLevel <= 1)   /* No interaction possible */
                    EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
                DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename);
                DISPLAYLEVEL(2, "Overwrite ? (Y/N) : ");
                ch = (char)getchar();
                if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
            }
        }
        *pfoutput = fopen( output_filename, "wb" );
    }

    if ( *pfoutput==0) EXM_THROW(13, "Pb opening %s", output_filename);
}
unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename)
{
U64 filesize = 0;
U64 compressedfilesize = 0;
BYTE* inBuff;
BYTE* inSlot;
BYTE* inEnd;
BYTE* outBuff;
size_t blockSize = 128 KB;
size_t inBuffSize = 4 * blockSize;
size_t outBuffSize = ZSTD_compressBound(blockSize);
FILE* finput;
FILE* foutput;
size_t sizeCheck, cSize;
ZSTD_cctx_t ctx;
/* Init */
FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
ctx = ZSTD_createCCtx();
/* Allocate Memory */
inBuff = malloc(inBuffSize);
outBuff = malloc(outBuffSize);
if (!inBuff || !outBuff) EXM_THROW(21, "Allocation error : not enough memory");
inSlot = inBuff;
inEnd = inBuff + inBuffSize;
/* Write Frame Header */
cSize = ZSTD_compressBegin(ctx, outBuff, outBuffSize);
if (ZSTD_isError(cSize)) EXM_THROW(22, "Compression error : cannot create frame header");
sizeCheck = fwrite(outBuff, 1, cSize, foutput);
if (sizeCheck!=cSize) EXM_THROW(23, "Write error : cannot write header");
compressedfilesize += cSize;
/* Main compression loop */
while (1)
{
size_t inSize;
/* Fill input Buffer */
if (inSlot + blockSize > inEnd) inSlot = inBuff;
inSize = fread(inSlot, (size_t)1, blockSize, finput);
if (inSize==0) break;
filesize += inSize;
DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20));
/* Compress Block */
cSize = ZSTD_compressContinue(ctx, outBuff, outBuffSize, inSlot, inSize);
if (ZSTD_isError(cSize))
EXM_THROW(24, "Compression error : %s ", ZSTD_getErrorName(cSize));
/* Write cBlock */
sizeCheck = fwrite(outBuff, 1, cSize, foutput);
if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block");
compressedfilesize += cSize;
inSlot += inSize;
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
}
/* End of Frame */
cSize = ZSTD_compressEnd(ctx, outBuff, outBuffSize);
if (ZSTD_isError(cSize)) EXM_THROW(26, "Compression error : cannot create frame end");
sizeCheck = fwrite(outBuff, 1, cSize, foutput);
if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end");
compressedfilesize += cSize;
/* Status */
DISPLAYLEVEL(2, "\r%79s\r", "");
DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
(unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
/* clean */
free(inBuff);
free(outBuff);
fclose(finput);
fclose(foutput);
ZSTD_freeCCtx(ctx);
return compressedfilesize;
}
/* Size of the buffer used to read the frame header.
 * Parenthesized so that the macro expands safely inside any expression. */
#define MAXHEADERSIZE (FIO_FRAMEHEADERSIZE+3)
unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename)
{
FILE* finput, *foutput;
BYTE* inBuff;
size_t inBuffSize;
BYTE* outBuff, *op, *oend;
size_t outBuffSize;
U32 blockSize = 128 KB;
U32 wNbBlocks = 4;
U64 filesize = 0;
BYTE* header[MAXHEADERSIZE];
ZSTD_cctx_t dctx;
size_t toRead;
size_t sizeCheck;
/* Init */
FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
dctx = ZSTD_createDCtx();
toRead = ZSTD_getNextcBlockSize(dctx);
if (toRead > MAXHEADERSIZE) EXM_THROW(30, "Not enough memory to read header");
/* check header */
sizeCheck = fread(header, (size_t)1, toRead, finput);
if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header");
sizeCheck = ZSTD_decompressContinue(dctx, NULL, 0, header, toRead); // Decode frame header
if (ZSTD_isError(sizeCheck)) EXM_THROW(32, "Error decoding header");
/* Here later : blockSize determination */
/* Allocate Memory */
inBuffSize = blockSize + FIO_blockHeaderSize;
inBuff = malloc(inBuffSize);
outBuffSize = wNbBlocks * blockSize;
outBuff = malloc(outBuffSize);
op = outBuff;
oend = outBuff + outBuffSize;
if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory");
/* Main decompression Loop */
toRead = ZSTD_getNextcBlockSize(dctx);
while (toRead)
{
size_t readSize, decodedSize;
/* Fill input buffer */
readSize = fread(inBuff, 1, toRead, finput);
if (readSize != toRead)
EXM_THROW(34, "Read error");
/* Decode block */
decodedSize = ZSTD_decompressContinue(dctx, op, oend-op, inBuff, readSize);
/* Write block */
sizeCheck = fwrite(op, 1, decodedSize, foutput);
if (sizeCheck != decodedSize) EXM_THROW(35, "Write error : unable to write data block to destination file");
filesize += decodedSize;
/* prepare for next Block */
op += decodedSize;
if (op==oend) op = outBuff;
toRead = ZSTD_getNextcBlockSize(dctx);
DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(filesize>>20) );
}
DISPLAYLEVEL(2, "\r%79s\r", "");
DISPLAYLEVEL(2,"Decoded %llu bytes \n", (long long unsigned)filesize);
/* clean */
free(inBuff);
free(outBuff);
fclose(finput);
fclose(foutput);
ZSTD_freeDCtx(dctx);
return filesize;
}

69
programs/fileio.h Executable file
View File

@ -0,0 +1,69 @@
/*
fileio.h - file i/o handler
Copyright (C) Yann Collet 2013-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- ZSTD source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/**************************************
* Special i/o constants
**************************************/
#define nullString "null"
#define stdinmark "-"
#define stdoutmark "-"
#ifdef _WIN32
# define nulmark "nul"
#else
# define nulmark "/dev/null"
#endif
/**************************************
* Parameters
**************************************/
/* FIO_overwriteMode :
   NOTE(review): implementation is not part of this header; presumably lets the
   file functions replace an already existing destination file - confirm in fileio.c */
void FIO_overwriteMode(void);
/* FIO_setNotificationLevel :
   NOTE(review): presumably selects how verbose the file functions are (higher = more output) -
   confirm in fileio.c */
void FIO_setNotificationLevel(unsigned level);
/**************************************
* Stream/File functions
**************************************/
/* Both functions take the DESTINATION file name first and the SOURCE file name second. */
unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename);
unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename);
/*
FIO_compressFilename :
result : size of compressed file
FIO_decompressFilename :
result : size of regenerated file
*/
#if defined (__cplusplus)
}
#endif

682
programs/fullbench.c Normal file
View File

@ -0,0 +1,682 @@
/*
fullbench.c - Detailed bench program for zstd
Copyright (C) Yann Collet 2014-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/**************************************
* Compiler Options
**************************************/
/* Disable some Visual warning messages */
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE /* VS2005 */
/* Unix Large Files support (>4GB) */
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
# define _LARGEFILE_SOURCE
# define _FILE_OFFSET_BITS 64
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
# define _LARGEFILE64_SOURCE
#endif
/* S_ISREG & gettimeofday() are not supported by MSVC */
#if defined(_MSC_VER) || defined(_WIN32)
# define BMK_LEGACY_TIMER 1
#endif
/**************************************
* Includes
**************************************/
#include <stdlib.h> /* malloc */
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <sys/types.h> /* stat64 */
#include <sys/stat.h> /* stat64 */
#include <string.h> /* strcmp */
/* Use ftime() if gettimeofday() is not available on your target */
#if defined(BMK_LEGACY_TIMER)
# include <sys/timeb.h> /* timeb, ftime */
#else
# include <sys/time.h> /* gettimeofday */
#endif
#include "zstd.h"
#include "fse_static.h"
/**************************************
* Compiler Options
**************************************/
/* S_ISREG & gettimeofday() are not supported by MSVC */
#if !defined(S_ISREG)
# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#endif
/**************************************
* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
/**************************************
* Constants
**************************************/
/* Pieces of the start-up banner */
#define PROGRAM_DESCRIPTION "zStandard speed analyzer"
/* ZSTD_VERSION falls back to an empty string when it is not already defined */
#ifndef ZSTD_VERSION
# define ZSTD_VERSION ""
#endif
#define AUTHOR "Yann Collet"
/* Expands to a format string FOLLOWED BY its arguments : meant to be passed as-is to DISPLAY() */
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION, (int)(sizeof(void*)*8), AUTHOR, __DATE__
/* Postfix size multipliers : `128 KB` expands to `128 *(1<<10)` */
#define KB *(1<<10)
#define MB *(1<<20)
/* Default number of timing passes per benchmarked function */
#define NBLOOPS 6
/* Duration of one timing pass, in milliseconds (compared against BMK_GetMilliSpan()) */
#define TIMELOOP 2500
/* NOTE(review): KNUTH has the same value as prime1 below and is not referenced in the code visible here */
#define KNUTH 2654435761U
/* Upper bound applied by BMK_findMaxMem() to the amount of memory it probes */
#define MAX_MEM (1984 MB)
/* NOTE(review): DEFAULT_CHUNKSIZE is not referenced in the code visible here */
#define DEFAULT_CHUNKSIZE (4<<20)
/* Match probability handed to BMK_datagen() when benching the synthetic sample */
static double g_compressibilityDefault = 0.50;
/* Multiplier and increment of the pseudo-random generator BMK_rand() */
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
/* Size, in bytes, of the synthetic sample generated by benchSample() */
static const size_t sampleSize = 10000000;
/**************************************
* Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/**************************************
* Benchmark Parameters
**************************************/
/* Number of timing passes benchMem() runs for each benchmarked function */
static int nbIterations = NBLOOPS;

/* BMK_SetNbIterations :
   replaces the number of timing passes, then reports the value now in use on stderr */
void BMK_SetNbIterations(int nbLoops)
{
    nbIterations = nbLoops;

    DISPLAY("- %i iterations -\n", nbIterations);
}
/*********************************************************
* Private functions
*********************************************************/
/* BMK_GetMilliStart :
   returns a millisecond tick built from the wall clock.
   Only the low 20 bits of the seconds counter are kept, so the value rolls over
   every 0x100000 seconds (~12.1 days); BMK_GetMilliSpan() corrects for that. */
#if defined(BMK_LEGACY_TIMER)

static int BMK_GetMilliStart(void)
{
    /* legacy ftime() flavour */
    struct timeb now;

    ftime( &now );
    return (int) (now.millitm + (now.time & 0xfffff) * 1000);
}

#else

static int BMK_GetMilliStart(void)
{
    /* gettimeofday() flavour */
    struct timeval now;

    gettimeofday(&now, NULL);
    return (int) (now.tv_usec/1000 + (now.tv_sec & 0xfffff) * 1000);
}

#endif
/* BMK_GetMilliSpan :
   milliseconds elapsed since `nTimeStart` (a value previously returned by BMK_GetMilliStart()).
   A negative difference means the tick counter rolled over, so one full period is added back. */
static int BMK_GetMilliSpan( int nTimeStart )
{
    int const elapsed = BMK_GetMilliStart() - nTimeStart;

    return (elapsed < 0) ? (elapsed + 0x100000 * 1000) : elapsed;
}
/* BMK_findMaxMem :
   probes how much memory can really be allocated for a bench needing `requiredMem` bytes.
   The request is rounded up to the next 64 MB boundary, capped to MAX_MEM, then tried
   with decreasing sizes (64 MB steps) until malloc() succeeds.
   @return : the size that could be allocated, minus one 64 MB step kept as safety margin;
             0 when not even a single step could be obtained. */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t step = 64 MB;
    BYTE* testmem=NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;

    requiredMem += 2*step;
    while (!testmem)
    {
        /* stop before the unsigned counter reaches 0 or wraps around :
           the returned value (requiredMem - step) could only be <= 0 from here on */
        if (requiredMem <= step) return 0;
        requiredMem -= step;
        testmem = (BYTE*) malloc ((size_t)requiredMem);
    }

    free (testmem);
    return (size_t) (requiredMem - step);
}
static U64 BMK_GetFileSize(char* infilename)
{
int r;
#if defined(_MSC_VER)
struct _stat64 statbuf;
r = _stat64(infilename, &statbuf);
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
#endif
if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
return (U64)statbuf.st_size;
}
/* BMK_rotl32 : rotates `val32` left by `nbBits` bit positions */
static U32 BMK_rotl32(unsigned val32, unsigned nbBits)
{
    unsigned const upper = val32 << nbBits;
    unsigned const lower = val32 >> (32 - nbBits);

    return (upper | lower);
}

/* BMK_rand :
   advances the generator state stored in `*src` (multiply by prime1, add prime2, rotate by 13)
   and returns the new state shifted right by 9 bits */
static U32 BMK_rand(U32* src)
{
    U32 state = *src;

    state = state * prime1;
    state = state + prime2;
    state = BMK_rotl32(state, 13);
    *src = state;

    return state >> 9;
}
/* Random helpers : each one consumes values from the local variable `seed` */
#define BMK_RAND15BITS ( BMK_rand(&seed) & 0x7FFF)
#define BMK_RANDLENGTH ((BMK_rand(&seed) & 3) ? (BMK_rand(&seed) % 15) : (BMK_rand(&seed) % 510) + 15)
#define BMK_RANDCHAR (BYTE)((BMK_rand(&seed) & 63) + '0')

/* BMK_datagen :
   fills `buffer` with `bufferSize` bytes of pseudo-random, partially repetitive data.
   At each step a match (copy of earlier output, at a distance of at most 32768 bytes)
   is selected with probability `proba`, otherwise a run of random characters is emitted.
   The content only depends on `bufferSize`, `proba` and `seed`.
   An empty buffer is left untouched. */
static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
{
    BYTE* BBuffer = (BYTE*)buffer;
    size_t pos = 0;                       /* size_t : same type as bufferSize, which it is compared to */
    U32 P32 = (U32)(32768 * proba);

    /* nothing to generate : do not write the first byte out of bounds */
    if (bufferSize == 0) return;

    /* First Byte */
    BBuffer[pos++] = BMK_RANDCHAR;

    while (pos < bufferSize)
    {
        /* Select : Literal (noise) or copy (within 64K) */
        if (BMK_RAND15BITS < P32)
        {
            /* Match */
            size_t match, end;
            unsigned length = BMK_RANDLENGTH + 4;
            unsigned offset = BMK_RAND15BITS + 1;
            if (offset > pos) offset = (unsigned)pos;   /* cannot reach before the start of the buffer */
            match = pos - offset;
            end = pos + length;
            if (end > bufferSize) end = bufferSize;
            while (pos < end) BBuffer[pos++] = BBuffer[match++];
        }
        else
        {
            /* Literal */
            size_t end;
            unsigned length = BMK_RANDLENGTH;
            end = pos + length;
            if (end > bufferSize) end = bufferSize;
            while (pos < end) BBuffer[pos++] = BMK_RANDCHAR;
        }
    }
}
/*********************************************************
* Benchmark wrappers
*********************************************************/
/* NOTE(review): these types and prototypes are re-declared locally to reach functions
   that the included headers apparently do not expose - presumably they must stay
   identical to the definitions inside the zstd library; confirm before changing either side. */
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
typedef struct
{
blockType_t blockType;
U32 unusedBits;
U32 origSize;
} blockProperties_t;
/* Size of the compressed input prepared by benchMem() in `buff2`; read by the local_*() wrappers */
static size_t g_cSize = 0;
extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
extern size_t ZSTD_decodeLiteralsBlock(void* ctx, void* dst, size_t maxDstSize, const BYTE** litPtr, const void* src, size_t srcSize);
extern size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr, void* DTableLL, void* DTableML, void* DTableOffb, const void* src, size_t srcSize);
/* local_ZSTD_compress :
   bench adapter : compresses `src` into `dst` with ZSTD_compress() and returns its result.
   `buff2` is part of the common bench signature but is not used here. */
size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
{
    size_t const cSize = ZSTD_compress(dst, dstSize, src, srcSize);

    (void)buff2;
    return cSize;
}
/* local_ZSTD_decompress :
   bench adapter : decompresses the g_cSize bytes stored in `buff2` into `dst`
   and returns the result of ZSTD_decompress().
   `src` and `srcSize` are part of the common bench signature but are not used here. */
size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
{
    size_t const dSize = ZSTD_decompress(dst, dstSize, buff2, g_cSize);

    (void)src;
    (void)srcSize;
    return dSize;
}
/* local_ZSTD_decodeLiteralsBlock :
   bench adapter : runs ZSTD_decodeLiteralsBlock() on the g_cSize bytes stored in `buff2`,
   using a scratch context allocated on the stack.
   @return : distance between the literal pointer reported by the decoder and the end of `dst`.
   `src` and `srcSize` are not used. */
size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
{
    U32 scratchCtx[1<<12];
    const BYTE* litStart;
    const BYTE* const dstEnd = (const BYTE*)dst + dstSize;

    (void)src;
    (void)srcSize;

    ZSTD_decodeLiteralsBlock(scratchCtx, dst, dstSize, &litStart, buff2, g_cSize);

    return dstEnd - litStart;
}
/* local_ZSTD_decodeSeqHeaders :
   bench adapter : runs ZSTD_decodeSeqHeaders() on the g_cSize bytes stored in `buff2`,
   with decoding tables allocated on the stack, and returns its result.
   `dst`, `dstSize`, `src` and `srcSize` are not used. */
size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
{
    U32 DTableML[1<<11], DTableLL[1<<10], DTableOffb[1<<9];
    const BYTE* dumpsStart;
    size_t lastLiteralLength;
    size_t headerResult;

    (void)dst;
    (void)dstSize;
    (void)src;
    (void)srcSize;

    headerResult = ZSTD_decodeSeqHeaders(&lastLiteralLength, &dumpsStart,
                                         DTableLL, DTableML, DTableOffb,
                                         buff2, g_cSize);
    return headerResult;
}
/* local_conditionalNull :
   micro-benchmark of a conditional reset : adds up the first `srcSize` bytes of `buff2`,
   clearing the running total each time a zero byte is met, and returns the final total.
   `dst`, `dstSize` and `src` are not used.
   The statement performing the reset is the thing being measured : the commented-out lines
   are alternative spellings, each followed by a number
   (NOTE(review): presumably the speed measured for that variant - unit not stated). */
size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
{
U32 i;
size_t total = 0;
BYTE* data = buff2;
(void)dst; (void)dstSize; (void)src;
for (i=0; i < srcSize; i++)
{
U32 b = data[i];
total += b;
/* variant currently benchmarked */
if (b==0) total = 0; // 825
//if (!b) total = 0; // 825
//total = b ? total : 0; // 622
//total *= !!b; // 465
//total &= -!b; // 622
}
return total;
}
/* local_decodeLiteralsForward :
   bench adapter : decodes the g_cSize bytes stored in `buff2` into `dst` with FSE_decompress()
   and returns its result. `src` and `srcSize` are not used. */
size_t local_decodeLiteralsForward(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
{
    size_t const decodedSize = FSE_decompress(dst, dstSize, buff2, g_cSize);

    (void)src;
    (void)srcSize;
    return decodedSize;
}
/*********************************************************
* Bench functions
*********************************************************/
/* benchMem :
   times one function, selected by `benchNb`, on the `srcSize` bytes of `src`,
   and prints the best measured speed on stderr.
   benchNb : 1, 11, 31, 32 bench zstd functions; 101 and 102 are experimental test functions;
             any other value is silently ignored.
   @return : 0 when the bench completed or when `benchNb` is unknown,
             12 when the working buffers cannot be allocated.
   The process exits with code 1 if the benchmarked function reports a zstd error. */
size_t benchMem(void* src, size_t srcSize, U32 benchNb)
{
BYTE* dstBuff;
size_t dstBuffSize;
BYTE* buff2;
int loopNb;
const char* benchName;
size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize);
double bestTime = 100000000.;
size_t errorCode = 0;
// Declaration
/* Select the function to time and the name displayed for it */
switch(benchNb)
{
case 1:
benchFunction = local_ZSTD_compress; benchName = "ZSTD_compress";
break;
case 11:
benchFunction = local_ZSTD_decompress; benchName = "ZSTD_decompress";
break;
case 31:
benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "ZSTD_decodeLiteralsBlock";
break;
case 32:
benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders";
break;
case 101:
benchFunction = local_conditionalNull; benchName = "conditionalNull";
break;
case 102:
benchFunction = local_decodeLiteralsForward; benchName = "ZSTD_decodeLiteralsForward";
break;
default :
/* unknown bench number : nothing to do (benchNb is scanned over a whole range by the callers) */
return 0;
}
/* Allocation */
/* dstBuff receives the output of the timed function; buff2 holds its prepared input */
dstBuffSize = srcSize + 512;
dstBuff = malloc(dstBuffSize);
buff2 = malloc(dstBuffSize);
if ((!dstBuff) || (!buff2))
{
DISPLAY("\nError: not enough memory!\n");
free(dstBuff); free(buff2);
return 12;
}
/* Preparation */
/* Builds in buff2 the input expected by the timed function, and records its size in g_cSize */
switch(benchNb)
{
case 11:
/* the whole compressed frame is the input */
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize);
break;
case 31: // ZSTD_decodeLiteralsBlock
{
blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
/* offset 7 = 4 bytes of magic number + 3 bytes of first block header (same skips as case 32) */
g_cSize = ZSTD_getcBlockSize(dstBuff+7, dstBuffSize, &bp) + 3;
memcpy(buff2, dstBuff+7, g_cSize);
//srcSize = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize); // real speed
srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block
break;
}
case 32: // ZSTD_decodeSeqHeaders
{
blockProperties_t bp;
const BYTE* ip = dstBuff;
const BYTE* iend;
size_t blockSize;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
ip += 4; // Jump magic Number
blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); // Get first block compressed size
iend = ip + 3 + blockSize; // Get end of first block
ip += 3; // jump first block header
ip += ZSTD_getcBlockSize(ip, iend - ip, &bp) + 3; // jump literal sub block and its header
g_cSize = iend-ip;
memcpy(buff2, ip, g_cSize); // copy rest of block (starting with SeqHeader)
srcSize = srcSize > 128 KB ? 128 KB : srcSize; // speed relative to block
break;
}
/* test functions */
case 101: // conditionalNull
{
/* input : pseudo-random values in [0,15], so that zeroes show up regularly */
size_t i;
U32 seed = (U32)srcSize;
for (i=0; i<srcSize; i++)
buff2[i] = (BYTE)(BMK_rand(&seed) & 15);
break;
}
case 102: //
{
blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
/* size is read from the sub-block header at offset 7; the copy starts 3 bytes later, right after that header */
g_cSize = ZSTD_getcBlockSize(dstBuff+7, dstBuffSize, &bp);
memcpy(buff2, dstBuff+10, g_cSize);
//srcSize = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize); // real speed
srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block
break;
}
default : ;
}
/* Timing : nbIterations passes of TIMELOOP milliseconds each; the best pass is kept */
for (loopNb = 1; loopNb <= nbIterations; loopNb++)
{
double averageTime;
int milliTime;
U32 nbRounds=0;
DISPLAY("%2i- %-30.30s : \r", loopNb, benchName);
{ size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; } /* warming up memory */
/* wait for the millisecond counter to change, so that the pass starts on a tick boundary */
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
errorCode = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
if (ZSTD_isError(errorCode)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(errorCode)); exit(1); }
nbRounds++;
}
milliTime = BMK_GetMilliSpan(milliTime);
/* average duration of one call during this pass, in milliseconds */
averageTime = (double)milliTime / nbRounds;
if (averageTime < bestTime) bestTime = averageTime;
/* bytes per millisecond / 1000 is displayed as MB/s; the last field is the latest value returned by the function */
DISPLAY("%2i- %-30.30s : %7.1f MB/s (%9u)\r", loopNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);
}
/* final line : same figures, prefixed with the bench number instead of the pass number */
DISPLAY("%2u- %-30.30s : %7.1f MB/s (%9u)\n", benchNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);
free(dstBuff);
free(buff2);
return 0;
}
/* benchSample :
   benches a synthetic sample of `sampleSize` bytes generated by BMK_datagen().
   benchNb : function to bench; 0 means every bench number from 0 to 99.
   @return : 0 on completion, 12 if the sample cannot be allocated */
int benchSample(U32 benchNb)
{
    const char* const name = "Sample50";
    size_t const benchedSize = sampleSize;
    char* const origBuff = (char*) malloc((size_t)benchedSize);

    /* Allocation check */
    if (origBuff == NULL)
    {
        DISPLAY("\nError: not enough memory!\n");
        return 12;
    }

    /* Fill buffer */
    BMK_datagen(origBuff, benchedSize, g_compressibilityDefault, 0);

    /* bench */
    DISPLAY("\r%79s\r", "");
    DISPLAY(" %s : \n", name);
    if (benchNb != 0)
    {
        benchMem(origBuff, benchedSize, benchNb);
    }
    else
    {
        U32 benchId;
        for (benchId = 0; benchId < 100; benchId++)
            benchMem(origBuff, benchedSize, benchId);
    }

    free(origBuff);
    return 0;
}
/* benchFiles :
   loads each file of `fileNamesTable` into memory (truncated if it does not fit) and benches it.
   benchNb : function to bench; 0 means every bench number from 0 to 99, for EVERY file.
   @return : 0 on completion,
             11 if a file cannot be opened,
             12 if its buffer cannot be allocated,
             13 if it cannot be fully read.
   Processing stops at the first failing file. */
int benchFiles(char** fileNamesTable, int nbFiles, U32 benchNb)
{
    int fileIdx=0;

    /* Loop for each file */
    while (fileIdx<nbFiles)
    {
        FILE* inFile;
        char* inFileName;
        U64 inFileSize;
        size_t benchedSize;
        size_t readSize;
        char* origBuff;

        /* Check file existence */
        inFileName = fileNamesTable[fileIdx++];
        inFile = fopen( inFileName, "rb" );
        if (inFile==NULL)
        {
            DISPLAY( "Pb opening %s\n", inFileName);
            return 11;
        }

        /* Memory allocation & restrictions */
        inFileSize = BMK_GetFileSize(inFileName);
        benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
        if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
        if (benchedSize < inFileSize)
        {
            DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
        }

        /* Alloc */
        origBuff = (char*) malloc((size_t)benchedSize);
        if(!origBuff)
        {
            DISPLAY("\nError: not enough memory!\n");
            fclose(inFile);
            return 12;
        }

        /* Fill input buffer */
        DISPLAY("Loading %s... \r", inFileName);
        readSize = fread(origBuff, 1, benchedSize, inFile);
        fclose(inFile);

        if(readSize != benchedSize)
        {
            DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
            free(origBuff);
            return 13;
        }

        /* bench */
        DISPLAY("\r%79s\r", "");
        DISPLAY(" %s : \n", inFileName);
        if (benchNb)
            benchMem(origBuff, benchedSize, benchNb);
        else
        {
            /* dedicated counter : `benchNb` must remain 0,
               otherwise the following files would only run bench #100 (which does not exist) */
            U32 benchId;
            for (benchId=0; benchId<100; benchId++) benchMem(origBuff, benchedSize, benchId);
        }

        /* release this file's content before loading the next one */
        free(origBuff);
    }

    return 0;
}
/* usage :
   prints the short help text on stderr; `exename` is inserted in the command template.
   @return : always 0 */
int usage(char* exename)
{
    DISPLAY( "Usage :\n");
    DISPLAY( " %s [arg] file1 file2 ... fileX\n", exename);

    DISPLAY( "Arguments :\n");
    DISPLAY( " -H/-h : Help (this text + advanced options)\n");

    return 0;
}
/* usage_advanced :
   prints the list of advanced options on stderr.
   @return : always 0 */
int usage_advanced(void)
{
    DISPLAY( "\nAdvanced options :\n");

    DISPLAY( " -b# : test only function # \n");
    DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);

    return 0;
}
/* badusage :
   reports an invalid command line on stderr, followed by the short help text.
   @return : always 0 (the caller decides on the exit code) */
int badusage(char* exename)
{
    DISPLAY("Wrong parameters\n");
    (void)usage(exename);

    return 0;
}
/* main (fullbench) :
   parses the command line, then benches either the listed files
   or, when no file name is given, a synthetic sample.
   Options may be aggregated behind a single '-' :
     -h / -H : display help and exit
     -p      : wait for a key press before exiting (hidden option)
     -b#     : bench only function #
     -i#     : number of timing passes (single digit, 1-9)
   Every argument from the first file name onwards is handed to benchFiles(). */
int main(int argc, char** argv)
{
    int argIdx;
    int firstFileIdx = 0;
    int status;
    char* exename = argv[0];
    char* input_filename = 0;
    U32 benchNb = 0, main_pause = 0;

    /* Welcome message */
    DISPLAY(WELCOME_MESSAGE);

    if (argc<1) { badusage(exename); return 1; }

    for (argIdx = 1; argIdx < argc; argIdx++)
    {
        char* arg = argv[argIdx];

        if (arg == NULL) continue;   /* Protection if argument empty */

        if (arg[0] != '-')
        {
            /* first provided filename is input */
            if (!input_filename) { input_filename = arg; firstFileIdx = argIdx; }
            continue;
        }

        /* Decode command (note : aggregated commands are allowed) */
        while (arg[1] != 0)
        {
            arg++;

            switch (arg[0])
            {
                /* Display help on usage */
            case 'h' :
            case 'H':
                usage(exename);
                usage_advanced();
                return 0;

                /* Pause at the end (hidden option) */
            case 'p':
                main_pause = 1;
                break;

                /* Select specific bench algorithm only */
            case 'b':
                benchNb = 0;
                while ((arg[1] >= '0') && (arg[1] <= '9'))
                {
                    benchNb *= 10;
                    benchNb += arg[1] - '0';
                    arg++;
                }
                break;

                /* Modify Nb Iterations */
            case 'i':
                if ((arg[1] >= '1') && (arg[1] <= '9'))
                {
                    BMK_SetNbIterations(arg[1] - '0');
                    arg++;
                }
                break;

                /* Unknown command */
            default :
                badusage(exename);
                return 1;
            }
        }
    }

    status = (firstFileIdx == 0)
           ? benchSample(benchNb)
           : benchFiles(argv + firstFileIdx, argc - firstFileIdx, benchNb);

    if (main_pause) { printf("press enter...\n"); getchar(); }

    return status;
}

494
programs/fuzzer.c Normal file
View File

@ -0,0 +1,494 @@
/*
Fuzzer test tool for zstd
Copyright (C) Yann Collet 2014-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- ZSTD source repository : https://github.com/Cyan4973/zstd
- ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/**************************************
* Compiler specific
**************************************/
#ifdef _MSC_VER /* Visual Studio */
# define _CRT_SECURE_NO_WARNINGS /* fgets */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */
#endif
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
#endif
/**************************************
* Includes
**************************************/
#include <stdlib.h> /* free */
#include <stdio.h> /* fgets, sscanf */
#include <sys/timeb.h> /* timeb */
#include <string.h> /* strcmp */
#include "zstd_static.h"
#include "xxhash.h" /* XXH64 */
/**************************************
* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
/**************************************
Constants
**************************************/
#ifndef ZSTD_VERSION
# define ZSTD_VERSION ""
#endif
#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
static const U32 nbTestsDefault = 32 KB;
#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
#define FUZ_COMPRESSIBILITY_DEFAULT 50
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
/**************************************
* Display Macros
**************************************/
/* DISPLAY : unconditional message, sent to stderr */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL : message printed only when the verbosity is at least `l` */
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static U32 g_displayLevel = 2;
/* DISPLAYUPDATE : progress message, rate-limited to one every g_refreshRate milliseconds
   (no limit at verbosity 4 and above, where the stream is also flushed after each message).
   The flush targets stderr, since this is the stream DISPLAY writes to. */
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
            if ((FUZ_GetMilliSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) \
            { g_time = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
            if (g_displayLevel>=4) fflush(stderr); } }
/* minimum delay between two progress messages, in milliseconds */
static const U32 g_refreshRate = 150;
/* tick (as returned by FUZ_GetMilliStart()) of the last progress message */
static U32 g_time = 0;
/*********************************************************
* Fuzzer functions
*********************************************************/
/* FUZ_GetMilliStart :
   returns a millisecond tick built from ftime().
   Only the low 20 bits of the seconds counter are kept, so the value rolls over
   every 0x100000 seconds; FUZ_GetMilliSpan() corrects for that. */
static U32 FUZ_GetMilliStart(void)
{
    struct timeb now;

    ftime( &now );
    return (U32) (((now.time & 0xFFFFF) * 1000) + now.millitm);
}
/* FUZ_GetMilliSpan :
   milliseconds elapsed since `nTimeStart` (a value previously returned by FUZ_GetMilliStart()).
   When the current tick is lower than the starting one, the counter rolled over
   and one full period is added back. */
static U32 FUZ_GetMilliSpan(U32 nTimeStart)
{
    U32 const now = FUZ_GetMilliStart();
    U32 elapsed = now - nTimeStart;

    if (now < nTimeStart)
        elapsed += 0x100000 * 1000;

    return elapsed;
}
/* FUZ_rotl32 : rotates the 32-bit value `x` left by `r` bit positions */
# define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))

/* FUZ_rand :
   advances the generator state stored in `*src` (multiply by prime1, add prime2, rotate by 13)
   and returns the new state shifted right by 5 bits */
unsigned int FUZ_rand(unsigned int* src)
{
    U32 state = *src;

    state = state * prime1;
    state = state + prime2;
    state = FUZ_rotl32(state, 13);
    *src = state;

    return state >> 5;
}
/* Random helpers : each one consumes values from the local pointer `seed` */
#define FUZ_RAND15BITS (FUZ_rand(seed) & 0x7FFF)
#define FUZ_RANDLENGTH ( (FUZ_rand(seed) & 3) ? (FUZ_rand(seed) % 15) : (FUZ_rand(seed) % 510) + 15)

/* FUZ_generateSynthetic :
   fills `buffer` with `bufferSize` bytes of pseudo-random, partially repetitive data.
   At each step a match (copy of earlier output, at a distance of at most 32768 bytes)
   is selected with probability `proba`, otherwise a run of random characters is emitted.
   `*seed` is the generator state : it is updated, so successive calls produce different data.
   An empty buffer is left untouched. */
static void FUZ_generateSynthetic(void* buffer, size_t bufferSize, double proba, U32* seed)
{
    BYTE* BBuffer = (BYTE*)buffer;
    size_t pos = 0;                       /* size_t : same type as bufferSize, which it is compared to */
    U32 P32 = (U32)(32768 * proba);

    /* nothing to generate : do not write the first byte out of bounds */
    if (bufferSize == 0) return;

    /* First Byte */
    BBuffer[pos++] = (BYTE)((FUZ_rand(seed) & 0x3F) + '0');

    while (pos < bufferSize)
    {
        /* Select : Literal (noise) or copy (within 64K) */
        if (FUZ_RAND15BITS < P32)
        {
            /* Copy (within 64K) */
            size_t match, end;
            size_t length = FUZ_RANDLENGTH + 4;
            size_t offset = FUZ_RAND15BITS + 1;
            if (offset > pos) offset = pos;   /* cannot reach before the start of the buffer */
            if (pos + length > bufferSize) length = bufferSize - pos;
            match = pos - offset;
            end = pos + length;
            while (pos < end) BBuffer[pos++] = BBuffer[match++];
        }
        else
        {
            /* Literal (noise) */
            size_t end;
            size_t length = FUZ_RANDLENGTH;
            if (pos + length > bufferSize) length = bufferSize - pos;
            end = pos + length;
            while (pos < end) BBuffer[pos++] = (BYTE)((FUZ_rand(seed) & 0x3F) + '0');
        }
    }
}
/*
static unsigned FUZ_highbit(U32 v32)
{
unsigned nbBits = 0;
if (v32==0) return 0;
while (v32)
{
v32 >>= 1;
nbBits ++;
}
return nbBits;
}
*/
/* basicUnitTests :
   fixed sequence of sanity checks run on one synthetic buffer of COMPRESSIBLE_NOISE_LENGTH bytes :
   compression, decompression, content comparison, then decoder reactions to
   truncated input, oversized input, too short input and wrong magic number.
   seed            : initial state of the generator producing the test buffer
   compressibility : match probability handed to FUZ_generateSynthetic()
   @return : 0 if all tests pass, 1 on first failure (including allocation failure) */
static int basicUnitTests(U32 seed, double compressibility)
{
    int testResult = 0;
    void* CNBuffer;
    void* compressedBuffer;
    void* decodedBuffer;
    U32 randState = seed;
    size_t result, cSize;
    U32 testNb=0;

    /* Create compressible test buffer */
    CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
    compressedBuffer = malloc(ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH));
    decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
    /* none of the tests below can run without its buffers */
    if (!CNBuffer || !compressedBuffer || !decodedBuffer)
    {
        DISPLAY("Not enough memory, unit tests cancelled \n");
        testResult = 1;
        goto _end;
    }
    FUZ_generateSynthetic(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, &randState);

    /* Basic tests */
    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
    result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH);
    if (ZSTD_isError(result)) goto _output_error;
    cSize = result;
    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);

    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
    if (ZSTD_isError(result)) goto _output_error;
    DISPLAYLEVEL(4, "OK \n");

    {
        size_t i;
        DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
        for (i=0; i<COMPRESSIBLE_NOISE_LENGTH; i++)
        {
            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
        }
        DISPLAYLEVEL(4, "OK \n");
    }

    /* the decoder must reject an input which is one byte too short ... */
    DISPLAYLEVEL(4, "test%3i : decompress with 1 missing byte : ", testNb++);
    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize-1);
    if (!ZSTD_isError(result)) goto _output_error;
    if (result != (size_t)-ZSTD_ERROR_wrongSrcSize) goto _output_error;
    DISPLAYLEVEL(4, "OK \n");

    /* ... or one byte too long */
    DISPLAYLEVEL(4, "test%3i : decompress with 1 too much byte : ", testNb++);
    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize+1);
    if (!ZSTD_isError(result)) goto _output_error;
    if (result != (size_t)-ZSTD_ERROR_wrongSrcSize) goto _output_error;
    DISPLAYLEVEL(4, "OK \n");

    /* Decompression defense tests */
    DISPLAYLEVEL(4, "test%3i : Check input length for magic number : ", testNb++);
    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 3);
    if (!ZSTD_isError(result)) goto _output_error;
    if (result != (size_t)-ZSTD_ERROR_wrongSrcSize) goto _output_error;
    DISPLAYLEVEL(4, "OK \n");

    DISPLAYLEVEL(4, "test%3i : Check magic Number : ", testNb++);
    ((char*)(CNBuffer))[0] = 1;
    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 4);
    if (!ZSTD_isError(result)) goto _output_error;
    if (result != (size_t)-ZSTD_ERROR_wrongMagicNumber) goto _output_error;
    DISPLAYLEVEL(4, "OK \n");

_end:
    /* free(NULL) is a no-op, so this is safe after a failed allocation too */
    free(CNBuffer);
    free(compressedBuffer);
    free(decodedBuffer);
    return testResult;

_output_error:
    testResult = 1;
    DISPLAY("Error detected in Unit tests ! \n");
    goto _end;
}
/* findDiff :
   compares the first `max` bytes of `buf1` and `buf2`.
   @return : position of the first differing byte, or `max` when both ranges are identical */
static size_t findDiff(const void* buf1, const void* buf2, size_t max)
{
    const BYTE* const left  = buf1;
    const BYTE* const right = buf2;
    size_t pos = 0;

    while ((pos < max) && (left[pos] == right[pos]))
        pos++;

    return pos;
}
/* CHECK : when `cond` is true, prints the message with the current seed and test number,
   then jumps to the local label _output_error.
   Requires `seed`, `testNb` and that label to exist in the calling function. */
# define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
                          DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; }

/* log2 of the size of the source buffer samples are picked from */
static const U32 maxSrcLog = 23;
/* samples are strictly smaller than 1<<maxSampleLog bytes */
static const U32 maxSampleLog = 22;

/* fuzzerTests :
   round-trip tests (compress, decompress, compare checksums) on samples of random
   size and position, picked inside one synthetic source buffer.
   seed            : initial state of the random generator
   nbTests         : tests are numbered from `startTest` to `nbTests`-1
   startTest       : first test to run; the generator is advanced accordingly,
                     so that test #N is the same whatever the starting point
   compressibility : match probability of the synthetic source buffer
   @return : 0 if all tests pass, 1 on first failure */
int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
{
    BYTE* srcBuffer;
    BYTE* cBuffer;
    BYTE* dstBuffer;
    size_t srcBufferSize = (size_t)1<<maxSrcLog;
    size_t dstBufferSize = (size_t)1<<maxSampleLog;
    size_t cBufferSize = ZSTD_compressBound(dstBufferSize);
    const size_t dSupSizeMax = 32;     /* largest extra capacity announced to the decoder (see dSupSize) */
    U32 result = 0;
    U32 testNb = 0;
    U32 coreSeed = seed, lseed = 0;

    /* allocation */
    srcBuffer = malloc (srcBufferSize);
    /* the decoder is told that up to dSupSizeMax bytes beyond the sample are available :
       the allocation must really provide them */
    dstBuffer = malloc (dstBufferSize + dSupSizeMax);
    cBuffer = malloc (cBufferSize);
    CHECK (!srcBuffer || !dstBuffer || !cBuffer, "Not enough memory, fuzzer tests cancelled");

    /* Create initial sample, honouring the requested compressibility */
    FUZ_generateSynthetic(srcBuffer, srcBufferSize, compressibility, &coreSeed);

    /* catch up testNb */
    for (testNb=0; testNb < startTest; testNb++)
        FUZ_rand(&coreSeed);

    /* test loop */
    for (testNb=startTest; testNb < nbTests; testNb++)
    {
        size_t sampleSize, sampleStart;
        size_t cSize, dSize, dSupSize;
        U32 sampleSizeLog;
        U64 crcOrig, crcDest;

        /* init */
        DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests);
        FUZ_rand(&coreSeed);
        lseed = coreSeed ^ prime1;
        sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
        sampleSize = (size_t)1<<sampleSizeLog;
        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
        sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
        crcOrig = XXH64(srcBuffer + sampleStart, sampleSize, 0);

        /* compression tests*/
        cSize = ZSTD_compress(cBuffer, cBufferSize, srcBuffer + sampleStart, sampleSize);
        CHECK(ZSTD_isError(cSize), "ZSTD_compress failed");

        /* decompression tests*/
        /* destination capacity is either exact, or 1 to dSupSizeMax bytes larger than needed */
        dSupSize = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
        dSize = ZSTD_decompress(dstBuffer, sampleSize + dSupSize, cBuffer, cSize);
        CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s)", ZSTD_getErrorName(dSize));
        crcDest = XXH64(dstBuffer, sampleSize, 0);
        CHECK(crcOrig != crcDest, "dstBuffer corrupted (pos %u / %u)", (U32)findDiff(srcBuffer+sampleStart, dstBuffer, sampleSize), (U32)sampleSize);
    }
    DISPLAY("\rAll fuzzer tests completed \n");

_cleanup:
    free(srcBuffer);
    free(cBuffer);
    free(dstBuffer);
    return result;

_output_error:
    result = 1;
    goto _cleanup;
}
/*********************************************************
* Command line
*********************************************************/
/* FUZ_usage :
   prints the command line help on stderr.
   The compressibility option is listed as -P, which is the letter the parser accepts
   (lower-case -p is the "pause at the end" switch).
   @return : always 0 */
int FUZ_usage(char* programName)
{
    DISPLAY( "Usage :\n");
    DISPLAY( " %s [args]\n", programName);
    DISPLAY( "\n");
    DISPLAY( "Arguments :\n");
    DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault);
    DISPLAY( " -s# : Select seed (default:prompt user)\n");
    DISPLAY( " -t# : Select starting test number (default:0)\n");
    DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
    DISPLAY( " -v : verbose\n");
    DISPLAY( " -h : display help and exit\n");
    return 0;
}
/*
 * main() :
 * Command-line driver of the fuzzer.
 * Parses aggregated single-letter options ("-v", "-i100", "-s42t5", ...), then runs
 * basicUnitTests() (only when the starting test number is 0) followed by fuzzerTests().
 * @return : the first non-zero result of the test functions, 0 otherwise.
 *           Help ('h') and unknown options return the value of FUZ_usage().
 */
int main(int argc, char** argv)
{
    char* const exeName = argv[0];
    U32 seed = 0;
    int seedGiven = 0;
    int nbTests = nbTestsDefault;
    int firstTest = 0;
    int compressibility = FUZ_COMPRESSIBILITY_DEFAULT;
    U32 pauseAtEnd = 0;
    int status = 0;
    int argIdx;

    /* Check command line */
    for (argIdx = 1; argIdx < argc; argIdx++)
    {
        char* arg = argv[argIdx];

        if (!arg) continue;              /* Protection if argument empty */
        if (arg[0] != '-') continue;     /* only '-' prefixed commands are interpreted */

        /* Aggregated commands are allowed : consume letters until end of string */
        arg++;
        while (*arg != 0)
        {
            const char option = *arg;

            if (option == 'v')
            {
                arg++;
                g_displayLevel = 4;
            }
            else if (option == 'q')
            {
                arg++;
                g_displayLevel--;
            }
            else if (option == 'p')      /* pause at the end */
            {
                arg++;
                pauseAtEnd = 1;
            }
            else if (option == 'i')      /* number of tests */
            {
                arg++;
                nbTests = 0;
                for ( ; (*arg>='0') && (*arg<='9'); arg++)
                    nbTests = nbTests * 10 + (*arg - '0');
            }
            else if (option == 's')      /* seed */
            {
                arg++;
                seed = 0;
                seedGiven = 1;
                for ( ; (*arg>='0') && (*arg<='9'); arg++)
                {
                    seed *= 10;
                    seed += *arg - '0';
                }
            }
            else if (option == 't')      /* starting test number */
            {
                arg++;
                firstTest = 0;
                for ( ; (*arg>='0') && (*arg<='9'); arg++)
                    firstTest = firstTest * 10 + (*arg - '0');
            }
            else if (option == 'P')      /* compressibility % */
            {
                arg++;
                compressibility = 0;
                for ( ; (*arg>='0') && (*arg<='9'); arg++)
                    compressibility = compressibility * 10 + (*arg - '0');
                if (compressibility < 0) compressibility = 0;
                if (compressibility > 100) compressibility = 100;
            }
            else
            {
                /* 'h' and any unrecognized letter : print help and stop */
                return FUZ_usage(exeName);
            }
        }
    }

    /* Get Seed */
    DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION);
    if (!seedGiven) seed = FUZ_GetMilliStart() % 10000;
    DISPLAY("Seed = %u\n", seed);
    if (compressibility != FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", compressibility);

    if (nbTests <= 0) nbTests = 1;

    /* unit tests only run when starting from test 0; constant seed for predictability */
    if (firstTest == 0)
        status = basicUnitTests(0, ((double)compressibility) / 100);
    if (!status)
        status = fuzzerTests(seed, nbTests, firstTest, ((double)compressibility) / 100);

    if (pauseAtEnd)
    {
        DISPLAY("Press Enter \n");
        getchar();
    }
    return status;
}

928
programs/xxhash.c Normal file
View File

@ -0,0 +1,928 @@
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2014, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
- public discussion board : https://groups.google.com/forum/#!forum/lz4c
*/
//**************************************
// Tuning parameters
//**************************************
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
// For other CPUs, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
# define XXH_USE_UNALIGNED_ACCESS 1
#endif
// XXH_ACCEPT_NULL_INPUT_POINTER :
// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
// This option has a very small performance cost (only measurable on small inputs).
// By default, this option is disabled. To enable it, uncomment below define :
// #define XXH_ACCEPT_NULL_INPUT_POINTER 1
// XXH_FORCE_NATIVE_FORMAT :
// By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
// Results are therefore identical for little-endian and big-endian CPU.
// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
// Should endian-independence be of no importance for your application, you may set the #define below to 1.
// It will improve speed for Big-endian CPU.
// This option has no impact on Little_Endian CPU.
#define XXH_FORCE_NATIVE_FORMAT 0
//**************************************
// Compiler Specific Options
//**************************************
// Disable some Visual warning messages
#ifdef _MSC_VER // Visual Studio
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
#endif
#ifdef _MSC_VER // Visual Studio
# define FORCE_INLINE static __forceinline
#else
# ifdef __GNUC__
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
#endif
//**************************************
// Includes & Memory related functions
//**************************************
#include "xxhash.h"
// Modify the local functions below should you wish to use some other memory routines
// for malloc(), free()
#include <stdlib.h>
// Allocation hook of this file : forwards the request for `s` bytes to malloc().
static void* XXH_malloc(size_t s)
{
    void* const block = malloc(s);
    return block;
}
// Release hook of this file : hands `p` back to free().
static void XXH_free (void* p)
{
    free(p);
}
// for memcpy()
#include <string.h>
// Copy hook of this file : copies `size` bytes from `src` to `dest` with memcpy()
// and returns memcpy()'s result.
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    void* const written = memcpy(dest, src, size);
    return written;
}
//**************************************
// Basic Types
//**************************************
// Integer aliases used throughout this file.
// C99 compilers take them from <stdint.h>; other compilers fall back to built-in types.
// NOTE(review): the fallback presumably assumes 16-bit short, 32-bit int and 64-bit long long - confirm for the target.
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
// Single-member wrapper structs through which A32()/A64() read a U32/U64.
// When XXH_USE_UNALIGNED_ACCESS is not defined, the structs are packed :
// with __attribute__((packed)) on GCC, with #pragma pack on other compilers.
// When XXH_USE_UNALIGNED_ACCESS is defined, _PACKED is empty and no pragma is emitted.
#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
# define _PACKED __attribute__ ((packed))
#else
# define _PACKED
#endif
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# ifdef __IBMC__
# pragma pack(1)
# else
# pragma pack(push, 1)
# endif
#endif
typedef struct _U32_S
{
U32 v;
} _PACKED U32_S;
typedef struct _U64_S
{
U64 v;
} _PACKED U64_S;
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# pragma pack(pop)
#endif
// A32(x) / A64(x) : read the 32-bit / 64-bit value stored at address x through the wrapper structs above.
#define A32(x) (((U32_S *)(x))->v)
#define A64(x) (((U64_S *)(x))->v)
//***************************************
// Compiler-specific Functions and Macros
//***************************************
// GCC_VERSION : major*100 + minor, e.g. 403 for GCC 4.3 (used to select builtins further down).
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
// XXH_rotl32(x,r) / XXH_rotl64(x,r) : rotate x left by r bits.
// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
// Every use of x and r is parenthesized, so compound arguments (e.g. `a | b`) are rotated as a whole.
// Both arguments are expanded twice : do not pass expressions with side effects.
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
#if defined(_MSC_VER) // Visual Studio
# define XXH_swap32 _byteswap_ulong
# define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
# define XXH_swap64 __builtin_bswap64
#else
// Portable fallback : returns x with its four bytes in reverse order.
static inline U32 XXH_swap32 (U32 x)
{
    const U32 toByte3 = (x << 24) & 0xff000000;
    const U32 toByte2 = (x << 8) & 0x00ff0000;
    const U32 toByte1 = (x >> 8) & 0x0000ff00;
    const U32 toByte0 = (x >> 24) & 0x000000ff;

    return toByte3 | toByte2 | toByte1 | toByte0;
}
// Portable fallback : returns x with its eight bytes in reverse order.
static inline U64 XXH_swap64 (U64 x)
{
    // bytes moving towards the most significant end
    const U64 up56 = (x << 56) & 0xff00000000000000ULL;
    const U64 up40 = (x << 40) & 0x00ff000000000000ULL;
    const U64 up24 = (x << 24) & 0x0000ff0000000000ULL;
    const U64 up8 = (x << 8) & 0x000000ff00000000ULL;
    // bytes moving towards the least significant end
    const U64 down8 = (x >> 8) & 0x00000000ff000000ULL;
    const U64 down24 = (x >> 24) & 0x0000000000ff0000ULL;
    const U64 down40 = (x >> 40) & 0x000000000000ff00ULL;
    const U64 down56 = (x >> 56) & 0x00000000000000ffULL;

    return up56 | up40 | up24 | up8 | down8 | down24 | down40 | down56;
}
#endif
//**************************************
// Constants
//**************************************
// Multiplicative / additive constants of the 32-bit hash rounds (PRIME32_*)
// and of the 64-bit hash rounds (PRIME64_*) used by the functions below.
#define PRIME32_1 2654435761U
#define PRIME32_2 2246822519U
#define PRIME32_3 3266489917U
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U
#define PRIME64_1 11400714785074694791ULL
#define PRIME64_2 14029467366897019727ULL
#define PRIME64_3 1609587929392839161ULL
#define PRIME64_4 9650029242287828579ULL
#define PRIME64_5 2870177450012600261ULL
//**************************************
// Architecture Macros
//**************************************
// Byte order used when interpreting input words. The numeric values (0 / 1) line up with
// the result of XXH_CPU_LITTLE_ENDIAN, which callers cast to this enum.
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
// Runtime probe : reads the first byte of an int holding 1, which is non-zero on a little-endian CPU.
// The probe reads through a `const char*` so the const qualifier of `one` is preserved.
static const int one = 1;
#  define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&one))
#endif
//**************************************
// Macros
//**************************************
// XXH_STATIC_ASSERT(c) : compile-time check. When c is false, the enum initializer becomes 1/0,
// which the compiler rejects. Expands to a braced block, hence the placement rule below.
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
//****************************
// Memory reads
//****************************
// Selects the load path of the XXH_readLE*_align() readers : direct dereference (XXH_aligned)
// or access through the A32()/A64() wrappers (XXH_unaligned).
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
// XXH_readLE32_align() :
// Load the 32-bit word stored at ptr.
// align  : XXH_unaligned loads through A32(); any other value dereferences ptr directly as a U32.
// endian : XXH_littleEndian returns the loaded word unchanged; any other value returns it byte-swapped.
FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
    U32 word;

    if (align!=XXH_unaligned)
        word = *(U32*)ptr;
    else
        word = A32(ptr);

    if (endian==XXH_littleEndian) return word;
    return XXH_swap32(word);
}
// XXH_readLE32() :
// Same as XXH_readLE32_align() with the XXH_unaligned load path.
FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
{
    const U32 word = XXH_readLE32_align(ptr, endian, XXH_unaligned);
    return word;
}
// XXH_readLE64_align() :
// Load the 64-bit word stored at ptr.
// align  : XXH_unaligned loads through A64(); any other value dereferences ptr directly as a U64.
// endian : XXH_littleEndian returns the loaded word unchanged; any other value returns it byte-swapped.
FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
    U64 word;

    if (align!=XXH_unaligned)
        word = *(U64*)ptr;
    else
        word = A64(ptr);

    if (endian==XXH_littleEndian) return word;
    return XXH_swap64(word);
}
// XXH_readLE64() :
// Same as XXH_readLE64_align() with the XXH_unaligned load path.
FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
{
    const U64 word = XXH_readLE64_align(ptr, endian, XXH_unaligned);
    return word;
}
//****************************
// Simple Hash Functions
//****************************
// XXH32_endian_align() :
// One-shot 32-bit hash of the `len` bytes at `input`, seeded with `seed`.
// `endian` and `align` are forwarded to XXH_readLE32_align() for every 32-bit load.
// Structure : 16-byte stripes feed four accumulators, then remaining 4-byte words and
// single bytes are folded in, then the result goes through the final mixing steps.
FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* cursor = (const BYTE*)input;
    const BYTE* inputEnd = cursor + len;
    U32 hash;
// Note : this macro stays defined for the rest of the file and relies on locals named `endian` and `align`.
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (cursor==NULL)
    {
        // treat a null input as an empty one; the pointer value is never dereferenced
        len=0;
        inputEnd=cursor=(const BYTE*)(size_t)16;
    }
#endif

    if (len<16)
    {
        // short input : no stripe to process
        hash = seed + PRIME32_5;
    }
    else
    {
        const BYTE* const lastStripe = inputEnd - 16;
        U32 lane1 = seed + PRIME32_1 + PRIME32_2;
        U32 lane2 = seed + PRIME32_2;
        U32 lane3 = seed + 0;
        U32 lane4 = seed - PRIME32_1;

        do
        {
            lane1 += XXH_get32bits(cursor) * PRIME32_2;
            cursor += 4;
            lane1 = XXH_rotl32(lane1, 13) * PRIME32_1;

            lane2 += XXH_get32bits(cursor) * PRIME32_2;
            cursor += 4;
            lane2 = XXH_rotl32(lane2, 13) * PRIME32_1;

            lane3 += XXH_get32bits(cursor) * PRIME32_2;
            cursor += 4;
            lane3 = XXH_rotl32(lane3, 13) * PRIME32_1;

            lane4 += XXH_get32bits(cursor) * PRIME32_2;
            cursor += 4;
            lane4 = XXH_rotl32(lane4, 13) * PRIME32_1;
        }
        while (cursor<=lastStripe);

        hash = XXH_rotl32(lane1, 1) + XXH_rotl32(lane2, 7) + XXH_rotl32(lane3, 12) + XXH_rotl32(lane4, 18);
    }

    hash += (U32) len;

    // remaining whole 32-bit words
    for ( ; cursor+4<=inputEnd; cursor+=4)
    {
        hash += XXH_get32bits(cursor) * PRIME32_3;
        hash = XXH_rotl32(hash, 17) * PRIME32_4;
    }

    // remaining single bytes
    for ( ; cursor<inputEnd; cursor++)
    {
        hash += (*cursor) * PRIME32_5;
        hash = XXH_rotl32(hash, 11) * PRIME32_1;
    }

    // final mixing
    hash ^= hash >> 15;
    hash *= PRIME32_2;
    hash ^= hash >> 13;
    hash *= PRIME32_3;
    hash ^= hash >> 16;

    return hash;
}
// XXH32() :
// One-shot 32-bit hash of the `len` bytes at `input`, seeded with `seed`.
// A reset / update / digest sequence on a temporary state would be simpler to maintain,
// but is unfortunately slow for small inputs, hence this direct dispatch.
// Each call site passes constant endian / alignment arguments to the force-inlined core.
unsigned int XXH32 (const void* input, size_t len, unsigned seed)
{
    const XXH_endianess cpuOrder = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int readAsLittle = (cpuOrder==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;

#if !defined(XXH_USE_UNALIGNED_ACCESS)
    if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
    {
        return readAsLittle
             ? XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned)
             : XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#endif

    return readAsLittle
         ? XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned)
         : XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
}
// XXH64_endian_align() :
// One-shot 64-bit hash of the `len` bytes at `input`, seeded with `seed`.
// `endian` and `align` are forwarded to the XXH_readLE*_align() readers for every load.
// Structure : 32-byte stripes feed four accumulators which are then merged into the hash,
// then remaining 8-byte words, one optional 4-byte word and single bytes are folded in,
// then the result goes through the final mixing steps.
FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* cursor = (const BYTE*)input;
    const BYTE* inputEnd = cursor + len;
    U64 hash;
// Note : this macro stays defined for the rest of the file and relies on locals named `endian` and `align`.
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (cursor==NULL)
    {
        // treat a null input as an empty one; the pointer value is never dereferenced
        len=0;
        inputEnd=cursor=(const BYTE*)(size_t)32;
    }
#endif

    if (len<32)
    {
        // short input : no stripe to process
        hash = seed + PRIME64_5;
    }
    else
    {
        const BYTE* const lastStripe = inputEnd - 32;
        U64 lane1 = seed + PRIME64_1 + PRIME64_2;
        U64 lane2 = seed + PRIME64_2;
        U64 lane3 = seed + 0;
        U64 lane4 = seed - PRIME64_1;

        do
        {
            lane1 += XXH_get64bits(cursor) * PRIME64_2;
            cursor += 8;
            lane1 = XXH_rotl64(lane1, 31) * PRIME64_1;

            lane2 += XXH_get64bits(cursor) * PRIME64_2;
            cursor += 8;
            lane2 = XXH_rotl64(lane2, 31) * PRIME64_1;

            lane3 += XXH_get64bits(cursor) * PRIME64_2;
            cursor += 8;
            lane3 = XXH_rotl64(lane3, 31) * PRIME64_1;

            lane4 += XXH_get64bits(cursor) * PRIME64_2;
            cursor += 8;
            lane4 = XXH_rotl64(lane4, 31) * PRIME64_1;
        }
        while (cursor<=lastStripe);

        hash = XXH_rotl64(lane1, 1) + XXH_rotl64(lane2, 7) + XXH_rotl64(lane3, 12) + XXH_rotl64(lane4, 18);

        // merge each accumulator into the hash, in order
        lane1 *= PRIME64_2;
        lane1 = XXH_rotl64(lane1, 31) * PRIME64_1;
        hash ^= lane1;
        hash = hash * PRIME64_1 + PRIME64_4;

        lane2 *= PRIME64_2;
        lane2 = XXH_rotl64(lane2, 31) * PRIME64_1;
        hash ^= lane2;
        hash = hash * PRIME64_1 + PRIME64_4;

        lane3 *= PRIME64_2;
        lane3 = XXH_rotl64(lane3, 31) * PRIME64_1;
        hash ^= lane3;
        hash = hash * PRIME64_1 + PRIME64_4;

        lane4 *= PRIME64_2;
        lane4 = XXH_rotl64(lane4, 31) * PRIME64_1;
        hash ^= lane4;
        hash = hash * PRIME64_1 + PRIME64_4;
    }

    hash += (U64) len;

    // remaining whole 64-bit words
    for ( ; cursor+8<=inputEnd; cursor+=8)
    {
        U64 k1 = XXH_get64bits(cursor);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31) * PRIME64_1;
        hash ^= k1;
        hash = XXH_rotl64(hash,27) * PRIME64_1 + PRIME64_4;
    }

    // at most one remaining 32-bit word
    if (cursor+4<=inputEnd)
    {
        hash ^= (U64)(XXH_readLE32_align(cursor, endian, align)) * PRIME64_1;
        hash = XXH_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
        cursor += 4;
    }

    // remaining single bytes
    for ( ; cursor<inputEnd; cursor++)
    {
        hash ^= (*cursor) * PRIME64_5;
        hash = XXH_rotl64(hash, 11) * PRIME64_1;
    }

    // final mixing
    hash ^= hash >> 33;
    hash *= PRIME64_2;
    hash ^= hash >> 29;
    hash *= PRIME64_3;
    hash ^= hash >> 32;

    return hash;
}
// XXH64() :
// One-shot 64-bit hash of the `len` bytes at `input`, seeded with `seed`.
// A reset / update / digest sequence on a temporary state would be simpler to maintain,
// but is unfortunately slow for small inputs, hence this direct dispatch.
// Each call site passes constant endian / alignment arguments to the force-inlined core.
unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
    const XXH_endianess cpuOrder = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int readAsLittle = (cpuOrder==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;

#if !defined(XXH_USE_UNALIGNED_ACCESS)
    if ((((size_t)input) & 7)==0)   // Input is aligned, let's leverage the speed advantage
    {
        return readAsLittle
             ? XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned)
             : XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#endif

    return readAsLittle
         ? XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned)
         : XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
}
/****************************************************
* Advanced Hash Functions
****************************************************/
/*** Allocation ***/
/* Internal layout of a streaming 32-bit hash state.
   XXH32_state_t pointers received from callers are cast to this type. */
typedef struct
{
/* number of input bytes received by XXH32_update() since the last XXH32_reset() */
U64 total_len;
/* seed given to XXH32_reset(); used by the digest when fewer than 16 bytes were received */
U32 seed;
/* the four running accumulators */
U32 v1;
U32 v2;
U32 v3;
U32 v4;
/* pending input bytes not yet forming a full 16-byte stripe */
U32 mem32[4]; /* defined as U32 for alignment */
/* number of valid bytes currently held in mem32 */
U32 memsize;
} XXH_istate32_t;
/* Internal layout of a streaming 64-bit hash state.
   XXH64_state_t pointers received from callers are cast to this type. */
typedef struct
{
/* number of input bytes received by XXH64_update() since the last XXH64_reset() */
U64 total_len;
/* seed given to XXH64_reset(); used by the digest when fewer than 32 bytes were received */
U64 seed;
/* the four running accumulators */
U64 v1;
U64 v2;
U64 v3;
U64 v4;
/* pending input bytes not yet forming a full 32-byte stripe */
U64 mem64[4]; /* defined as U64 for alignment */
/* number of valid bytes currently held in mem64 */
U32 memsize;
} XXH_istate64_t;
// XXH32_createState() :
// Allocate storage for one 32-bit streaming state through XXH_malloc().
// The returned state is not initialized. Returns whatever XXH_malloc() returned.
XXH32_state_t* XXH32_createState(void)
{
    void* storage;
    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   // A compilation error here means XXH32_state_t is not large enough
    storage = XXH_malloc(sizeof(XXH32_state_t));
    return (XXH32_state_t*)storage;
}
// XXH32_freeState() :
// Release a state through XXH_free(). Always reports XXH_OK.
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
    const XXH_errorcode status = XXH_OK;
    XXH_free(statePtr);
    return status;
}
// XXH64_createState() :
// Allocate storage for one 64-bit streaming state through XXH_malloc().
// The returned state is not initialized. Returns whatever XXH_malloc() returned.
XXH64_state_t* XXH64_createState(void)
{
    void* storage;
    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   // A compilation error here means XXH64_state_t is not large enough
    storage = XXH_malloc(sizeof(XXH64_state_t));
    return (XXH64_state_t*)storage;
}
// XXH64_freeState() :
// Release a state through XXH_free(). Always reports XXH_OK.
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    const XXH_errorcode status = XXH_OK;
    XXH_free(statePtr);
    return status;
}
/*** Hash feed ***/
// XXH32_reset() :
// Start a new 32-bit streaming hash in `state_in`, using `seed`.
// Initializes the four accumulators from the seed and clears the byte counters.
// Always returns XXH_OK.
// The seed parameter is spelled `unsigned`, exactly as in the prototype of xxhash.h, so that
// definition and declaration stay compatible even where U32 is not `unsigned int`.
XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned seed)
{
    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME32_1 + PRIME32_2;
    state->v2 = seed + PRIME32_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME32_1;
    state->total_len = 0;
    state->memsize = 0;   // no pending bytes in mem32
    return XXH_OK;
}
// XXH64_reset() :
// Start a new 64-bit streaming hash in `state_in`, using `seed`.
// Initializes the four accumulators from the seed and clears the byte counters.
// Always returns XXH_OK.
XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
{
    XXH_istate64_t* const st = (XXH_istate64_t*) state_in;

    // counters : nothing received yet, nothing pending in mem64
    st->total_len = 0;
    st->memsize = 0;

    // seed and seed-derived accumulators
    st->seed = seed;
    st->v1 = seed + PRIME64_1 + PRIME64_2;
    st->v2 = seed + PRIME64_2;
    st->v3 = seed + 0;
    st->v4 = seed - PRIME64_1;

    return XXH_OK;
}
// XXH32_update_endian() :
// Feed `len` bytes from `input` into the 32-bit streaming state `state_in`.
// `endian` is forwarded to XXH_readLE32() for every 32-bit load.
// Bytes are consumed in 16-byte stripes; bytes that do not complete a stripe are kept in
// state->mem32 (state->memsize of them) until a later call completes it.
// Returns XXH_OK, or XXH_ERROR when XXH_ACCEPT_NULL_INPUT_POINTER is defined and input is NULL.
FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 16)   // fill in tmp buffer
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        // complete the pending stripe with the first bytes of input, then consume it
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
        {
            const U32* p32 = state->mem32;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v1 = XXH_rotl32(state->v1, 13);
            state->v1 *= PRIME32_1;
            p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v2 = XXH_rotl32(state->v2, 13);
            state->v2 *= PRIME32_1;
            p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v3 = XXH_rotl32(state->v3, 13);
            state->v3 *= PRIME32_1;
            p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v4 = XXH_rotl32(state->v4, 13);
            state->v4 *= PRIME32_1;
            p32++;
        }
        p += 16-state->memsize;
        state->memsize = 0;
    }

    // Compare the remaining length instead of computing bEnd-16 first : when fewer than
    // 16 bytes remain, bEnd-16 would point before the input buffer, which is undefined.
    if ((size_t)(bEnd - p) >= 16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;

        do
        {
            v1 += XXH_readLE32(p, endian) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p+=4;
            v2 += XXH_readLE32(p, endian) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p+=4;
            v3 += XXH_readLE32(p, endian) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p+=4;
            v4 += XXH_readLE32(p, endian) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p+=4;
        }
        while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)   // keep the incomplete tail for the next call
    {
        XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
        state->memsize = (U32)(bEnd-p);   // memsize is a U32 field
    }

    return XXH_OK;
}
// XXH32_update() :
// Public entry point : feed `len` bytes from `input` into the streaming state.
// Picks the little-endian reader when the CPU is little-endian or XXH_FORCE_NATIVE_FORMAT is set,
// the big-endian (swapping) reader otherwise, and returns the result of XXH32_update_endian().
XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
    const XXH_endianess cpuOrder = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int readAsLittle = (cpuOrder==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;

    return readAsLittle
         ? XXH32_update_endian(state_in, input, len, XXH_littleEndian)
         : XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}
// XXH32_digest_endian() :
// Compute the 32-bit hash of everything fed into `state_in` so far.
// Only reads the state : accumulators are combined into a local value, the pending bytes of
// state->mem32 are folded in, then the final mixing steps are applied.
// `endian` is forwarded to XXH_readLE32() for the pending 32-bit words.
FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
{
    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
    const BYTE * cursor = (const BYTE*)state->mem32;
    BYTE* pendingEnd = (BYTE*)(state->mem32) + state->memsize;
    U32 hash;

    if (state->total_len < 16)
    {
        // no full stripe was ever processed : start from the seed
        hash = state->seed + PRIME32_5;
    }
    else
    {
        hash = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    }

    hash += (U32) state->total_len;

    // pending whole 32-bit words
    for ( ; cursor+4<=pendingEnd; cursor+=4)
    {
        hash += XXH_readLE32(cursor, endian) * PRIME32_3;
        hash = XXH_rotl32(hash, 17) * PRIME32_4;
    }

    // pending single bytes
    for ( ; cursor<pendingEnd; cursor++)
    {
        hash += (*cursor) * PRIME32_5;
        hash = XXH_rotl32(hash, 11) * PRIME32_1;
    }

    // final mixing
    hash ^= hash >> 15;
    hash *= PRIME32_2;
    hash ^= hash >> 13;
    hash *= PRIME32_3;
    hash ^= hash >> 16;

    return hash;
}
// XXH32_digest() :
// Public entry point : return the 32-bit hash of everything fed into the state so far.
// Picks the little-endian reader when the CPU is little-endian or XXH_FORCE_NATIVE_FORMAT is set,
// the big-endian (swapping) reader otherwise.
// The return type is spelled `unsigned int`, exactly as in the prototype of xxhash.h, so that
// definition and declaration stay compatible even where U32 is not `unsigned int`.
unsigned int XXH32_digest (const XXH32_state_t* state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH32_digest_endian(state_in, XXH_bigEndian);
}
// XXH64_update_endian() :
// Feed `len` bytes from `input` into the 64-bit streaming state `state_in`.
// `endian` is forwarded to XXH_readLE64() for every 64-bit load.
// Bytes are consumed in 32-byte stripes; bytes that do not complete a stripe are kept in
// state->mem64 (state->memsize of them) until a later call completes it.
// Returns XXH_OK, or XXH_ERROR when XXH_ACCEPT_NULL_INPUT_POINTER is defined and input is NULL.
FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 32)   // fill in tmp buffer
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        // complete the pending stripe with the first bytes of input, then consume it
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
        {
            const U64* p64 = state->mem64;
            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v1 = XXH_rotl64(state->v1, 31);
            state->v1 *= PRIME64_1;
            p64++;
            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v2 = XXH_rotl64(state->v2, 31);
            state->v2 *= PRIME64_1;
            p64++;
            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v3 = XXH_rotl64(state->v3, 31);
            state->v3 *= PRIME64_1;
            p64++;
            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v4 = XXH_rotl64(state->v4, 31);
            state->v4 *= PRIME64_1;
            p64++;
        }
        p += 32-state->memsize;
        state->memsize = 0;
    }

    // Compare the remaining length instead of computing p+32 first : when fewer than
    // 32 bytes remain, p+32 would point past the end of the input, which is undefined.
    if ((size_t)(bEnd - p) >= 32)
    {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        do
        {
            v1 += XXH_readLE64(p, endian) * PRIME64_2;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            p+=8;
            v2 += XXH_readLE64(p, endian) * PRIME64_2;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            p+=8;
            v3 += XXH_readLE64(p, endian) * PRIME64_2;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            p+=8;
            v4 += XXH_readLE64(p, endian) * PRIME64_2;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
            p+=8;
        }
        while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)   // keep the incomplete tail for the next call
    {
        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
        state->memsize = (U32)(bEnd-p);   // memsize is a U32 field
    }

    return XXH_OK;
}
// XXH64_update() :
// Public entry point : feed `len` bytes from `input` into the streaming state.
// Picks the little-endian reader when the CPU is little-endian or XXH_FORCE_NATIVE_FORMAT is set,
// the big-endian (swapping) reader otherwise, and returns the result of XXH64_update_endian().
XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
    const XXH_endianess cpuOrder = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int readAsLittle = (cpuOrder==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;

    return readAsLittle
         ? XXH64_update_endian(state_in, input, len, XXH_littleEndian)
         : XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}
// XXH64_digest_endian() :
// Compute the 64-bit hash of everything fed into `state_in` so far.
// Only reads the state : accumulators are copied to locals and merged, the pending bytes of
// state->mem64 are folded in, then the final mixing steps are applied.
// `endian` is forwarded to the XXH_readLE64() / XXH_readLE32() loads of the pending bytes.
FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
{
    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
    const BYTE * cursor = (const BYTE*)state->mem64;
    BYTE* pendingEnd = (BYTE*)state->mem64 + state->memsize;
    U64 hash;

    if (state->total_len < 32)
    {
        // no full stripe was ever processed : start from the seed
        hash = state->seed + PRIME64_5;
    }
    else
    {
        U64 lane1 = state->v1;
        U64 lane2 = state->v2;
        U64 lane3 = state->v3;
        U64 lane4 = state->v4;

        hash = XXH_rotl64(lane1, 1) + XXH_rotl64(lane2, 7) + XXH_rotl64(lane3, 12) + XXH_rotl64(lane4, 18);

        // merge each accumulator into the hash, in order
        lane1 *= PRIME64_2;
        lane1 = XXH_rotl64(lane1, 31) * PRIME64_1;
        hash ^= lane1;
        hash = hash*PRIME64_1 + PRIME64_4;

        lane2 *= PRIME64_2;
        lane2 = XXH_rotl64(lane2, 31) * PRIME64_1;
        hash ^= lane2;
        hash = hash*PRIME64_1 + PRIME64_4;

        lane3 *= PRIME64_2;
        lane3 = XXH_rotl64(lane3, 31) * PRIME64_1;
        hash ^= lane3;
        hash = hash*PRIME64_1 + PRIME64_4;

        lane4 *= PRIME64_2;
        lane4 = XXH_rotl64(lane4, 31) * PRIME64_1;
        hash ^= lane4;
        hash = hash*PRIME64_1 + PRIME64_4;
    }

    hash += (U64) state->total_len;

    // pending whole 64-bit words
    for ( ; cursor+8<=pendingEnd; cursor+=8)
    {
        U64 k1 = XXH_readLE64(cursor, endian);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31) * PRIME64_1;
        hash ^= k1;
        hash = XXH_rotl64(hash,27) * PRIME64_1 + PRIME64_4;
    }

    // at most one pending 32-bit word
    if (cursor+4<=pendingEnd)
    {
        hash ^= (U64)(XXH_readLE32(cursor, endian)) * PRIME64_1;
        hash = XXH_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
        cursor += 4;
    }

    // pending single bytes
    for ( ; cursor<pendingEnd; cursor++)
    {
        hash ^= (*cursor) * PRIME64_5;
        hash = XXH_rotl64(hash, 11) * PRIME64_1;
    }

    // final mixing
    hash ^= hash >> 33;
    hash *= PRIME64_2;
    hash ^= hash >> 29;
    hash *= PRIME64_3;
    hash ^= hash >> 32;

    return hash;
}
// XXH64_digest() :
// Public entry point : return the 64-bit hash of everything fed into the state so far.
// Picks the little-endian reader when the CPU is little-endian or XXH_FORCE_NATIVE_FORMAT is set,
// the big-endian (swapping) reader otherwise.
unsigned long long XXH64_digest (const XXH64_state_t* state_in)
{
    const XXH_endianess cpuOrder = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int readAsLittle = (cpuOrder==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;

    return readAsLittle
         ? XXH64_digest_endian(state_in, XXH_littleEndian)
         : XXH64_digest_endian(state_in, XXH_bigEndian);
}

156
programs/xxhash.h Executable file
View File

@ -0,0 +1,156 @@
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2014, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
*/
/* Notice extracted from xxHash homepage :
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
SuperFastHash 1.2 GB/s 1 Paul Hsieh
CityHash64 1.05 GB/s 10 Pike & Alakuijala
FNV 0.55 GB/s 5 Fowler, Noll, Vo
CRC32 0.43 GB/s 9
MD5-32 0.33 GB/s 10 Ronald L. Rivest
SHA1-32 0.28 GB/s 10
Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/*****************************
Includes
*****************************/
#include <stddef.h> /* size_t */
/*****************************
Type
*****************************/
/* Status code returned by the state-management functions : XXH_OK (0) means success,
   any other value means an error. */
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
/*****************************
Simple Hash Functions
*****************************/
unsigned int XXH32 (const void* input, size_t length, unsigned seed);
unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
/*
XXH32() :
Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
The memory between input & input+length must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
This function successfully passes all SMHasher tests.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
XXH64() :
Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
*/
/*****************************
Advanced Hash Functions
*****************************/
/* Opaque storage for a streaming hash state. The array lengths only reserve space :
   xxhash.c checks at compile time that each type is at least as large as its internal state. */
typedef struct { long long ll[ 6]; } XXH32_state_t;
typedef struct { long long ll[11]; } XXH64_state_t;
/*
These structures allow static allocation of XXH states.
States must then be initialized using XXHnn_reset() before first use.
If you prefer dynamic allocation, please refer to functions below.
*/
XXH32_state_t* XXH32_createState(void);
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH64_state_t* XXH64_createState(void);
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
/*
These functions create and release memory for XXH state.
States must then be initialized using XXHnn_reset() before first use.
*/
XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed);
XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
unsigned int XXH32_digest (const XXH32_state_t* statePtr);
XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
/*
These functions calculate the xxHash of an input provided in multiple smaller packets,
as opposed to an input provided as a single block.
XXH state space must first be allocated, using either static or dynamic method provided above.
Start a new hash by initializing state with a seed, using XXHnn_reset().
Then, feed the hash state by calling XXHnn_update() as many times as necessary.
Obviously, input must be valid, meaning allocated and read accessible.
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
Finally, you can produce a hash anytime, by using XXHnn_digest().
This function returns the final nn-bits hash.
You can nonetheless continue feeding the hash state with more input,
and therefore get some new hashes, by calling again XXHnn_digest().
When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
*/
#if defined (__cplusplus)
}
#endif

69
programs/zstd.1 Normal file
View File

@ -0,0 +1,69 @@
\"
\" zstd.1: This is a manual page for 'zstd' program. This file is part of the
\" zstd <https://github.com/Cyan4973/zstd> project.
\"
\" No hyphenation
.hy 0
.nr HY 0
.TH zstd "1" "2015-01-22" "zstd" "User Commands"
.SH NAME
\fBzstd\fR - standard compression algorithm
.SH SYNOPSIS
.TP 5
\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] <OUTPUT-FILE>
.SH DESCRIPTION
.PP
\fBzstd\fR is a fast lossless compression algorithm
with highly configurable compression strength and memory usage.
It is based on the \fBLZ77\fR family, coupled with FSE entropy stage.
zstd offers compression speed of 200 MB/s per core.
It also features a fast decoder, with speed > 500 MB/s per core.
\fBzstd\fR supports the following options :
.SH OPTIONS
.TP
.B \-1
fast compression (default)
.TP
.B \-d
decompression
.TP
.B \-f
overwrite output without prompting
.TP
.B \-h/\-H
display help/long help and exit
.TP
.B \-V
display Version number and exit
.TP
.B \-v
verbose mode
.TP
.B \-q
suppress warnings; specify twice to suppress errors too
.TP
.B \-c
force write to standard output, even if it is the console
.TP
.B \-t
test compressed file integrity
.TP
.B \-z
force compression
.TP
.B \-b
benchmark file(s)
.TP
.B \-i#
iteration loops [1-9](default : 3), benchmark mode only
.SH BUGS
Report bugs at:- https://github.com/Cyan4973/zstd
.SH AUTHOR
Yann Collet

32
programs/zstdcat.1 Normal file
View File

@ -0,0 +1,32 @@
\"
\" zstdcat.1: This is a manual page for 'zstdcat' program. This file is part of
\" the zstd <https://github.com/Cyan4973/zstd/> project.
\"
\" No hyphenation
.hy 0
.nr HY 0
.TH zstdcat "1" "2014-06-20" "zstdcat" "User Commands"
.SH NAME
\fBzstdcat\fR - Utility based on zstd
.SH SYNOPSIS
.TP 5
\fBzstdcat\fR [\fBOPTIONS\fR] [-|INPUT-FILE]
.SH DESCRIPTION
.PP
\fBzstdcat\fR is a utility based on \fBzstd\fR, a fast lossless compression algorithm.
\fBzstdcat\fR decompresses its input file or stream, redirecting its output to the console.
It is equivalent to \fBzstd -cd\fR.
Available options are the same as \fBzstd\fR ones (man zstd).
.SH BUGS
Report bugs at: https://github.com/Cyan4973/zstd/
.SH AUTHOR
Yann Collet

322
programs/zstdcli.c Normal file
View File

@ -0,0 +1,322 @@
/*
zstdcli - Command Line Interface (cli) for zstd
Copyright (C) Yann Collet 2014-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/*
Note : this is a user program.
It is not part of zstd compression library.
The license of this compression CLI program is GPLv2.
The license of zstd library is BSD.
*/
/**************************************
* Compiler Options
**************************************/
/* Both macros are defined ahead of the #include lines below, so that they are
   already visible when the system headers are processed. */
#define _CRT_SECURE_NO_WARNINGS /* Visual : removes warning from strcpy (strcpy is used by main() to build file names) */
#define _POSIX_SOURCE 1 /* triggers fileno() within <stdio.h> on unix (fileno is needed by IS_CONSOLE) */
/**************************************
* Includes
**************************************/
#include <stdio.h> /* fprintf, getchar */
#include <stdlib.h> /* exit, calloc, free */
#include <string.h> /* strcmp, strlen */
#include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */
#include "fileio.h"
/**************************************
* OS-specific Includes
**************************************/
/* Platform abstraction for two stream helpers :
   - SET_BINARY_MODE(file)  : on the DOS/OS2/Windows/Cygwin branch, calls _setmode(..., _O_BINARY) on the stream;
                              on the other branch it expands to nothing.
                              NOTE(review): appears unused in the visible part of this file.
   - IS_CONSOLE(stdStream)  : result of _isatty() / isatty() on the stream's file descriptor,
                              used by main() to detect that stdin/stdout is a terminal. */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _isatty */
# ifdef __MINGW32__
int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
# endif
# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#else
# include <unistd.h> /* isatty */
/* no binary/text distinction is handled on this branch : the macro expands to nothing */
# define SET_BINARY_MODE(file)
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#endif
//****************************
// Constants
//****************************
#define COMPRESSOR_NAME "zstd command line interface"
#ifndef ZSTD_VERSION
# define ZSTD_VERSION "v0.0.1"
#endif
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__
#define ZSTD_EXTENSION ".zst"
#define ZSTD_CAT "zstdcat"
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/**************************************
* Display Macros
**************************************/
/* DISPLAY : unconditional printf-style output on stderr. */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL : printf-style output on stderr, emitted only when displayLevel >= l.
   The do { } while (0) wrapper makes the macro a single statement, so it can be
   used safely as the body of an if / else without braces.
   Every use must be terminated by a semicolon. */
#define DISPLAYLEVEL(l, ...) do { if (displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)
/* Verbosity threshold used by DISPLAYLEVEL :
   0 : no display
   1 : errors
   2 : + result + interaction + warnings (default)
   3 : + progression
   4 : + information */
static unsigned displayLevel = 2;
/**************************************
* Exceptions
**************************************/
/* DEBUG : set to a non-zero value to make DEBUGOUTPUT print on stderr. */
#define DEBUG 0
/* DEBUGOUTPUT : printf-style trace, emitted only when DEBUG is non-zero.
   Wrapped in do { } while (0) : behaves as one statement, must be followed by a semicolon. */
#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
/* EXM_THROW : reports a fatal error, then terminates the program.
   error : exit code, also printed in the message
   ...   : printf-style format and arguments describing the error
   The message is only printed when displayLevel >= 1; exit() is called in every case.
   `error` is copied once into a local, so an argument with side effects is evaluated a single time.
   Wrapped in do { } while (0) : behaves as one statement, must be followed by a semicolon. */
#define EXM_THROW(error, ...) \
do { \
    const int exm_errorCode = (error); \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", exm_errorCode); \
    DISPLAYLEVEL(1, __VA_ARGS__); \
    DISPLAYLEVEL(1, "\n"); \
    exit(exm_errorCode); \
} while (0)
/**************************************
* Command Line
**************************************/
/* usage :
   Prints the short help text on stderr.
   programName : name shown in the "Usage" line
   @return : always 0 */
static int usage(const char* programName)
{
    /* One single call : adjacent string literals are concatenated by the compiler,
       the two %s fields receive programName then ZSTD_EXTENSION. */
    DISPLAY( "Usage :\n"
             " %s [arg] [input] [output]\n"
             "\n"
             "input : a filename\n"
             " with no FILE, or when FILE is - , read standard input\n"
             "Arguments :\n"
             " -d : decompression (default for %s extension)\n"
             /* " -z : force compression\n"   (option not advertised) */
             " -f : overwrite output without prompting \n"
             " -h/-H : display help/long help and exit\n",
             programName, ZSTD_EXTENSION);
    return 0;
}
/* usage_advanced :
   Prints the welcome banner, the short help (see usage()), then the advanced
   and benchmark options, all on stderr.
   programName : forwarded to usage()
   @return : always 0 (main() uses it directly as its exit code for -h / -H) */
static int usage_advanced(const char* programName)
{
    DISPLAY(WELCOME_MESSAGE);
    (void)usage(programName);
    /* remaining help text, emitted with one call */
    DISPLAY( "\n"
             "Advanced arguments :\n"
             " -V : display Version number and exit\n"
             " -v : verbose mode\n"
             " -q : suppress warnings; specify twice to suppress errors too\n"
             " -c : force write to standard output, even if it is the console\n"
             /* " -t : test compressed file integrity\n"   (option not advertised) */
             "Benchmark arguments :\n"
             " -b : benchmark file(s)\n"
             " -i# : iteration loops [1-9](default : 3), benchmark mode only\n");
    return 0;
}
/* badusage :
   Reports an invalid command line : unless errors are silenced (displayLevel == 0),
   prints "Incorrect parameters" followed by the short help.
   programName : forwarded to usage()
   @return : always 1, meant to be used as the program's exit code */
static int badusage(const char* programName)
{
    if (displayLevel >= 1)
    {
        DISPLAY("Incorrect parameters\n");
        usage(programName);
    }
    return 1;
}
/* waitEnter :
   Prints a prompt on stderr, then blocks until one character can be read from stdin.
   Used by the hidden -p option to keep the console open at the end of the run. */
static void waitEnter(void)
{
    DISPLAY("Press enter to continue...\n");
    (void)getchar();   /* the character itself is irrelevant */
}
/* main :
   Entry point of the zstd command line interface.
   Steps :
   1. parse the command line (aggregated switches such as -dc are allowed;
      the first non-switch argument is the input, the second one is the output),
   2. in benchmark mode (-b), hand the file list to BMK_bench() and stop,
   3. otherwise select an output name when none was given :
      stdout if it is not a console, else input name +/- ZSTD_EXTENSION,
   4. run FIO_compressFilename() or FIO_decompressFilename().
   When the program is invoked under the name ZSTD_CAT, it behaves as `zstd -cd`.
   @return : 0 on normal completion (also after -V / -h / -H),
             1 on incorrect parameters or allocation failure. */
int main(int argc, char** argv)
{
    int i,
        bench=0,
        decode=0,
        forceStdout=0,
        main_pause=0;
    unsigned fileNameStart = 0;
    unsigned nbFiles = 0;
    const char* programName = argv[0];
    const char* inFileName = NULL;
    const char* outFileName = NULL;
    char* dynNameSpace = NULL;   /* buffer owning an automatically generated output name */
    const char extension[] = ZSTD_EXTENSION;

    /* zstdcat behavior :
       only the last path component is compared, so that "./zstdcat" or
       "/usr/bin/zstdcat" are recognized too. programName itself is left
       untouched, since it is displayed by the help messages. */
    {
        const char* exeName = programName;
        const char* lastSeparator = strrchr(exeName, '/');
        if (lastSeparator) exeName = lastSeparator+1;
        lastSeparator = strrchr(exeName, '\\');
        if (lastSeparator) exeName = lastSeparator+1;
        if (!strcmp(exeName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
    }

    /* command switches */
    for(i=1; i<argc; i++)
    {
        const char* argument = argv[i];

        if(!argument) continue;   /* Protection if argument empty */

        /* Decode commands (note : aggregated commands are allowed) */
        if (argument[0]=='-')
        {
            /* '-' means stdin/stdout */
            if (argument[1]==0)
            {
                if (!inFileName) inFileName=stdinmark;
                else outFileName=stdoutmark;
            }

            argument++;

            while (argument[0]!=0)
            {
                switch(argument[0])
                {
                    /* Display help */
                case 'V': DISPLAY(WELCOME_MESSAGE); return 0;   /* Version Only */
                case 'H':
                case 'h': return usage_advanced(programName);

                    /* note : -z (force compression) and -t (test) are not handled here,
                       they end up in the `default` case */

                    /* Decoding */
                case 'd': decode=1; argument++; break;

                    /* Force stdout, even if stdout==console */
                case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;

                    /* Overwrite */
                case 'f': FIO_overwriteMode(); argument++; break;

                    /* Verbose mode */
                case 'v': displayLevel=4; argument++; break;

                    /* Quiet mode : displayLevel is unsigned, so it must not be decremented
                       below 0, otherwise -qqq would wrap around and become fully verbose */
                case 'q': if (displayLevel>0) displayLevel--; argument++; break;

                    /* keep source file (default anyway, so useless; only for xz/lzma compatibility) */
                case 'k': argument++; break;

                    /* Benchmark */
                case 'b': bench=1; argument++; break;

                    /* Modify Nb Iterations (benchmark only) */
                case 'i':
                    {
                        int iters= 0;
                        argument++;
                        while ((*argument >='0') && (*argument <='9'))
                            iters *= 10, iters += *argument++ - '0';
                        BMK_SetNbIterations(iters);
                    }
                    break;

                    /* Pause at the end (hidden option) */
                case 'p': main_pause=1; argument++; break;

                    /* unknown command */
                default : return badusage(programName);
                }
            }
            continue;
        }

        /* first provided filename is input */
        if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argc-i; continue; }

        /* second provided filename is output */
        if (!outFileName)
        {
            outFileName = argument;
            if (!strcmp (outFileName, nullString)) outFileName = nulmark;
            continue;
        }
    }

    /* Welcome message (if verbose) */
    DISPLAYLEVEL(3, WELCOME_MESSAGE);

    /* No input filename ==> use stdin */
    if(!inFileName) { inFileName=stdinmark; }

    /* Check if input defined as console; trigger an error in this case */
    if (!strcmp(inFileName, stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);

    /* Check if benchmark is selected */
    if (bench) { BMK_bench(argv+fileNameStart, nbFiles, 0); goto _end; }

    /* No output filename ==> try to select one automatically (when possible) */
    while (!outFileName)
    {
        if (!IS_CONSOLE(stdout)) { outFileName=stdoutmark; break; }   /* Default to stdout whenever possible (i.e. not a console) */

        if (!decode)   /* compression to file : output name = input name + extension */
        {
            size_t const l = strlen(inFileName);
            dynNameSpace = (char*)calloc(1, l+sizeof(extension));   /* sizeof includes the final '\0' */
            if (dynNameSpace==NULL) { DISPLAYLEVEL(1, "Allocation error : not enough memory\n"); return 1; }
            strcpy(dynNameSpace, inFileName);
            strcpy(dynNameSpace+l, ZSTD_EXTENSION);
            outFileName = dynNameSpace;
            DISPLAYLEVEL(2, "Compressed filename will be : %s \n", outFileName);
            break;
        }

        /* decompression to file (automatic name will work only if input filename has correct format extension) */
        {
            size_t outl;
            size_t const inl = strlen(inFileName);
            dynNameSpace = (char*)calloc(1,inl+1);
            if (dynNameSpace==NULL) { DISPLAYLEVEL(1, "Allocation error : not enough memory\n"); return 1; }
            outFileName = dynNameSpace;
            strcpy(dynNameSpace, inFileName);
            outl = inl;
            /* walk backward from the final '\0', erasing the name while it matches the extension;
               a full match leaves outl on the last character preceding the extension (inl-5) */
            if (inl>4)
                while ((outl >= inl-4) && (inFileName[outl] == extension[outl-inl+4])) dynNameSpace[outl--]=0;
            if (outl != inl-5)
            {
                DISPLAYLEVEL(1, "Cannot determine an output filename\n");
                free(dynNameSpace);   /* do not leak the name buffer on this error path */
                return badusage(programName);
            }
            DISPLAYLEVEL(2, "Decoding file %s \n", outFileName);
        }
    }

    /* Check if output is defined as console; trigger an error in this case */
    if (!strcmp(outFileName,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout)
    {
        free(dynNameSpace);
        return badusage(programName);
    }

    /* No warning message in pure pipe mode (stdin + stdout) */
    if (!strcmp(inFileName, stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;

    /* IO Stream/File */
    FIO_setNotificationLevel(displayLevel);
    if (decode)
        FIO_decompressFilename(outFileName, inFileName);
    else
        FIO_compressFilename(outFileName, inFileName);

_end:
    if (main_pause) waitEnter();
    free(dynNameSpace);
    return 0;
}