restored lz4 hc compression ratio

This commit is contained in:
Yann Collet 2015-03-02 00:11:34 +01:00
parent 8f4e201a06
commit e68d1c98c1
2 changed files with 148 additions and 27 deletions

View File

@ -20,7 +20,7 @@
#
# You can contact the author at :
# - LZ4 source repository : http://code.google.com/p/lz4/
# - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
# - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
# ##########################################################################
# lz4 : Command Line Utility, supporting gzip-like arguments
# lz4c : CLU, supporting also legacy lz4demo arguments
@ -143,6 +143,7 @@ test-lz4: lz4 datagen
./datagen -g16KB | ./lz4 -9 | ./lz4 -vdq > $(VOID)
./datagen | ./lz4 | ./lz4 -vdq > $(VOID)
./datagen -g6M -p100 | ./lz4 -9BD | ./lz4 -vdq > $(VOID)
./datagen -g17M | ./lz4 -9v | ./lz4 -vdq > $(VOID)
./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID)
./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID)
# test frame concatenation with null-length frame

172
programs/fullbench.c Normal file → Executable file
View File

@ -24,12 +24,12 @@
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
//**************************************
// Compiler Options
//**************************************
// Disable some Visual warning messages
/**************************************
* Compiler Options
**************************************/
/* Disable some Visual warning messages */
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE // VS2005
#define _CRT_SECURE_NO_DEPRECATE /* VS2005 */
// Unix Large Files support (>4GB)
#if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions
@ -45,9 +45,9 @@
#endif
//**************************************
// Includes
//**************************************
/**************************************
* Includes
**************************************/
#include <stdlib.h> // malloc
#include <stdio.h> // fprintf, fopen, ftello64
#include <sys/types.h> // stat64
@ -68,10 +68,10 @@
#include "xxhash.h"
//**************************************
// Compiler Options
//**************************************
// S_ISREG & gettimeofday() are not supported by MSVC
/**************************************
* Compiler Options
**************************************/
/* S_ISREG & gettimeofday() are not supported by MSVC */
#if !defined(S_ISREG)
# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#endif
@ -82,9 +82,9 @@
#endif
//**************************************
// Basic Types
//**************************************
/**************************************
* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
@ -101,9 +101,9 @@
#endif
//****************************
// Constants
//****************************
/**************************************
* Constants
**************************************/
#define PROGRAM_DESCRIPTION "LZ4 speed analyzer"
#ifndef LZ4_VERSION
# define LZ4_VERSION ""
@ -122,9 +122,9 @@
#define ALL_DECOMPRESSORS 0
//**************************************
// Local structures
//**************************************
/**************************************
* Local structures
**************************************/
struct chunkParameters
{
U32 id;
@ -135,9 +135,9 @@ struct chunkParameters
};
//**************************************
// MACRO
//**************************************
/**************************************
* MACRO
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define PROGRESS(...) no_prompt ? 0 : DISPLAY(__VA_ARGS__)
@ -251,8 +251,127 @@ static U64 BMK_GetFileSize(char* infilename)
/*********************************************************
Benchmark function
* Benchmark function
*********************************************************/
#ifdef __SSSE3__
#include <tmmintrin.h>
/* Idea proposed by Terje Mathisen */
static BYTE stepSize16[17] = {16,16,16,15,16,15,12,14,16,9,10,11,12,13,14,15,16};
static __m128i replicateTable[17] = {
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
{0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1},
{0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0},
{0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3},
{0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,0},
{0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3},
{0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1},
{0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7},
{0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6},
{0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5},
{0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4},
{0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3},
{0,1,2,3,4,5,6,7,8,9,10,11,12,0,1,2},
{0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1},
{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0},
{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}};
static BYTE stepSize32[17] = {32,32,32,30,32,30,30,28,32,27,30,22,24,26,28,30,16};
static __m128i replicateTable2[17] = {
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
{0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1},
{1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1},
{0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3},
{1,2,3,4,0,1,2,3,4,0,1,2,3,4,0,1},
{4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1},
{2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3},
{0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7},
{7,8,0,1,2,3,4,5,6,7,8,0,1,2,3,4},
{6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1},
{5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9},
{4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7},
{3,4,5,6,7,8,9,10,11,12,0,1,2,3,4,5},
{2,3,4,5,6,7,8,9,10,11,12,13,0,1,2,3},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,1},
{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}};
U32 lz4_decode_sse(BYTE* dest, BYTE* src, U32 srcLength)
{
BYTE* d = dest, *e = src+srcLength;
unsigned token, lit_len, mat_len;
__m128i a;
BYTE* dstore, *msrc;
if (!srcLength) return 0;
goto start;
do {
U32 step;
unsigned mat_offset = src[0] + (src[1] << 8);
src += 2;
msrc = d - mat_offset;
if (mat_len == 15) {
do {
token = *src++;
mat_len += token;
} while (token == 255);
}
mat_len += 4;
dstore = d;
d += mat_len;
if (mat_offset <= 16)
{ // Bulk store only!
__m128i a2;
a = _mm_loadu_si128((const __m128i *)msrc);
a2 = _mm_shuffle_epi8(a, replicateTable2[mat_offset]);
a = _mm_shuffle_epi8(a, replicateTable[mat_offset]);
step = stepSize32[mat_offset];
do {
_mm_storeu_si128((__m128i *)dstore, a);
_mm_storeu_si128((__m128i *)(dstore+16), a2);
dstore += step;
} while (dstore < d);
}
else
{
do
{
a = _mm_loadu_si128((const __m128i *)msrc);
_mm_storeu_si128((__m128i *)dstore, a);
msrc += sizeof(a);
dstore += sizeof(a);
} while (dstore < d);
}
start:
token = *src++;
lit_len = token >> 4;
mat_len = token & 15;
if (token >= 0xf0) { // lit_len == 15
do {
token = *src++;
lit_len += token;
} while (token == 255);
}
dstore = d;
msrc = src;
d += lit_len;
src += lit_len;
do {
a = _mm_loadu_si128((const __m128i *)msrc);
_mm_storeu_si128((__m128i *)dstore, a);
msrc += sizeof(a);
dstore += sizeof(a);
} while (dstore < d);
} while (src < e);
return (U32)(d-dest);
}
#endif // __SSSE3__
static int local_LZ4_compress_limitedOutput(const char* in, char* out, int inSize)
{
@ -345,6 +464,7 @@ static int local_LZ4_saveDictHC(const char* in, char* out, int inSize)
static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize)
{
(void)inSize;
//lz4_decode_sse((BYTE*)out, (BYTE*)in, inSize);
LZ4_decompress_fast(in, out, outSize);
return outSize;
}
@ -677,7 +797,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.);
// CRC Checking
/* CRC Checking */
crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); }
}