minor compression speed improvement

This commit is contained in:
Yann Collet 2016-01-28 17:56:33 +01:00
parent 9cadd0853c
commit 863ec40f1e
4 changed files with 66 additions and 110 deletions

View File

@ -1,5 +1,5 @@
/*
dictBuilder.c
dictBuilder - dictionary builder for LZ algorithms
Copyright (C) Yann Collet 2016
GPL v2 License
@ -20,7 +20,6 @@
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/* **************************************
@ -40,13 +39,8 @@
# define _LARGEFILE64_SOURCE
#endif
/* S_ISREG & gettimeofday() are not supported by MSVC */
#if defined(_MSC_VER) || defined(_WIN32)
# define BMK_LEGACY_TIMER 1
#endif
/* *************************************
/*-*************************************
* Includes
***************************************/
#include <stdlib.h> /* malloc, free */
@ -75,7 +69,7 @@
#endif
/* *************************************
/*-*************************************
* Constants
***************************************/
#define KB *(1 <<10)
@ -93,8 +87,8 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t
#define MINRATIO 4
/* *************************************
* console display
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
@ -121,7 +115,7 @@ void DiB_printHex(U32 dlevel, const void* ptr, size_t length)
}
/* *************************************
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG

View File

@ -1052,8 +1052,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
/* Build Decoding Table */
DTableH->tableLog = (U16)nbBits;
DTableH->fastMode = 1;
for (s=0; s<=maxSymbolValue; s++)
{
for (s=0; s<=maxSymbolValue; s++) {
dinfo[s].newState = 0;
dinfo[s].symbol = (BYTE)s;
dinfo[s].nbBits = (BYTE)nbBits;
@ -1087,8 +1086,7 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
/* 4 symbols per loop */
for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
{
for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4) {
op[0] = FSE_GETSYMBOL(&state1);
if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
@ -1109,8 +1107,7 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
/* tail */
/* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
while (1)
{
while (1) {
if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
break;

View File

@ -1,6 +1,6 @@
/*
Buffered version of Zstd compression library
Copyright (C) 2015, Yann Collet.
Copyright (C) 2015-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -127,16 +127,14 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic
neededInBuffSize = (size_t)1 << params.windowLog;
/* allocate buffers */
if (zbc->inBuffSize < neededInBuffSize)
{
if (zbc->inBuffSize < neededInBuffSize) {
zbc->inBuffSize = neededInBuffSize;
free(zbc->inBuff); /* should not be necessary */
zbc->inBuff = (char*)malloc(neededInBuffSize);
if (zbc->inBuff == NULL) return ERROR(memory_allocation);
}
zbc->blockSize = MIN(BLOCKSIZE, zbc->inBuffSize);
if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1)
{
if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) {
zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
free(zbc->outBuff); /* should not be necessary */
zbc->outBuff = (char*)malloc(zbc->outBuffSize);
@ -188,8 +186,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
char* op = ostart;
char* const oend = ostart + *maxDstSizePtr;
while (notDone)
{
while (notDone) {
switch(zbc->stage)
{
case ZBUFFcs_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
@ -201,9 +198,9 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
size_t loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
zbc->inBuffPos += loaded;
ip += loaded;
if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) )
{ notDone = 0; break; } /* not enough input to get a full block : stop there, wait for more */
}
if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) {
notDone = 0; break; /* not enough input to get a full block : stop there, wait for more */
} }
/* compress current block (note : this stage cannot be stopped in the middle) */
{
void* cDst;
@ -235,8 +232,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
size_t flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
op += flushed;
zbc->outBuffFlushedSize += flushed;
if (toFlush!=flushed)
{ notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */
if (toFlush!=flushed) { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */
zbc->outBuffContentSize = 0;
zbc->outBuffFlushedSize = 0;
zbc->stage = ZBUFFcs_load;
@ -259,7 +255,9 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
void* dst, size_t* maxDstSizePtr,
const void* src, size_t* srcSizePtr)
{ return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0); }
{
return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0);
}
@ -387,11 +385,9 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
char* const oend = ostart + *maxDstSizePtr;
U32 notDone = 1;
while (notDone)
{
while (notDone) {
switch(zbc->stage)
{
case ZBUFFds_init :
return ERROR(init_missing);
@ -400,8 +396,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
{
size_t headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
if (ZSTD_isError(headerSize)) return headerSize;
if (headerSize)
{
if (headerSize) {
/* not enough input to decode header : tell how many bytes would be necessary */
memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
zbc->hPos += *srcSizePtr;
@ -423,8 +418,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
ip += headerSize;
headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
if (ZSTD_isError(headerSize)) return headerSize;
if (headerSize)
{
if (headerSize) {
/* not enough input to decode header : tell how many bytes would be necessary */
*maxDstSizePtr = 0;
return headerSize - zbc->hPos;
@ -437,23 +431,19 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
{
size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
size_t neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */
if (zbc->inBuffSize < neededInSize)
{
if (zbc->inBuffSize < neededInSize) {
free(zbc->inBuff);
zbc->inBuffSize = neededInSize;
zbc->inBuff = (char*)malloc(neededInSize);
if (zbc->inBuff == NULL) return ERROR(memory_allocation);
}
if (zbc->outBuffSize < neededOutSize)
{
if (zbc->outBuffSize < neededOutSize) {
free(zbc->outBuff);
zbc->outBuffSize = neededOutSize;
zbc->outBuff = (char*)malloc(neededOutSize);
if (zbc->outBuff == NULL) return ERROR(memory_allocation);
}
}
if (zbc->hPos)
{
} }
if (zbc->hPos) {
/* some data already loaded into headerBuffer : transfer into inBuff */
memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
zbc->inPos = zbc->hPos;
@ -466,14 +456,12 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
case ZBUFFds_read:
{
size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
if (neededInSize==0) /* end of frame */
{
if (neededInSize==0) { /* end of frame */
zbc->stage = ZBUFFds_init;
notDone = 0;
break;
}
if ((size_t)(iend-ip) >= neededInSize)
{
if ((size_t)(iend-ip) >= neededInSize) {
/* directly decode from src */
size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
@ -509,16 +497,14 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
zbc->outEnd = zbc->outStart + decodedSize;
zbc->stage = ZBUFFds_flush;
// break; /* ZBUFFds_flush follows */
}
}
} }
case ZBUFFds_flush:
{
size_t toFlushSize = zbc->outEnd - zbc->outStart;
size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
op += flushedSize;
zbc->outStart += flushedSize;
if (flushedSize == toFlushSize)
{
if (flushedSize == toFlushSize) {
zbc->stage = ZBUFFds_read;
if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
zbc->outStart = zbc->outEnd = 0;
@ -529,8 +515,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
break;
}
default: return ERROR(GENERIC); /* impossible */
}
}
} }
*srcSizePtr = ip-istart;
*maxDstSizePtr = op-ostart;
@ -545,11 +530,6 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
/* *************************************
* Tool functions
***************************************/

View File

@ -1,6 +1,6 @@
/*
ZSTD HC - High Compression Mode of Zstandard
Copyright (C) 2015, Yann Collet.
Copyright (C) 2015-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -181,29 +181,25 @@ void ZSTD_validateParams(ZSTD_parameters* params)
static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
ZSTD_parameters params)
{
/* note : params considered validated here */
{ /* note : params considered validated here */
const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog);
/* reserve table memory */
{
const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize);
if (zc->workSpaceSize < neededSpace) {
free(zc->workSpace);
zc->workSpace = malloc(neededSpace);
if (zc->workSpace == NULL) return ERROR(memory_allocation);
zc->workSpaceSize = neededSpace;
}
memset(zc->workSpace, 0, tableSpace ); /* reset only tables */
zc->hashTable = (U32*)(zc->workSpace);
zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog);
zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog);
zc->hufTable = (HUF_CElt*)zc->seqStore.buffer;
zc->flagStaticTables = 0;
zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256;
const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize);
if (zc->workSpaceSize < neededSpace) {
free(zc->workSpace);
zc->workSpace = malloc(neededSpace);
if (zc->workSpace == NULL) return ERROR(memory_allocation);
zc->workSpaceSize = neededSpace;
}
memset(zc->workSpace, 0, tableSpace ); /* reset only tables */
zc->hashTable = (U32*)(zc->workSpace);
zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog);
zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog);
zc->hufTable = (HUF_CElt*)zc->seqStore.buffer;
zc->flagStaticTables = 0;
zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256;
zc->nextToUpdate = 1;
zc->nextSrc = NULL;
@ -256,7 +252,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
/* copy entropy tables */
dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
if (dstCCtx->flagStaticTables) {
if (srcCCtx->flagStaticTables) {
memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
@ -267,7 +263,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
}
/** ZSTD_reduceIndex
/*! ZSTD_reduceIndex
* rescale indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc,
const U32 reducerValue)
@ -284,7 +280,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc,
}
/* *******************************************************
/*-*******************************************************
* Block entropic compression
*********************************************************/
@ -553,7 +549,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
MEM_writeLE16(op, (U16)nbSeq); op+=2;
seqHead = op;
/* dumps : contains too large lengths */
/* dumps : contains rests of large lengths */
{
size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
if (dumpsLength < 512) {
@ -773,10 +769,8 @@ static unsigned ZSTD_highbit(U32 val)
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
if (MEM_isLittleEndian())
{
if (MEM_64bits())
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanForward64( &r, (U64)val );
@ -787,9 +781,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
}
else /* 32 bits */
{
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r=0;
_BitScanForward( &r, (U32)val );
@ -801,11 +793,8 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}
else /* Big Endian CPU */
{
if (MEM_64bits())
{
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanReverse64( &r, val );
@ -820,9 +809,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
r += (!val);
return r;
# endif
}
else /* 32 bits */
{
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r = 0;
_BitScanReverse( &r, (unsigned long)val );
@ -835,8 +822,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
r += (!val);
return r;
# endif
}
}
} }
}
@ -874,24 +860,23 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
/* *************************************
/*-*************************************
* Hashes
***************************************/
static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; }
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); }
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; }
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); }
static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; }
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); }
static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
@ -906,10 +891,10 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
/* *************************************
* Fast Scan
***************************************/
#define FILLHASHSTEP 3
static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
{