internal refactoring - better separation between match & entropy

Yann Collet 2015-02-09 03:50:11 +01:00
parent 5ca33779a4
commit e30c39cd1b
2 changed files with 113 additions and 107 deletions
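In summary: before this commit, ZSTD_compressBlock carved its scratch buffer into five ad-hoc pointer sets (literals, literal lengths, offsets, match lengths, dumps) and handed all of them to the entropy stage; now those streams live in a single seqStore_t owned by the compression context, ZSTD_storeSeq appends into it, and the entropy stage (renamed from ZSTD_compressEntropy to ZSTD_compressSequences) reads it back out. The standalone sketch below only illustrates the start/cursor pattern the new struct relies on; miniStore_t, ms_init and ms_append are illustrative names and do not exist in the zstd sources.

    #include <stdio.h>
    #include <string.h>

    /* Illustrative only: a two-stream cut-down of the seqStore_t idea below.
       Each stream keeps a fixed 'start' pointer plus a cursor that advances. */
    typedef struct {
        unsigned char  buffer[256];
        unsigned char* litStart;        /* literals stream */
        unsigned char* lit;
        unsigned char* litLengthStart;  /* literal-length stream */
        unsigned char* litLength;
    } miniStore_t;

    static void ms_init(miniStore_t* ms)
    {
        ms->litStart       = ms->buffer;        /* first half  : literals */
        ms->litLengthStart = ms->buffer + 128;  /* second half : lengths  */
        ms->lit       = ms->litStart;           /* same job as ZSTD_resetSeqStore */
        ms->litLength = ms->litLengthStart;
    }

    static void ms_append(miniStore_t* ms, const unsigned char* src, size_t litLen)
    {
        memcpy(ms->lit, src, litLen);                /* copy literals       */
        ms->lit += litLen;
        *(ms->litLength++) = (unsigned char)litLen;  /* record their length */
    }

    int main(void)
    {
        miniStore_t ms;
        ms_init(&ms);
        ms_append(&ms, (const unsigned char*)"abc", 3);
        ms_append(&ms, (const unsigned char*)"de", 2);
        /* consumers derive the sequence count from cursor - start,
           exactly like nbSeq = op_litLength - op_litLength_start below */
        printf("sequences stored: %d\n", (int)(ms.litLength - ms.litLengthStart));
        return 0;
    }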


@@ -540,6 +540,7 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1; /* Need a minimum to safely represent all symbol values */
//if ((FSE_highbit32(maxSymbolValue)+2) > tableLog) tableLog = FSE_highbit32(maxSymbolValue)+2; /* Need a minimum to safely represent all symbol values */
if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
return tableLog;
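A worked example of the clamping above, assuming the usual defaults of this era (FSE_DEFAULT_TABLELOG = 11, FSE_MIN_TABLELOG = 5, FSE_MAX_TABLELOG = 12; the exact values are defined elsewhere in FSE): for srcSize = 1000 and maxSymbolValue = 255, FSE_highbit32(999) = 9, so the accuracy bound 9 - 2 = 7 replaces the default 11; the symbol-coverage minimum FSE_highbit32(256) + 1 = 9 then raises it back to 9, which already sits inside [5, 12], so the function returns 9. For this example the newly added (commented-out) alternative, FSE_highbit32(255) + 2, would give the same 9.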


@@ -80,7 +80,6 @@
/********************************************************
* Compiler specifics
*********************************************************/
//#if (!(defined(_MSC_VER) && (_MSC_VER<=1500))) /* exclude Visual 2008 and below */
#ifdef __AVX2__
# include <immintrin.h> /* AVX2 intrinsics */
#endif
@@ -103,7 +102,7 @@
/********************************************************
* Basic Types
*********************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
@@ -124,7 +123,7 @@ typedef unsigned long long U64;
/********************************************************
* Constants
*********************************************************/
static const U32 ZSTD_magicNumber = 0xFD2FB51C;
static const U32 ZSTD_magicNumber = 0xFD2FB51C; /* Initial (limited) frame format */
#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
#define HASH_TABLESIZE (1 << HASH_LOG)
@@ -144,7 +143,7 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51C;
#define MB *(1 <<20)
#define GB *(1U<<30)
#define BLOCKSIZE (128 KB) // define, for static allocation
#define BLOCKSIZE (128 KB) /* define, for static allocation */
static const U32 g_maxDistance = 512 KB;
static const U32 g_maxLimit = 1 GB;
static const U32 g_searchStrength = 8;
@@ -289,12 +288,36 @@ typedef struct
U32 origSize;
} blockProperties_t;
typedef struct {
void* buffer;
U32* offsetStart;
U32* offset;
BYTE* litStart;
BYTE* lit;
BYTE* litLengthStart;
BYTE* litLength;
BYTE* matchLengthStart;
BYTE* matchLength;
BYTE* dumpsStart;
BYTE* dumps;
} seqStore_t;
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
ssPtr->offset = ssPtr->offsetStart;
ssPtr->lit = ssPtr->litStart;
ssPtr->litLength = ssPtr->litLengthStart;
ssPtr->matchLength = ssPtr->matchLengthStart;
ssPtr->dumps = ssPtr->dumpsStart;
}
typedef struct
{
const BYTE* base;
U32 current;
U32 nextUpdate;
BYTE* workplace;
seqStore_t seqStore;
#ifdef __AVX2__
__m256i hashTable[HASH_TABLESIZE>>3];
#else
@@ -305,25 +328,28 @@ typedef struct
ZSTD_cctx_t ZSTD_createCCtx(void)
{
cctxi_t* srt = (cctxi_t *) malloc( sizeof(cctxi_t) );
srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
return (ZSTD_cctx_t)srt;
cctxi_t* ctx = (cctxi_t*) malloc( sizeof(cctxi_t) );
ctx->seqStore.buffer = malloc(WORKPLACESIZE);
ctx->seqStore.offsetStart = (U32*) (ctx->seqStore.buffer);
ctx->seqStore.litStart = (BYTE*) (ctx->seqStore.offsetStart + (BLOCKSIZE>>2));
ctx->seqStore.litLengthStart = ctx->seqStore.litStart + BLOCKSIZE;
ctx->seqStore.matchLengthStart = ctx->seqStore.litLengthStart + (BLOCKSIZE>>2);
ctx->seqStore.dumpsStart = ctx->seqStore.matchLengthStart + (BLOCKSIZE>>2);
return (ZSTD_cctx_t)ctx;
}
void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
void ZSTD_resetCCtx(ZSTD_cctx_t cctx)
{
cctxi_t* srt = (cctxi_t*)ctx;
srt->base = NULL;
memset(srt->hashTable, 0, HASH_TABLESIZE*4);
cctxi_t* ctx = (cctxi_t*)cctx;
ctx->base = NULL;
memset(ctx->hashTable, 0, HASH_TABLESIZE*4);
}
size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
size_t ZSTD_freeCCtx(ZSTD_cctx_t cctx)
{
cctxi_t *srt = (cctxi_t *) (ctx);
free(srt->workplace);
free(srt);
cctxi_t* ctx = (cctxi_t*) (cctx);
free(ctx->seqStore.buffer);
free(ctx);
return 0;
}
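For reference, the new ZSTD_createCCtx partitions a single WORKPLACESIZE allocation into the seqStore_t regions in the order offsets, literals, literal lengths, match lengths, dumps; WORKPLACESIZE itself is defined elsewhere in the file and must cover at least the 2.5 x BLOCKSIZE consumed before the dumps area. A small standalone sketch that just reproduces the offset arithmetic (BLOCKSIZE hard-coded to its 128 KB value):

    #include <stdio.h>

    #define BLOCKSIZE (128*1024)   /* 128 KB, as defined in this file */

    int main(void)
    {
        /* Byte offsets of each seqStore_t region inside the single buffer
           allocated by ZSTD_createCCtx; offsets are stored as U32, hence *4. */
        size_t off_offsets      = 0;
        size_t off_literals     = off_offsets      + (BLOCKSIZE>>2) * 4;
        size_t off_litLengths   = off_literals     + BLOCKSIZE;
        size_t off_matchLengths = off_litLengths   + (BLOCKSIZE>>2);
        size_t off_dumps        = off_matchLengths + (BLOCKSIZE>>2);
        printf("offsets@%zu literals@%zu litLengths@%zu matchLengths@%zu dumps@%zu\n",
               off_offsets, off_literals, off_litLengths, off_matchLengths, off_dumps);
        /* prints: offsets@0 literals@131072 litLengths@262144 matchLengths@294912 dumps@327680 */
        return 0;
    }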
@@ -360,9 +386,9 @@ static unsigned ZSTD_highbit(U32 val)
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (GCC_VERSION >= 304) // GCC Intrinsic
# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
# else // Software version
# else /* Software version */
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
int r;
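In other words ZSTD_highbit returns floor(log2(val)), the position of the highest set bit: for val = 0x1000 (2^12) the GCC path computes 31 - __builtin_clz(0x1000) = 31 - 19 = 12, and the De Bruijn fallback above yields the same result on compilers without the intrinsic.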
@@ -481,13 +507,11 @@ static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, s
ostart[ZSTD_blockHeaderSize] = *(BYTE*)src;
// Build header
{
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_rle<<6);
}
/* Build header */
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_rle<<6);
return ZSTD_blockHeaderSize+1;
}
@@ -500,13 +524,11 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* sr
if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
// Build header
{
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
}
/* Build header */
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
return ZSTD_blockHeaderSize+srcSize;
}
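Both ZSTD_compressRle and ZSTD_noCompressBlock above emit the same 3-byte block header: a 24-bit big-endian size with the block type added into the top two bits of the first byte, leaving 22 bits of size (plenty for the 128 KB BLOCKSIZE). A standalone sketch of writing and reading such a header back; readBlockHeader and the type value 1 are illustrative, not the library's API:

    #include <stdio.h>

    typedef unsigned char BYTE;
    typedef unsigned int  U32;

    /* Illustrative reader for the 3-byte header built above:
       top 2 bits of byte 0 = block type, remaining 22 bits = block size. */
    static U32 readBlockHeader(const BYTE* src, U32* blockType)
    {
        U32 field = ((U32)src[0] << 16) + ((U32)src[1] << 8) + src[2];
        *blockType = field >> 22;
        return field & 0x3FFFFF;
    }

    int main(void)
    {
        BYTE header[3];
        U32 type, size, srcSize = 70000;
        /* write, exactly as in ZSTD_noCompressBlock */
        header[0] = (BYTE)(srcSize>>16);
        header[1] = (BYTE)(srcSize>>8);
        header[2] = (BYTE)srcSize;
        header[0] += (BYTE)(1<<6);   /* block-type code; 1 is an assumed example value */
        /* read back */
        size = readBlockHeader(header, &type);
        printf("type=%u size=%u\n", type, size);   /* -> type=1 size=70000 */
        return 0;
    }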
@@ -523,7 +545,7 @@ static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
FSE_CStream_t bitC;
FSE_CState_t CState1, CState2;
// init
/* init */
(void)dstSize; // objective : ensure it fits into dstBuffer (Todo)
FSE_initCStream(&bitC, dst);
FSE_initCState(&CState1, CTable);
@@ -619,11 +641,11 @@ static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
if (ZSTD_isError(errorCode)) return errorCode;
op += errorCode;
// check compressibility
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-minGain)
return 0;
// Build header
/* Build header */
{
size_t totalSize;
totalSize = op - ostart - ZSTD_blockHeaderSize;
@@ -637,14 +659,9 @@ static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
}
static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
const BYTE* op_lit_start, const BYTE* op_lit,
const BYTE* op_litLength_start, const BYTE* op_litLength,
const BYTE* op_matchLength_start,
const U32* op_offset_start,
const BYTE* op_dumps_start, const BYTE* op_dumps,
size_t srcSize, size_t lastLLSize
)
static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
const seqStore_t* seqStorePtr,
size_t lastLLSize, size_t srcSize)
{
FSE_CStream_t blockStream;
U32 count[256];
@@ -652,14 +669,18 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
size_t mostFrequent;
U32 max = 255;
U32 tableLog = 11;
const size_t nbSeq = op_litLength - op_litLength_start;
U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
U32 LLtype, Offtype, MLtype;
const BYTE* const op_lit_start = seqStorePtr->litStart;
const BYTE* op_lit = seqStorePtr->lit;
const BYTE* const op_litLength_start = seqStorePtr->litLengthStart;
const BYTE* op_litLength = seqStorePtr->litLength;
const U32* op_offset = seqStorePtr->offset;
const BYTE* op_matchLength = seqStorePtr->matchLength;
const size_t nbSeq = op_litLength - op_litLength_start;
BYTE* op;
const U32* op_offset = op_offset_start + nbSeq;
const BYTE* op_matchLength = op_matchLength_start + nbSeq;
BYTE offsetBits_start[BLOCKSIZE / 4];
BYTE* offsetBitsPtr = offsetBits_start;
const size_t minGain = ZSTD_minGain(srcSize);
@@ -699,7 +720,7 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
/* dumps */
{
size_t dumpsLength = op_dumps- op_dumps_start;
size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
if (dumpsLength < 512)
{
op[0] = (BYTE)(dumpsLength >> 8);
@@ -713,16 +734,16 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
op[2] = (BYTE)(dumpsLength);
op += 3;
}
memcpy(op, op_dumps_start, dumpsLength);
memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
op += dumpsLength;
}
/* Encoding table of Literal Lengths */
max = MaxLL;
mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
mostFrequent = FSE_countFast(count, seqStorePtr->litLengthStart, nbSeq, &max);
if (mostFrequent == nbSeq)
{
*op++ = *op_litLength_start;
*op++ = *(seqStorePtr->litLengthStart);
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = bt_rle;
}
@@ -744,6 +765,7 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
{
/* create OffsetBits */
size_t i;
const U32* const op_offset_start = seqStorePtr->offsetStart;
max = MaxOff;
for (i=0; i<nbSeq; i++)
{
@@ -775,10 +797,10 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
/* Encoding Table of MatchLengths */
max = MaxML;
mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
mostFrequent = FSE_countFast(count, seqStorePtr->matchLengthStart, nbSeq, &max);
if (mostFrequent == nbSeq)
{
*op++ = *op_matchLength_start;
*op++ = *seqStorePtr->matchLengthStart;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = bt_rle;
}
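The same three-step pattern repeats for literal lengths, offset codes and match lengths: count the symbols with FSE_countFast, emit an RLE description when a single symbol covers every sequence, and otherwise build a full FSE table. A self-contained stand-in for the counting/decision step (countMostFrequent is illustrative; the real code uses FSE_countFast and FSE_buildCTable_rle):

    #include <stdio.h>
    #include <string.h>

    /* Illustrative stand-in for the FSE_countFast + mode-selection step:
       when a single symbol covers every sequence, an RLE table is enough. */
    static size_t countMostFrequent(unsigned count[256], const unsigned char* src, size_t n)
    {
        size_t i, best = 0;
        memset(count, 0, 256 * sizeof(unsigned));
        for (i = 0; i < n; i++) count[src[i]]++;
        for (i = 0; i < 256; i++) if (count[i] > best) best = count[i];
        return best;
    }

    int main(void)
    {
        unsigned count[256];
        const unsigned char litLengths[] = { 4, 4, 4, 4, 4 };
        size_t nbSeq = sizeof(litLengths);
        size_t mostFrequent = countMostFrequent(count, litLengths, nbSeq);
        if (mostFrequent == nbSeq)
            printf("all %u lengths identical -> RLE description (bt_rle path)\n", (unsigned)nbSeq);
        else
            printf("mixed lengths -> build a full FSE table\n");
        return 0;
    }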
@@ -839,57 +861,45 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
}
static size_t ZSTD_storeSeq(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* anchor, size_t offset, size_t matchLength)
{
const BYTE* const dumpStart = op_dumps;
const BYTE* const l_end = op_lit + litLength;
BYTE* op_lit = seqStorePtr->lit;
BYTE* const l_end = op_lit + litLength;
/* copy Literals */
while (op_lit<l_end) COPY8(op_lit, srcLit);
while (op_lit<l_end) COPY8(op_lit, anchor);
seqStorePtr->lit += litLength;
/* literal Length */
if (litLength >= MaxLL)
{
*op_ll++ = MaxLL;
*(seqStorePtr->litLength++) = MaxLL;
if (litLength<255 + MaxLL)
*op_dumps++ = (BYTE)(litLength - MaxLL);
*(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL);
else
{
*op_dumps++ = 255;
ZSTD_writeLE32(op_dumps, (U32)litLength); op_dumps += 3;
//litLength |= 0xFF000000;
//ZSTD_writeBE32(op_dumps, (U32)litLength);
//op_dumps += 4;
*(seqStorePtr->dumps++) = 255;
ZSTD_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3;
}
}
else *op_ll = (BYTE)litLength;
else *(seqStorePtr->litLength++) = (BYTE)litLength;
/* match offset */
*op_offset = (U32)offset;
/* match offset */
*(seqStorePtr->offset++) = (U32)offset;
/* match Length */
if (matchLength >= MaxML)
{
*op_ml++ = MaxML;
if (matchLength<255 + MaxML)
*op_dumps++ = (BYTE)(matchLength - MaxML);
*(seqStorePtr->matchLength++) = MaxML;
if (matchLength < 255+MaxML)
*(seqStorePtr->dumps++) = (BYTE)(matchLength - MaxML);
else
{
*op_dumps++ = 255;
ZSTD_writeLE32(op_dumps, (U32)matchLength); op_dumps+=3;
//*(U32*)op_dumps = (U32)matchLength; op_dumps += 3; /* store direct result */
//matchLength |= 0xFF000000;
//ZSTD_writeBE32(op_dumps, (U32)matchLength);
//op_dumps += 4;
*(seqStorePtr->dumps++) = 255;
ZSTD_writeLE32(seqStorePtr->dumps, (U32)matchLength); seqStorePtr->dumps+=3;
}
}
else *op_ml = (BYTE)matchLength;
return op_dumps - dumpStart;
else *(seqStorePtr->matchLength++) = (BYTE)matchLength;
}
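The length encoding above (and its inverse, visible in the decoder hunk at the end of this diff) keeps one code byte per length in its own stream and spills large values into dumps: a length at or above the MaxLL/MaxML cap writes the cap as the code, then either a single extra byte (length minus the cap, when it fits below 255) or a 255 escape followed by the raw length in little-endian bytes, of which the decoder keeps the low 24 bits. A self-contained sketch of that escape scheme; MAXCODE and storeLength are illustrative stand-ins, not zstd identifiers:

    #include <stdio.h>

    typedef unsigned char BYTE;

    #define MAXCODE 255   /* stand-in for MaxLL / MaxML, which are defined elsewhere */

    /* Illustrative encoder for the length/dumps escape used by ZSTD_storeSeq:
       small lengths -> one code byte; larger -> MAXCODE plus extra byte(s) in dumps.
       (The original writes a 32-bit LE word and advances 3 bytes; the decoder
       masks with 0xFFFFFF, so only the low 3 bytes are significant.) */
    static BYTE* storeLength(BYTE* code, BYTE* dumps, size_t length)
    {
        if (length < MAXCODE) { *code = (BYTE)length; return dumps; }
        *code = MAXCODE;
        if (length < 255 + MAXCODE) { *dumps++ = (BYTE)(length - MAXCODE); return dumps; }
        *dumps++ = 255;
        *dumps++ = (BYTE)(length);        /* raw length, little-endian, 3 bytes */
        *dumps++ = (BYTE)(length >> 8);
        *dumps++ = (BYTE)(length >> 16);
        return dumps;
    }

    int main(void)
    {
        BYTE code, dumps[8];
        BYTE* end = storeLength(&code, dumps, 70000);   /* an oversized match length */
        printf("code=%u, dumps bytes used=%d\n", (unsigned)code, (int)(end - dumps));   /* -> 255, 4 */
        return 0;
    }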
@@ -928,12 +938,12 @@ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
}
static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
static size_t ZSTD_compressBlock(void* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
cctxi_t* srt = (cctxi_t*) ctx;
U32* HashTable = (U32*)(srt->hashTable);
void* workplace = srt->workplace;
const BYTE* const base = srt->base;
cctxi_t* ctx = (cctxi_t*) cctx;
U32* HashTable = (U32*)(ctx->hashTable);
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const base = ctx->base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart + 1;
@@ -941,15 +951,13 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 16;
U32 *op_offset = (U32*)(workplace), *op_offset_start = op_offset;
BYTE *op_l = (BYTE*)workplace + srcSize + 4, *op_l_start = op_l;
BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
BYTE *op_dumps = op_ml + (srcSize >> 2) + 4, *op_dumps_start = op_dumps;
size_t prevOffset=0, offset=0;
size_t lastLLSize;
/* init */
ZSTD_resetSeqStore(seqStorePtr);
/* Main Search Loop */
while (ip < ilimit)
{
@@ -969,8 +977,7 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
if (offsetCode == prevOffset) offsetCode = 0;
prevOffset = offset;
offset = ip-match;
op_dumps += ZSTD_storeSeq(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
op_l += litLength;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength);
/* Fill Table */
ZSTD_addPtr(HashTable, ip+1, base);
@@ -982,13 +989,12 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
/* Last Literals */
lastLLSize = iend - anchor;
memcpy(op_l, anchor, lastLLSize);
op_l += lastLLSize;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
/* Finale compression stage */
return ZSTD_compressEntropy((BYTE*)dst, maxDstSize,
op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
srcSize, lastLLSize);
return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
seqStorePtr, lastLLSize, srcSize);
}
@@ -1256,17 +1262,17 @@ FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
FSE_initDState(&state2, &bitD, DTable);
op = oend;
// 2 symbols per loop
/* 2-4 symbols per loop */
while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
{
*--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
if (LitFSELog*2+7 > sizeof(size_t)*8) // This test must be static
if (LitFSELog*2+7 > sizeof(size_t)*8) /* This test must be static */
FSE_reloadDStream(&bitD);
*--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
if (LitFSELog*4+7 < sizeof(size_t)*8) // This test must be static
if (LitFSELog*4+7 < sizeof(size_t)*8) /* This test must be static */
{
*--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
*--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
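The two "static" tests above are resolved at compile time. Assuming LitFSELog = 11 (an assumption here; the literal table log is defined elsewhere, and 11 matches the tableLog default used earlier in this file), 2*11 + 7 = 29 never exceeds the register width on 32- or 64-bit targets, so the intermediate FSE_reloadDStream compiles away; 4*11 + 7 = 51 is below 64 but not below 32, so the extra pair of decodes is kept only on 64-bit targets, hence the new "2-4 symbols per loop" comment.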
@@ -1317,7 +1323,7 @@ static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
U32 fastMode;
size_t errorCode;
if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; // too small input size
if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; /* too small input size */
errorCode = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
@@ -1572,7 +1578,6 @@ _another_round:
if (add < 255) matchLength += add;
else
{
//matchLength = (*(U32*)dumps) & 0xFFFFFF;
matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
dumps += 3;
}