Merge pull request #582 from iburinoc/m32

Encode/decode offsets >= 32MB in 32-bits mode
This commit is contained in:
Yann Collet 2017-03-02 16:42:50 -08:00 committed by GitHub
commit 3a55d8be26
4 changed files with 50 additions and 11 deletions

View File

@ -60,6 +60,9 @@ extern "C" {
# include <immintrin.h> /* support for bextr (experimental) */
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
#define STREAM_ACCUMULATOR_MIN_64 57
#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
/*-******************************************
* bitStream encoding API (write forward)

View File

@ -576,11 +576,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
size_t srcSize)
{
const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
const seqStore_t* seqStorePtr = &(zc->seqStore);
U32 count[MaxSeq+1];
S16 norm[MaxSeq+1];
@ -714,7 +714,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream);
{ size_t n;
@ -736,7 +747,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/
} }
@ -761,7 +782,6 @@ _check_compressibility:
return op - ostart;
}
#if 0 /* for debug */
# define STORESEQ_DEBUG
#include <stdio.h> /* fprintf */

View File

@ -1142,7 +1142,7 @@ static size_t ZSTD_decompressSequences(
}
static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState)
FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets)
{
seq_t seq;
@ -1177,8 +1177,15 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState)
if (!ofCode)
offset = 0;
else {
offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
if (ofCode <= 1) {
@ -1222,6 +1229,14 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState)
return seq;
}
static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize) {
if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
return ZSTD_decodeSequenceLong_generic(seqState, 1);
} else {
return ZSTD_decodeSequenceLong_generic(seqState, 0);
}
}
FORCE_INLINE
size_t ZSTD_execSequenceLong(BYTE* op,
BYTE* const oend, seq_t sequence,
@ -1319,6 +1334,7 @@ static size_t ZSTD_decompressSequencesLong(
const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
unsigned const windowSize = dctx->fParams.windowSize;
int nbSeq;
/* Build Decoding Tables */
@ -1348,13 +1364,13 @@ static size_t ZSTD_decompressSequencesLong(
/* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState);
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize);
}
if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState);
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
ZSTD_PREFETCH(sequence.match);

View File

@ -343,7 +343,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
#define ZSTD_WINDOWLOG_MAX_32 25
#define ZSTD_WINDOWLOG_MAX_32 27
#define ZSTD_WINDOWLOG_MAX_64 27
#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10