Change API to get sequences across all blocks

This commit is contained in:
Bimba Shrestha 2019-08-30 09:18:44 -07:00
parent 623b90f85d
commit 5f8b0f6890
4 changed files with 103 additions and 71 deletions

View File

@ -13,6 +13,7 @@
***************************************/
#include <limits.h> /* INT_MAX */
#include <string.h> /* memset */
#include <stdlib.h>
#include "cpu.h"
#include "mem.h"
#include "hist.h" /* HIST_countFast_wksp */
@ -2190,77 +2191,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
ssPtr->longLengthID = 0;
}
/* One sequence as exported by ZSTD_getSequencesForOneBlock().
 * All positions and lengths are expressed relative to the single block being exported. */
typedef struct {
U32 matchPos; /* running position at which the match starts : sum of all previous litLength and matchLength, plus this litLength */
U32 offset; /* match offset; for repeat-offset sequences it is copied from an earlier exported sequence (or a fixed start value) */
U32 litLength; /* number of literals preceding the match */
U32 matchLength; /* match length, with the minimum match (3) already added back */
int rep; /* set to 1 when the stored offset value was <= 3 (repeat offset) */
} Sequence;
/*! ZSTD_getSequencesForOneBlock() :
 *  Calls ZSTD_compressBegin_usingCDict(zc, cdict), compresses one block of
 *  `src` (ZSTD_getBlockSize(zc) bytes) into `dst`, then exports the sequences
 *  left in the context's sequence store into `outSeqs`.
 *
 *  Preconditions, checked with assert() :
 *    - dstSize     >= ZSTD_compressBound(ZSTD_getBlockSize(zc))
 *    - srcSize     >= ZSTD_getBlockSize(zc)
 *    - outSeqsSize >= number of sequences generated for the block
 *  When asserts are disabled, at most `outSeqsSize` sequences are written.
 *
 * @return : number of sequences written into `outSeqs`,
 *           or an error code (testable with ZSTD_isError()) when
 *           initialisation or block compression fails.
 */
static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict,
                                           void* dst, size_t dstSize,
                                           const void* src, size_t srcSize,
                                           Sequence* outSeqs, size_t outSeqsSize)
{
    const seqStore_t* seqStore;
    const seqDef* seqs;
    size_t seqsSize;
    size_t i;
    size_t position = 0;
    size_t const blockSize = ZSTD_getBlockSize(zc);
    size_t const maxOutput = ZSTD_compressBound(blockSize);

    /* The compression calls must not live inside assert() : with NDEBUG they
     * would be compiled out and the sequence store would never be filled. */
    {   size_t const initCode = ZSTD_compressBegin_usingCDict(zc, cdict);
        if (ZSTD_isError(initCode)) return initCode;
    }

    assert(dstSize >= maxOutput); dstSize = maxOutput;
    assert(srcSize >= blockSize); srcSize = blockSize;

    {   size_t const cSize = ZSTD_compressBlock(zc, dst, dstSize, src, srcSize);
        if (ZSTD_isError(cSize)) return cSize;
    }

    seqStore = ZSTD_getSeqStore(zc);
    seqs = seqStore->sequencesStart;
    seqsSize = (size_t)(seqStore->sequences - seqStore->sequencesStart);

    /* Never write past the caller's array, even when asserts are disabled. */
    assert(outSeqsSize >= seqsSize);
    if (seqsSize > outSeqsSize) seqsSize = outSeqsSize;

    for (i = 0; i < seqsSize; ++i) {
        outSeqs[i].offset = seqs[i].offset;
        outSeqs[i].litLength = seqs[i].litLength;
        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;

        /* The sequence store flags one sequence whose literal length
         * (longLengthID == 1) or match length (longLengthID == 2) needs an
         * extra 0x10000 added to the stored value. */
        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthID == 1) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthID == 2) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (outSeqs[i].offset <= 3 /* num reps */) {
            /* Repeat offset : look it up `offset` sequences back. */
            int const repIdx = (int)i - (int)outSeqs[i].offset;
            outSeqs[i].rep = 1;
            if (repIdx >= 0) {
                outSeqs[i].offset = outSeqs[repIdx].offset;
            } else if (repIdx == -1) {
                /* NOTE(review): 1, 4, 8 are presumably the starting repeat
                 * offsets used before any sequence exists - confirm. */
                outSeqs[i].offset = 1;
            } else if (repIdx == -2) {
                outSeqs[i].offset = 4;
            } else if (repIdx == -3) {
                outSeqs[i].offset = 8;
            }
        } else {
            /* Regular offset : always define `rep` so callers never read an
             * indeterminate value. */
            outSeqs[i].rep = 0;
            outSeqs[i].offset -= 3 /* num reps */;
        }

        position += outSeqs[i].litLength;
        outSeqs[i].matchPos = (U32)position;
        position += outSeqs[i].matchLength;
    }

    return seqsSize;
}
/* NOTE(review): presumably the outcome reported by ZSTD_buildSeqStore() (declared right below);
 * its body is not visible here, so confirm the meaning of each value against it. */
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
@ -2394,6 +2324,81 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
}
}
/*! ZSTD_copyBlockSequences() :
 *  Converts the `seqsSize` internal sequences in `seqs` into ZSTD_Sequence
 *  entries written to `outSeqs[0 .. seqsSize-1]`.
 *
 *  For every sequence :
 *    - matchLength gets the minimum match length (3) added back;
 *    - the sequence flagged by seqStore->longLengthPos gets 0x10000 added to
 *      its litLength (longLengthID == 1) or matchLength (longLengthID == 2);
 *    - a stored offset value <= 3 is treated as a repeat offset : `rep` is set
 *      to 1 and the offset is taken from the entry `offset` positions back;
 *      otherwise `rep` is set to 0 and 3 is subtracted from the stored value;
 *    - matchPos is the running position of the match start, counted from 0
 *      at the first sequence of this call.
 *
 *  `outSeqs` must have room for `seqsSize` entries.
 */
static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, seqDef* seqs,
                                    ZSTD_Sequence* outSeqs, size_t seqsSize)
{
    size_t position = 0;
    size_t i;

    for (i = 0; i < seqsSize; ++i) {
        outSeqs[i].offset = seqs[i].offset;
        outSeqs[i].litLength = seqs[i].litLength;
        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;

        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthID == 1) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthID == 2) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (outSeqs[i].offset <= 3 /* num reps */) {
            /* Repeat offset : resolve it from a previously exported entry. */
            int const repIdx = (int)i - (int)outSeqs[i].offset;
            outSeqs[i].rep = 1;
            if (repIdx >= 0) {
                outSeqs[i].offset = outSeqs[repIdx].offset;
            } else if (repIdx == -1) {
                /* NOTE(review): 1, 4, 8 are presumably the starting repeat
                 * offsets used before any sequence exists - confirm. */
                outSeqs[i].offset = 1;
            } else if (repIdx == -2) {
                outSeqs[i].offset = 4;
            } else if (repIdx == -3) {
                outSeqs[i].offset = 8;
            }
        } else {
            /* Regular offset : `rep` must still be defined, the destination
             * array is caller memory and may hold arbitrary bytes. */
            outSeqs[i].rep = 0;
            outSeqs[i].offset -= 3 /* num reps */;
        }

        position += outSeqs[i].litLength;
        outSeqs[i].matchPos = (unsigned int)position;
        position += outSeqs[i].matchLength;
    }
}
/*! ZSTD_getBlockSequences() :
 *  Appends the sequences currently held in `seqStore` to the output array
 *  tracked by cctx->seqCollector, then advances seqCollector.seqCurrent by the
 *  number of entries written.
 *
 *  Running out of room in the output array is a caller error and trips the
 *  assert below. When asserts are disabled, the copy is truncated to the
 *  remaining room so that the caller's array is never overrun.
 */
static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore)
{
    size_t const capacity = cctx->seqCollector.maxSequences;
    size_t const used = (size_t)(cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart);
    size_t const room = (capacity > used) ? (capacity - used) : 0;
    size_t seqsSize = (size_t)(seqStore->sequences - seqStore->sequencesStart);

    assert(capacity > used + seqsSize);
    if (seqsSize > room) seqsSize = room;   /* release-build guard against overflow */

    ZSTD_copyBlockSequences(seqStore, seqStore->sequencesStart,
                            cctx->seqCollector.seqCurrent, seqsSize);
    cctx->seqCollector.seqCurrent += seqsSize;
}
/*! ZSTD_getSequences() :
 *  Compresses `src` with `zc` at compression level `level` into a temporary
 *  buffer and records, block after block, the sequences produced during that
 *  compression into `outSeqs`. The compressed output itself is discarded.
 *
 *  `outSeqsSize` is the capacity of `outSeqs`, counted in ZSTD_Sequence entries.
 *
 * @return : number of sequences written into `outSeqs`;
 *           0 when the temporary buffer cannot be allocated.
 *
 *  NOTE(review): as before, a failure of ZSTD_compressCCtx() is not reported;
 *  the returned count then only covers what was collected before the failure.
 *  Consider returning a zstd error code once callers are ready for it.
 */
size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
                         size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize,
                         int level)
{
    /* ZSTD_compressBound(srcSize) is already the worst-case output size of a
     * single-pass compression; scaling srcSize first could overflow size_t. */
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc(dstCapacity);
    size_t seqsSize;

    if (dst == NULL) return 0;   /* nothing was collected */

    zc->seqCollector.collectSequences = 1;
    zc->seqCollector.seqStart = outSeqs;
    zc->seqCollector.seqCurrent = outSeqs;
    zc->seqCollector.maxSequences = outSeqsSize;

    (void)ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level);
    seqsSize = (size_t)(zc->seqCollector.seqCurrent - zc->seqCollector.seqStart);

    /* Disarm the collector : later compressions using this context must not
     * keep writing into the caller's array. */
    zc->seqCollector.collectSequences = 0;

    free(dst);
    return seqsSize;
}
/*! ZSTD_compress_frameChunk() :
* Compress a chunk of data into one or multiple blocks.
@ -2438,6 +2443,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
ip, blockSize);
FORWARD_IF_ERROR(cSize);
if (cctx->seqCollector.collectSequences) {
ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx));
}
if (cSize == 0) { /* block is not compressible */
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);

View File

@ -192,6 +192,13 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
/* State used to export sequences into a caller-provided array while compressing.
 * It is filled in by ZSTD_getSequences() and consumed by ZSTD_getBlockSequences(). */
typedef struct {
int collectSequences; /* non-zero : ZSTD_compress_frameChunk() exports the sequences of each block */
ZSTD_Sequence* seqStart; /* first entry of the output array */
ZSTD_Sequence* seqCurrent; /* next entry to write; advanced after each exported block */
size_t maxSequences; /* capacity of the output array, in ZSTD_Sequence entries */
} SeqCollector;
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
@ -238,6 +245,7 @@ struct ZSTD_CCtx_s {
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
SeqCollector seqCollector;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */

View File

@ -1072,6 +1072,14 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
/* One sequence as reported by ZSTD_getSequences().
 * Positions are counted from the start of the block the sequence belongs to. */
typedef struct {
unsigned int matchPos; /* position at which the match starts : sum of all previous litLength and matchLength in the block, plus this litLength */
unsigned int offset; /* match offset; for repeat-offset sequences it is copied from an earlier sequence of the same block (or a fixed start value) */
unsigned int litLength; /* number of literals preceding the match */
unsigned int matchLength; /* match length, with the minimum match (3) already added back */
int rep; /* set to 1 when the sequence was stored with a repeat offset */
} ZSTD_Sequence;
typedef struct {
unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
@ -1210,6 +1218,9 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*! ZSTD_getSequences() :
 *  Compresses `src` (`srcSize` bytes) with `zc` at compression level `level`
 *  into an internally allocated temporary buffer, and writes the sequences
 *  produced for each block into `outSeqs`.
 *  `outSeqsSize` is the capacity of `outSeqs`, counted in ZSTD_Sequence entries;
 *  it must be large enough to hold every sequence generated for `src`.
 * @return : number of sequences written into `outSeqs`.
 */
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, int level);
/***************************************
* Memory management

View File

@ -1960,6 +1960,11 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
}
DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++);
memset(CNBuffer, 0, 1000000);
assert(ZSTD_getSequences(ZSTD_createCCtx(), CNBuffer, 1000000,
compressedBuffer, 1000000, 3) == 1000000 / 131071 + 1);
/* All zeroes test (test bug #137) */
#define ZEROESLENGTH 100
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);