Change the API to collect sequences across all blocks
This commit is contained in:
parent
623b90f85d
commit
5f8b0f6890
@ -13,6 +13,7 @@
|
||||
***************************************/
|
||||
#include <limits.h> /* INT_MAX */
|
||||
#include <string.h> /* memset */
|
||||
#include <stdlib.h>
|
||||
#include "cpu.h"
|
||||
#include "mem.h"
|
||||
#include "hist.h" /* HIST_countFast_wksp */
|
||||
@ -2190,77 +2191,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
|
||||
ssPtr->longLengthID = 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
U32 matchPos;
|
||||
U32 offset;
|
||||
U32 litLength;
|
||||
U32 matchLength;
|
||||
int rep;
|
||||
} Sequence;
|
||||
|
||||
static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict,
|
||||
void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
Sequence* outSeqs, size_t outSeqsSize)
|
||||
{
|
||||
const seqStore_t* seqStore;
|
||||
const seqDef* seqs;
|
||||
size_t seqsSize;
|
||||
|
||||
size_t i; int repIdx; size_t position;
|
||||
|
||||
size_t blockSize = ZSTD_getBlockSize(zc);
|
||||
size_t maxOutput = ZSTD_compressBound(blockSize);
|
||||
|
||||
assert(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict)));
|
||||
assert(dstSize >= maxOutput); dstSize = maxOutput;
|
||||
assert(srcSize >= blockSize); srcSize = blockSize;
|
||||
assert(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize)));
|
||||
|
||||
seqStore = ZSTD_getSeqStore(zc);
|
||||
seqs = seqStore->sequencesStart;
|
||||
seqsSize = seqStore->sequences - seqStore->sequencesStart;
|
||||
|
||||
assert(outSeqsSize >= seqsSize); outSeqsSize = seqsSize;
|
||||
|
||||
for (i = 0, position = 0; i < seqsSize; ++i) {
|
||||
outSeqs[i].offset = seqs[i].offset;
|
||||
outSeqs[i].litLength = seqs[i].litLength;
|
||||
outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
|
||||
|
||||
if (i == seqStore->longLengthPos) {
|
||||
if (seqStore->longLengthID == 1) {
|
||||
outSeqs[i].litLength += 0x10000;
|
||||
} else if (seqStore->longLengthID == 2) {
|
||||
outSeqs[i].matchLength += 0x10000;
|
||||
}
|
||||
}
|
||||
|
||||
if (outSeqs[i].offset <= 3 /* num reps */) {
|
||||
outSeqs[i].rep = 1;
|
||||
repIdx = i - outSeqs[i].offset;
|
||||
|
||||
if (repIdx >= 0) {
|
||||
outSeqs[i].offset = outSeqs[repIdx].offset;
|
||||
}
|
||||
|
||||
if (repIdx == -1) {
|
||||
outSeqs[i].offset = 1;
|
||||
} else if (repIdx == -2) {
|
||||
outSeqs[i].offset = 4;
|
||||
} else if (repIdx == -3) {
|
||||
outSeqs[i].offset = 8;
|
||||
}
|
||||
} else {
|
||||
outSeqs[i].offset -= 3 /* num reps */;
|
||||
}
|
||||
|
||||
position += outSeqs[i].litLength;
|
||||
outSeqs[i].matchPos = position;
|
||||
position += outSeqs[i].matchLength;
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_buildSeqStore_e :
 * NOTE(review): presumably the outcome reported by ZSTD_buildSeqStore() - confirm against its body. */
typedef enum {
    ZSTDbss_compress,
    ZSTDbss_noCompress
} ZSTD_buildSeqStore_e;
|
||||
|
||||
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
@ -2394,6 +2324,81 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
|
||||
}
|
||||
}
|
||||
|
||||
static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, seqDef* seqs,
|
||||
ZSTD_Sequence* outSeqs, size_t seqsSize)
|
||||
{
|
||||
size_t i; size_t position; int repIdx;
|
||||
for (i = 0, position = 0; i < seqsSize; ++i) {
|
||||
outSeqs[i].offset = seqs[i].offset;
|
||||
outSeqs[i].litLength = seqs[i].litLength;
|
||||
outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
|
||||
|
||||
if (i == seqStore->longLengthPos) {
|
||||
if (seqStore->longLengthID == 1) {
|
||||
outSeqs[i].litLength += 0x10000;
|
||||
} else if (seqStore->longLengthID == 2) {
|
||||
outSeqs[i].matchLength += 0x10000;
|
||||
}
|
||||
}
|
||||
|
||||
if (outSeqs[i].offset <= 3 /* num reps */) {
|
||||
outSeqs[i].rep = 1;
|
||||
repIdx = i - outSeqs[i].offset;
|
||||
|
||||
if (repIdx >= 0) {
|
||||
outSeqs[i].offset = outSeqs[repIdx].offset;
|
||||
}
|
||||
|
||||
if (repIdx == -1) {
|
||||
outSeqs[i].offset = 1;
|
||||
} else if (repIdx == -2) {
|
||||
outSeqs[i].offset = 4;
|
||||
} else if (repIdx == -3) {
|
||||
outSeqs[i].offset = 8;
|
||||
}
|
||||
} else {
|
||||
outSeqs[i].offset -= 3 /* num reps */;
|
||||
}
|
||||
|
||||
position += outSeqs[i].litLength;
|
||||
outSeqs[i].matchPos = position;
|
||||
position += outSeqs[i].matchLength;
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_getBlockSequences() :
 * Appends all sequences currently held in `seqStore` to the output array
 * tracked by cctx->seqCollector, then moves the collector's write cursor
 * past them.
 * Remaining capacity is verified by assert() only.
 */
static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore)
{
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;

    /* already-collected count + this block's count must stay strictly below capacity */
    assert((cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart) + nbSeqs
           < cctx->seqCollector.maxSequences);

    ZSTD_copyBlockSequences(seqStore,
                            seqStore->sequencesStart,
                            cctx->seqCollector.seqCurrent,
                            nbSeqs);

    cctx->seqCollector.seqCurrent += nbSeqs;
}
|
||||
|
||||
/*! ZSTD_getSequences() :
 *  Compresses `src` with `zc` at compression `level`, purely to collect the
 *  sequences produced for each block; the compressed output goes to a
 *  temporary buffer and is discarded.
 *
 *  `outSeqs` receives the sequences; `outSeqsSize` is its capacity, counted
 *  in sequences.
 *
 * @return : number of sequences written into `outSeqs`.
 *           Returns 0 when the temporary buffer cannot be allocated.
 *           A compression error is not reported : the sequences collected
 *           before the error are still counted.
 */
size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
                         size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize,
                         int level)
{
    /* ZSTD_compressBound() is already the worst-case size of a single-pass
     * compression : no extra multiplier is needed (and none can overflow). */
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc(dstCapacity);
    size_t seqsSize;
    SeqCollector seqCollector;

    if (dst == NULL) return 0;   /* no scratch buffer : nothing can be collected */

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqCurrent = outSeqs;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    /* return value intentionally ignored : only the collected sequences matter */
    (void)ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level);
    seqsSize = zc->seqCollector.seqCurrent - zc->seqCollector.seqStart;

    /* Disarm the collector : otherwise any later compression with `zc`
     * would keep writing into the caller's `outSeqs` buffer. */
    zc->seqCollector.collectSequences = 0;
    zc->seqCollector.seqStart = NULL;
    zc->seqCollector.seqCurrent = NULL;
    zc->seqCollector.maxSequences = 0;

    free(dst);
    return seqsSize;
}
|
||||
|
||||
/*! ZSTD_compress_frameChunk() :
|
||||
* Compress a chunk of data into one or multiple blocks.
|
||||
@ -2438,6 +2443,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
||||
ip, blockSize);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
if (cctx->seqCollector.collectSequences) {
|
||||
ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx));
|
||||
}
|
||||
|
||||
if (cSize == 0) { /* block is not compressible */
|
||||
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
||||
|
@ -192,6 +192,13 @@ typedef struct {
|
||||
size_t capacity; /* The capacity starting from `seq` pointer */
|
||||
} rawSeqStore_t;
|
||||
|
||||
typedef struct {
|
||||
int collectSequences;
|
||||
ZSTD_Sequence* seqStart;
|
||||
ZSTD_Sequence* seqCurrent;
|
||||
size_t maxSequences;
|
||||
} SeqCollector;
|
||||
|
||||
struct ZSTD_CCtx_params_s {
|
||||
ZSTD_format_e format;
|
||||
ZSTD_compressionParameters cParams;
|
||||
@ -238,6 +245,7 @@ struct ZSTD_CCtx_s {
|
||||
XXH64_state_t xxhState;
|
||||
ZSTD_customMem customMem;
|
||||
size_t staticSize;
|
||||
SeqCollector seqCollector;
|
||||
|
||||
seqStore_t seqStore; /* sequences storage ptrs */
|
||||
ldmState_t ldmState; /* long distance matching state */
|
||||
|
11
lib/zstd.h
11
lib/zstd.h
@ -1072,6 +1072,14 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
|
||||
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
|
||||
|
||||
/*! ZSTD_Sequence :
 *  One sequence (literals run followed by a match), as exported by ZSTD_getSequences(). */
typedef struct {
    unsigned int matchPos;      /* position of the match */
    unsigned int offset;        /* offset of the match */
    unsigned int litLength;     /* length of the literals run */
    unsigned int matchLength;   /* length of the match */
    int rep;                    /* repeat-offset marker */
} ZSTD_Sequence;
|
||||
|
||||
typedef struct {
|
||||
unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
|
||||
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
|
||||
@ -1210,6 +1218,9 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
|
||||
* or an error code (if srcSize is too small) */
|
||||
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
||||
|
||||
/*! ZSTD_getSequences() :
 *  Compresses `src` using `zc` at compression `level` and stores the sequences
 *  of every block into `outSeqs`.
 *  `outSeqsSize` is the capacity of `outSeqs`, counted in sequences.
 * @return : number of sequences written into `outSeqs`.
 */
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc,
                                     const void* src, size_t srcSize,
                                     ZSTD_Sequence* outSeqs, size_t outSeqsSize,
                                     int level);
|
||||
|
||||
|
||||
/***************************************
|
||||
* Memory management
|
||||
|
@ -1960,6 +1960,11 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++);
|
||||
memset(CNBuffer, 0, 1000000);
|
||||
assert(ZSTD_getSequences(ZSTD_createCCtx(), CNBuffer, 1000000,
|
||||
compressedBuffer, 1000000, 3) == 1000000 / 131071 + 1);
|
||||
|
||||
/* All zeroes test (test bug #137) */
|
||||
#define ZEROESLENGTH 100
|
||||
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
|
||||
|
Loading…
Reference in New Issue
Block a user