Merge pull request #2381 from senhuang42/expand_sequence_extraction_api
Add enum to define ZSTD_Sequence type and update sequence extraction API
This commit is contained in:
commit
f62edf0fe9
@ -2505,6 +2505,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
||||
for (i = 0; i < seqStoreSeqSize; ++i) {
|
||||
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
||||
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
|
||||
outSeqs[i].rep = 0;
|
||||
|
||||
if (i == seqStore->longLengthPos) {
|
||||
if (seqStore->longLengthID == 1) {
|
||||
@ -2549,8 +2550,8 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
||||
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
||||
}
|
||||
|
||||
size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
size_t outSeqsSize, const void* src, size_t srcSize)
|
||||
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
size_t outSeqsSize, const void* src, size_t srcSize)
|
||||
{
|
||||
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
||||
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
|
||||
@ -2569,6 +2570,22 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
return zc->seqCollector.seqIndex;
|
||||
}
|
||||
|
||||
/* ZSTD_mergeBlockDelimiters() :
 * Compacts `sequences` in place. Every entry with offset == 0 and matchLength == 0
 * is treated as a block delimiter and dropped; its litLength is added to the entry
 * that immediately follows it. When the dropped entry is the last one in the array,
 * its litLength is discarded.
 * @return : number of entries kept at the front of `sequences`
 */
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize)
{
    size_t kept = 0;
    size_t cur;
    for (cur = 0; cur < seqsSize; cur++) {
        ZSTD_Sequence* const seq = &sequences[cur];
        int const isDelimiter = (seq->offset == 0) && (seq->matchLength == 0);
        if (!isDelimiter) {
            /* kept <= cur always holds, so this never overwrites an unread entry */
            sequences[kept++] = *seq;
            continue;
        }
        /* forward the delimiter's literals to the following entry, if there is one */
        if (cur + 1 < seqsSize) {
            seq[1].litLength += seq->litLength;
        }
    }
    return kept;
}
|
||||
|
||||
/* Returns true if the given block is a RLE block */
|
||||
static int ZSTD_isRLE(const BYTE *ip, size_t length) {
|
||||
size_t i;
|
||||
|
33
lib/zstd.h
33
lib/zstd.h
@ -1149,7 +1149,7 @@ typedef struct {
|
||||
* rep == 2 --> offset == repeat_offset_3
|
||||
* rep == 3 --> offset == repeat_offset_1 - 1
|
||||
*
|
||||
* Note: This field is optional. ZSTD_getSequences() will calculate the value of
|
||||
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
|
||||
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
|
||||
* sequence provider's perspective.
|
||||
*/
|
||||
@ -1297,17 +1297,36 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
|
||||
* or an error code (if srcSize is too small) */
|
||||
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
||||
|
||||
/*! ZSTD_getSequences() :
|
||||
* Extract sequences from the sequence store.
|
||||
* Each block will end with a dummy sequence with offset == 0, matchLength == 0, and litLength == length of last literals.
|
||||
/* Names the two representations of a ZSTD_Sequence array:
 * with explicit block delimiters, or with sequences only. */
typedef enum {
|
||||
ZSTD_sf_explicitBlockDelimiters, /* Representation of ZSTD_Sequence contains explicit block delimiters */
|
||||
ZSTD_sf_noBlockDelimiters /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
|
||||
} ZSTD_sequenceFormat_e;
|
||||
|
||||
/*! ZSTD_generateSequences() :
|
||||
* Generate sequences using ZSTD_compress2, given a source buffer.
|
||||
*
|
||||
* Each block will end with a dummy sequence
|
||||
* with offset == 0, matchLength == 0, and litLength == length of last literals.
|
||||
* litLength may be == 0, and if so, then the sequence (offset: 0, matchLength: 0, litLength: 0)
|
||||
* simply acts as a block delimiter.
|
||||
*
|
||||
* zc can be used to insert custom compression params.
|
||||
* This function invokes ZSTD_compress2
|
||||
* @return : number of sequences extracted
|
||||
* @return : number of sequences generated
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
size_t outSeqsSize, const void* src, size_t srcSize);
|
||||
|
||||
ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
size_t outSeqsSize, const void* src, size_t srcSize);
|
||||
|
||||
/*! ZSTD_mergeBlockDelimiters() :
|
||||
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
|
||||
* by merging them into the literals of the next sequence.
|
||||
*
|
||||
* As such, the final generated result has no explicit representation of block boundaries,
|
||||
* and the final last literals segment is not represented in the sequences.
|
||||
* @return : number of sequences left after merging
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
|
||||
|
||||
/***************************************
|
||||
* Memory management
|
||||
|
@ -305,13 +305,17 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
|
||||
|
||||
#endif
|
||||
|
||||
static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size)
|
||||
static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
|
||||
BYTE* src, size_t size, ZSTD_sequenceFormat_e format)
|
||||
{
|
||||
size_t i;
|
||||
size_t j;
|
||||
for(i = 0; i < seqsSize; ++i) {
|
||||
assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size);
|
||||
assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size);
|
||||
if (format == ZSTD_sf_noBlockDelimiters) {
|
||||
assert(seqs[i].matchLength != 0 || seqs[i].offset != 0);
|
||||
}
|
||||
|
||||
memcpy(dst, src, seqs[i].litLength);
|
||||
dst += seqs[i].litLength;
|
||||
@ -326,6 +330,9 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
|
||||
size -= seqs[i].matchLength;
|
||||
}
|
||||
}
|
||||
if (format == ZSTD_sf_noBlockDelimiters) {
|
||||
memcpy(dst, src, size);
|
||||
}
|
||||
}
|
||||
|
||||
/*=============================================
|
||||
@ -2702,9 +2709,9 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++);
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences decode from sequences test : ", testNb++);
|
||||
{
|
||||
size_t srcSize = 100 KB;
|
||||
size_t srcSize = 150 KB;
|
||||
BYTE* src = (BYTE*)CNBuffer;
|
||||
BYTE* decoded = (BYTE*)compressedBuffer;
|
||||
|
||||
@ -2718,11 +2725,14 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
/* Populate src with random data */
|
||||
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
|
||||
|
||||
/* get the sequences */
|
||||
seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
|
||||
/* Test with block delimiters roundtrip */
|
||||
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
|
||||
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
|
||||
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
|
||||
|
||||
/* "decode" and compare the sequences */
|
||||
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize);
|
||||
/* Test no block delimiters roundtrip */
|
||||
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
|
||||
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
|
||||
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
|
Loading…
Reference in New Issue
Block a user