Merge pull request #2381 from senhuang42/expand_sequence_extraction_api

Add enum to define ZSTD_Sequence type and update sequence extraction API
This commit is contained in:
sen 2020-11-06 13:00:31 -05:00 committed by GitHub
commit f62edf0fe9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 16 deletions

View File

@ -2505,6 +2505,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
for (i = 0; i < seqStoreSeqSize; ++i) {
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
outSeqs[i].rep = 0;
if (i == seqStore->longLengthPos) {
if (seqStore->longLengthID == 1) {
@ -2549,8 +2550,8 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
zc->seqCollector.seqIndex += seqStoreSeqSize;
}
size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize)
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize)
{
const size_t dstCapacity = ZSTD_compressBound(srcSize);
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
@ -2569,6 +2570,22 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
return zc->seqCollector.seqIndex;
}
/* ZSTD_mergeBlockDelimiters() :
 * Compacts `sequences` in place, dropping every entry whose offset and
 * matchLength are both 0 (a block delimiter / last-literals entry).
 * The litLength of a dropped entry is added to the entry that follows it;
 * if the dropped entry is the last one in the array, its litLength is discarded.
 * @return : number of entries remaining at the front of `sequences`
 */
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
    size_t kept = 0;
    size_t idx;
    for (idx = 0; idx < seqsSize; idx++) {
        int const isDelimiter = (sequences[idx].offset == 0)
                             && (sequences[idx].matchLength == 0);
        if (!isDelimiter) {
            /* Real sequence : slide it down into the next free slot */
            sequences[kept++] = sequences[idx];
            continue;
        }
        /* Delimiter : hand its literals to the following entry, when there is one */
        if (idx + 1 < seqsSize) {
            sequences[idx + 1].litLength += sequences[idx].litLength;
        }
    }
    return kept;
}
/* Returns true if the given block is a RLE block */
static int ZSTD_isRLE(const BYTE *ip, size_t length) {
size_t i;

View File

@ -1149,7 +1149,7 @@ typedef struct {
* rep == 2 --> offset == repeat_offset_3
* rep == 3 --> offset == repeat_offset_1 - 1
*
* Note: This field is optional. ZSTD_getSequences() will calculate the value of
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
* sequence provider's perspective.
*/
@ -1297,17 +1297,36 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*! ZSTD_getSequences() :
* Extract sequences from the sequence store.
* Each block will end with a dummy sequence with offset == 0, matchLength == 0, and litLength == length of last literals.
/* ZSTD_sequenceFormat_e :
 * Tells whether an array of ZSTD_Sequence carries explicit block delimiters.
 */
typedef enum {
    /* Block delimiters are explicitly present in the ZSTD_Sequence array */
    ZSTD_sf_explicitBlockDelimiters = 0,
    /* No block delimiters in the ZSTD_Sequence array : sequences only */
    ZSTD_sf_noBlockDelimiters = 1
} ZSTD_sequenceFormat_e;
/*! ZSTD_generateSequences() :
* Generate sequences using ZSTD_compress2, given a source buffer.
*
* Each block will end with a dummy sequence
* with offset == 0, matchLength == 0, and litLength == length of last literals.
* litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
* simply acts as a block delimiter.
*
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
* @return : number of sequences extracted
* @return : number of sequences generated
*/
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
/*! ZSTD_mergeBlockDelimiters() :
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
* by merging them into the literals of the next sequence.
*
* As such, the final generated result has no explicit representation of block boundaries,
* and the final last literals segment is not represented in the sequences.
* @return : number of sequences left after merging
*/
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
/***************************************
* Memory management

View File

@ -305,13 +305,17 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
#endif
static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size)
static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
BYTE* src, size_t size, ZSTD_sequenceFormat_e format)
{
size_t i;
size_t j;
for(i = 0; i < seqsSize; ++i) {
assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size);
assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size);
if (format == ZSTD_sf_noBlockDelimiters) {
assert(seqs[i].matchLength != 0 || seqs[i].offset != 0);
}
memcpy(dst, src, seqs[i].litLength);
dst += seqs[i].litLength;
@ -326,6 +330,9 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
size -= seqs[i].matchLength;
}
}
if (format == ZSTD_sf_noBlockDelimiters) {
memcpy(dst, src, size);
}
}
/*=============================================
@ -2702,9 +2709,9 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
}
DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++);
DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences decode from sequences test : ", testNb++);
{
size_t srcSize = 100 KB;
size_t srcSize = 150 KB;
BYTE* src = (BYTE*)CNBuffer;
BYTE* decoded = (BYTE*)compressedBuffer;
@ -2718,11 +2725,14 @@ static int basicUnitTests(U32 const seed, double compressibility)
/* Populate src with random data */
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
/* get the sequences */
seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
/* Test with block delimiters roundtrip */
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
/* "decode" and compare the sequences */
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize);
/* Test no block delimiters roundtrip */
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
ZSTD_freeCCtx(cctx);