diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 74f2cee3..a1bf866c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2505,6 +2505,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) for (i = 0; i < seqStoreSeqSize; ++i) { outSeqs[i].litLength = seqStoreSeqs[i].litLength; outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; if (i == seqStore->longLengthPos) { if (seqStore->longLengthID == 1) { @@ -2549,8 +2550,8 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) zc->seqCollector.seqIndex += seqStoreSeqSize; } -size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize) +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) { const size_t dstCapacity = ZSTD_compressBound(srcSize); void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); @@ -2569,6 +2570,22 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, return zc->seqCollector.seqIndex; } +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + /* Returns true if the given block is a RLE block */ static int ZSTD_isRLE(const BYTE *ip, size_t length) { size_t i; diff --git a/lib/zstd.h b/lib/zstd.h index 18720cc7..955df70f 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1149,7 +1149,7 @@ typedef struct { * rep == 2 --> offset == repeat_offset_3 * rep == 3 --> offset == repeat_offset_1 - 1 * - * Note: This field is optional. ZSTD_getSequences() will calculate the value of + * Note: This field is optional. 
ZSTD_generateSequences() will calculate the value of * 'rep', but repeat offsets do not necessarily need to be calculated from an external * sequence provider's perspective. */ @@ -1297,17 +1297,36 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store. - * Each block will end with a dummy sequence with offset == 0, matchLength == 0, and litLength == length of last literals. +typedef enum { + ZSTD_sf_explicitBlockDelimiters, /* Representation of ZSTD_Sequence contains explicit block delimiters */ + ZSTD_sf_noBlockDelimiters /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. * * zc can be used to insert custom compression params. * This function invokes ZSTD_compress2 - * @return : number of sequences extracted + * @return : number of sequences generated */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into the literals of the next sequence. 
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); /*************************************** * Memory management diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 796e8bed..65e44209 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -305,13 +305,17 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) #endif -static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size) +static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, + BYTE* src, size_t size, ZSTD_sequenceFormat_e format) { size_t i; size_t j; for(i = 0; i < seqsSize; ++i) { assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size); assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size); + if (format == ZSTD_sf_noBlockDelimiters) { + assert(seqs[i].matchLength != 0 || seqs[i].offset != 0); + } memcpy(dst, src, seqs[i].litLength); dst += seqs[i].litLength; @@ -326,6 +330,9 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, size -= seqs[i].matchLength; } } + if (format == ZSTD_sf_noBlockDelimiters) { + memcpy(dst, src, size); + } } /*============================================= @@ -2702,9 +2709,9 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); } - DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++); + DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences decode from sequences test : ", testNb++); { - size_t srcSize = 100 KB; + size_t srcSize = 150 KB; BYTE* src = (BYTE*)CNBuffer; BYTE* decoded = (BYTE*)compressedBuffer; @@ -2718,11 +2725,14 @@ static int basicUnitTests(U32 const seed, double compressibility) /* 
Populate src with random data */ RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed); - /* get the sequences */ - seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize); + /* Test with block delimiters roundtrip */ + seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize); + FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters); + assert(!memcmp(CNBuffer, compressedBuffer, srcSize)); - /* "decode" and compare the sequences */ - FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize); + /* Test no block delimiters roundtrip */ + seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize); + FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters); assert(!memcmp(CNBuffer, compressedBuffer, srcSize)); ZSTD_freeCCtx(cctx);