Implement first pass at compressSequences()

This commit is contained in:
senhuang42 2020-09-30 17:18:20 -04:00
parent a9327b1e9b
commit 086513b5b9
4 changed files with 180 additions and 8 deletions

View File

@ -1044,7 +1044,8 @@ size_t ZSTD_CCtx_refPrefix_advanced(
/*! ZSTD_CCtx_reset() :
* Also dumps dictionary */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
size_t
ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
if ( (reset == ZSTD_reset_session_only)
|| (reset == ZSTD_reset_session_and_parameters) ) {
@ -1687,7 +1688,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
}
printf("Reserving space for seqs\n");
/* sequences storage */
ZSTD_referenceExternalSequences(zc, NULL, 0);
zc->seqStore.maxNbSeq = maxNbSeq;
@ -2333,7 +2334,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
if (cSize >= maxCSize) return 0; /* block not compressed */
}
printf("compressSequences: %u\n", cSize);
return cSize;
}
@ -2650,6 +2651,13 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->bmi2);
printf("cSize compressed seqs: %u\n", cSize);
if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc);
return 0;
}
if (frame &&
/* We don't want to emit our first block as a RLE even if it qualifies because
@ -2858,6 +2866,21 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
return (size_t)(op-ostart);
}
/* printBits() :
 * Debug helper : prints the `size` bytes located at `ptr` to stdout in binary,
 * one byte per line, most significant bit first, starting with the byte at
 * the highest address and finishing with the byte at `ptr`.
 * A blank line is always emitted at the end, even when `size` is 0.
 * `ptr` is only read, never written. */
void printBits(size_t const size, void const * const ptr)
{
    const unsigned char* const bytes = (const unsigned char*)ptr;
    size_t remaining;
    /* Count down with an unsigned index : converting `size` to int would
     * misbehave for sizes that do not fit in an int. */
    for (remaining = size; remaining > 0; remaining--) {
        unsigned const byte = bytes[remaining - 1];
        int bit;
        for (bit = 7; bit >= 0; bit--) {
            printf("%u", (byte >> bit) & 1u);
        }
        printf("\n");
    }
    puts("");
}
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
@ -2873,12 +2896,14 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
size_t pos=0;
printf("ZSTD_writeFrameHeader: pledgedSrcSize: %u\n", pledgedSrcSize);;
assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
"dst buf is too small to fit worst-case frame header size.");
DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
!params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
printf("dictIDSizeCodeLength: %u dictIDSizeCode: %u checksumFlag: %u, windowSize: %u singleSegment: %u windowLogByte: %u fcsCode: %u frameHeaderDescriptionByte: %u\n",
dictIDSizeCodeLength, dictIDSizeCode, checksumFlag, windowSize, singleSegment, windowLogByte, fcsCode, frameHeaderDescriptionByte);
if (params->format == ZSTD_f_zstd1) {
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
pos = 4;
@ -2901,6 +2926,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
}
printBits(pos, op);
return pos;
}
@ -2919,6 +2945,7 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
}
}
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
@ -3388,7 +3415,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
BYTE* op = ostart;
size_t fhSize = 0;
DEBUGLOG(4, "ZSTD_writeEpilogue");
DEBUGLOG(4, "ZSTD_writeEpilogue: dstCap: %u", dstCapacity);
RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
/* special case : empty frame */
@ -3401,6 +3428,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
}
if (cctx->stage != ZSTDcs_ending) {
printf("did this\n");
/* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
@ -3446,7 +3474,6 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
return cSize + endResult;
}
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@ -4461,6 +4488,145 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
}
}
/* ZSTD_copySequencesToSeqStore() :
 * Resets zc->seqStore, then stores the `inSeqsSize` sequences of `inSeqs` into it,
 * taking literals from `src`. Whatever part of `src` is not covered by the
 * sequences is stored as last literals.
 * Each sequence is validated before being stored :
 * - its matchLength must be at least MINMATCH,
 * - its literals + match must fit within the bytes of `src` not yet consumed.
 * The number of sequences must also not exceed zc->seqStore.maxNbSeq.
 * @return : 0 on success, otherwise a ZSTD error code */
static size_t ZSTD_copySequencesToSeqStore(ZSTD_CCtx* zc,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    size_t idx;

    DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: numSeqs: %u", (unsigned)inSeqsSize);
    ZSTD_resetSeqStore(&zc->seqStore);
    RETURN_ERROR_IF(inSeqsSize > zc->seqStore.maxNbSeq, corruption_detected,
                    "Too many sequences for the sequence store!");

    for (idx = 0; idx < inSeqsSize; ++idx) {
        U32 const litLength = inSeqs[idx].litLength;
        U32 const matchLength = inSeqs[idx].matchLength;
        U32 const offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;
        size_t const remaining = (size_t)(iend - ip);
        RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small!");
        /* Checked in two steps so that litLength + matchLength cannot overflow.
         * Must hold in release builds too : ip is not allowed to move past iend. */
        RETURN_ERROR_IF(litLength > remaining || matchLength > remaining - litLength,
                        corruption_detected, "Sequence extends beyond end of source!");
        ZSTD_storeSeq(&zc->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
        ip += (size_t)litLength + matchLength;
    }

    /* Handle last literals : bytes of src located after the last match */
    {   size_t const lastLLSize = (size_t)(iend - ip);
        if (lastLLSize > 0) {
            ZSTD_storeLastLiterals(&zc->seqStore, ip, lastLLSize);
        }
    }
    return 0;
}
size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize, int compressionLevel) {
printf("ZSTD_compressSequences_ext()\n");
BYTE* op = (BYTE*)dst;
ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
{
ZSTD_CCtx_params params = cctx->requestedParams;
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
if (cctx->cdict)
params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
cctx->pledgedSrcSizePlusOne = srcSize + 1; /* auto-fix pledgedSrcSize */
{
size_t const dictSize = prefixDict.dict
? prefixDict.dictSize
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
params.cParams = ZSTD_getCParamsFromCCtxParams(
&params, cctx->pledgedSrcSizePlusOne-1,
dictSize, mode);
}
U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
cctx->cdict,
&params, pledgedSrcSize,
ZSTDb_buffered) , "");
assert(cctx->appliedParams.nbWorkers == 0);
cctx->inToCompress = 0;
cctx->inBuffPos = 0;
/* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
cctx->streamStage = zcss_load;
cctx->frameEnded = 0;
}
size_t cSize;
if (dstCapacity < ZSTD_compressBound(srcSize))
RETURN_ERROR(dstSize_tooSmall, "Destination buffer too small!");
printf("SeqStore: maxNbSeq: %u, maxNbLits: %u\n", cctx->seqStore.maxNbSeq, cctx->seqStore.maxNbLit);
size_t frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
op += frameHeaderSize;
printf("frame header size: %u\n", frameHeaderSize);
if (cctx->appliedParams.ldmParams.enableLdm) {
ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
}
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize);
}
ZSTD_copySequencesToSeqStore(cctx, inSeqs, inSeqsSize, src, srcSize);
cSize = ZSTD_compressSequences(&cctx->seqStore,
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
&cctx->appliedParams,
op + ZSTD_blockHeaderSize, dstCapacity - frameHeaderSize - ZSTD_blockHeaderSize,
srcSize,
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
cctx->bmi2);
printf("Compressed sequences size is : %u\n", cSize);
/* Error checking */
if (!ZSTD_isError(cSize) && cSize > 1) {
ZSTD_confirmRepcodesAndEntropyTables(cctx);
}
U32 lastBlock = 1;
/* Write block header */
U32 const cBlockHeader = cSize == 1 ?
lastBlock + (((U32)bt_rle)<<1) + (U32)(cctx->blockSize << 3) :
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(op, cBlockHeader);
cSize += ZSTD_blockHeaderSize + frameHeaderSize;
cctx->consumedSrcSize += srcSize;
cctx->producedCSize += cSize;
if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
MEM_writeLE32(dst + cSize, checksum);
cSize += 4;
}
printf("Total cSize: %u\n", cSize);
return cSize;
}
/*====== Finalize ======*/
/*! ZSTD_flushStream() :

View File

@ -497,7 +497,6 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
if (ret > 0)
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
}
ip += zfh.headerSize;
remainingSize -= zfh.headerSize;
@ -1617,6 +1616,7 @@ static size_t ZSTD_decompressContinueStream(
zds->streamStage = zdss_flush;
}
} else {
/* Write directly into the output buffer */
size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);

View File

@ -56,6 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr)
{
printf("getcblockSize: srcSize: %u\n", srcSize);
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
{ U32 const cBlockHeader = MEM_readLE24(src);
@ -419,6 +420,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
* our buffer to handle the over-write.
*/
{
U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
@ -1210,6 +1212,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
printf("Last LL: %u\n", lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
if (op != NULL) {
ZSTD_memcpy(op, litPtr, lastLLSize);
@ -1217,6 +1220,8 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
}
}
printf("op - ostart: %u\n", (U32)(op-ostart));
return op-ostart;
}

View File

@ -1328,7 +1328,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
*/
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
/*! ZSTD_compressSequences_ext() :
 * Compress `src` into `dst` using the caller-provided sequences `inSeqs`
 * (`inSeqsSize` entries) rather than sequences found by the library.
 * `dstSize` is the capacity of `dst`; it is expected to be at least ZSTD_compressBound(srcSize).
 * `compressionLevel` is the compression level requested by the caller.
 * @return : size of the compressed data written into `dst`,
 *           or an error code, which can be tested using ZSTD_isError().
 */
ZSTDLIB_API size_t ZSTD_compressSequences_ext(void* dst, size_t dstSize,
                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                  const void* src, size_t srcSize, int compressionLevel);
/***************************************