From 086513b5b9ddd911b6a1497120fd9c147acab143 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 30 Sep 2020 17:18:20 -0400 Subject: [PATCH] Implement first pass at compressSequences() --- lib/compress/zstd_compress.c | 178 ++++++++++++++++++++++++- lib/decompress/zstd_decompress.c | 2 +- lib/decompress/zstd_decompress_block.c | 5 + lib/zstd.h | 3 +- 4 files changed, 180 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5e81c88c..8c5a1c5f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1044,7 +1044,8 @@ size_t ZSTD_CCtx_refPrefix_advanced( /*! ZSTD_CCtx_reset() : * Also dumps dictionary */ -size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +size_t +ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) { if ( (reset == ZSTD_reset_session_only) || (reset == ZSTD_reset_session_and_parameters) ) { @@ -1687,7 +1688,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); } - + printf("Reserving space for seqs\n"); /* sequences storage */ ZSTD_referenceExternalSequences(zc, NULL, 0); zc->seqStore.maxNbSeq = maxNbSeq; @@ -2333,7 +2334,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); if (cSize >= maxCSize) return 0; /* block not compressed */ } - + printf("compressSequences: %u\n", cSize); return cSize; } @@ -2650,6 +2651,13 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, srcSize, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); + printf("cSize compressed seqs: %u\n", cSize); + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + if (frame && /* We don't want to emit our first block as a RLE even if it qualifies because @@ -2858,6 
+2866,21 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
     return (size_t)(op-ostart);
 }
 
+/* printBits() : debug helper.
+ * Prints the `size` bytes at `ptr`, last byte first, one row of 8 bits (MSB first) per byte. */
+void printBits(size_t const size, void const * const ptr)
+{
+    unsigned char const* const bytes = (unsigned char const*)ptr;
+    size_t remaining;   /* counts down as size_t : no narrowing of `size` to int */
+    int bit;
+    for (remaining = size; remaining > 0; remaining--) {
+        for (bit = 7; bit >= 0; bit--) {
+            printf("%u", (unsigned)((bytes[remaining-1] >> bit) & 1));
+        }
+        printf("\n");
+    }
+    puts("");   /* an empty line closes the dump */
+}
 
 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                     const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
@@ -2873,12 +2896,14 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
     BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
     size_t pos=0;
 
+    printf("ZSTD_writeFrameHeader: pledgedSrcSize: %u\n", pledgedSrcSize);;
     assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
     RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                     "dst buf is too small to fit worst-case frame header size.");
     DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                 !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
-
+    printf("dictIDSizeCodeLength: %u dictIDSizeCode: %u checksumFlag: %u, windowSize: %u singleSegment: %u windowLogByte: %u fcsCode: %u frameHeaderDescriptionByte: %u\n",
+        dictIDSizeCodeLength, dictIDSizeCode, checksumFlag, windowSize, singleSegment, windowLogByte, fcsCode, frameHeaderDescriptionByte);
     if (params->format == ZSTD_f_zstd1) {
         MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
         pos = 4;
@@ -2901,6 +2926,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
         case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
         case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
     }
+    printBits(pos, op);
     return pos;
 }
 
@@ -2919,6 +2945,7 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
     }
 }
 
+
 size_t
ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
 {
     RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
@@ -3388,7 +3415,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     BYTE* op = ostart;
     size_t fhSize = 0;
 
-    DEBUGLOG(4, "ZSTD_writeEpilogue");
+    DEBUGLOG(4, "ZSTD_writeEpilogue: dstCap: %u", dstCapacity);
     RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
 
     /* special case : empty frame */
@@ -3401,6 +3428,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     }
 
     if (cctx->stage != ZSTDcs_ending) {
+        printf("did this\n");
         /* write one last empty block, make it the "last" block */
         U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
         RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
@@ -3446,7 +3474,6 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
     return cSize + endResult;
 }
 
-
 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
@@ -4461,6 +4488,176 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
     }
 }
 
+/* ZSTD_copySequencesToSeqStore() :
+ * Loads the caller-provided sequences into zc->seqStore, reading literals from `src`,
+ * then stores whatever follows the last match as the block's trailing literals.
+ * Every sequence is checked against the remaining input and the sequence store capacity,
+ * so a malformed list is reported as an error instead of reading past `src`.
+ * @return : 0 on success, otherwise a ZSTD error code */
+static size_t ZSTD_copySequencesToSeqStore(ZSTD_CCtx* zc,
+                        const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                        const void* src, size_t srcSize)
+{
+    BYTE const* ip = (BYTE const*)src;
+    BYTE const* const iend = ip + srcSize;
+    size_t idx;
+
+    DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: numSeqs: %u", (unsigned)inSeqsSize);
+    ZSTD_resetSeqStore(&zc->seqStore);
+    RETURN_ERROR_IF(inSeqsSize > zc->seqStore.maxNbSeq, corruption_detected,
+                    "More sequences than the sequence store can hold!");
+
+    for (idx = 0; idx < inSeqsSize; ++idx) {
+        U32 const litLength = inSeqs[idx].litLength;
+        U32 const matchLength = inSeqs[idx].matchLength;
+        U32 const offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;
+        /* summed as size_t so that two large U32 lengths cannot wrap around */
+        size_t const seqLength = (size_t)litLength + matchLength;
+        RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small!");
+        RETURN_ERROR_IF(seqLength > (size_t)(iend - ip), corruption_detected,
+                        "Sequences cover more than srcSize!");
+        ZSTD_storeSeq(&zc->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += seqLength;
+    }
+
+    /* whatever is left after the last match is stored as trailing literals */
+    if (ip < iend) {
+        ZSTD_storeLastLiterals(&zc->seqStore, ip, (size_t)(iend - ip));
+    }
+    return 0;
+}
+
+/* ZSTD_compressSequences_ext_internal() :
+ * Body of ZSTD_compressSequences_ext(), working on a context owned by the caller,
+ * so that any early error return leaves the context for the caller to release.
+ * @return : size of the frame written into `dst`, or a ZSTD error code */
+static size_t ZSTD_compressSequences_ext_internal(ZSTD_CCtx* cctx,
+                        void* dst, size_t dstCapacity,
+                        const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                        const void* src, size_t srcSize, int compressionLevel)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    U32 const lastBlock = 1;   /* the frame holds exactly one block */
+    size_t frameHeaderSize;
+    size_t cSize;
+
+    DEBUGLOG(4, "ZSTD_compressSequences_ext (srcSize=%u)", (unsigned)srcSize);
+    RETURN_ERROR_IF(dstCapacity < ZSTD_compressBound(srcSize), dstSize_tooSmall,
+                    "Destination buffer too small!");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) , "");
+
+    /* transparent init : mirrors the "transparent init stage" of ZSTD_compressStream2() */
+    {   ZSTD_CCtx_params params = cctx->requestedParams;
+        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+        U64 const pledgedSrcSize = srcSize;
+        FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+        ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
+        assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
+        if (cctx->cdict)
+            params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+        cctx->pledgedSrcSizePlusOne = pledgedSrcSize + 1;  /* auto-fix pledgedSrcSize */
+        {   size_t const dictSize = prefixDict.dict
+                    ? prefixDict.dictSize
+                    : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+            ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, pledgedSrcSize);
+            params.cParams = ZSTD_getCParamsFromCCtxParams(
+                    &params, pledgedSrcSize,
+                    dictSize, mode);
+        }
+        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
+                cctx->cdict,
+                &params, pledgedSrcSize,
+                ZSTDb_buffered) , "");
+        assert(cctx->appliedParams.nbWorkers == 0);
+        cctx->inToCompress = 0;
+        cctx->inBuffPos = 0;
+        /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
+        cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+        cctx->streamStage = zcss_load;
+        cctx->frameEnded = 0;
+    }
+
+    /* the whole input is emitted as a single block, so it has to fit in the block size */
+    RETURN_ERROR_IF(srcSize > cctx->blockSize, srcSize_wrong,
+                    "Input must fit in a single block!");
+
+    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
+    FORWARD_IF_ERROR(frameHeaderSize, "");
+    op += frameHeaderSize;
+
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    }
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
+        XXH64_update(&cctx->xxhState, src, srcSize);
+    }
+
+    FORWARD_IF_ERROR( ZSTD_copySequencesToSeqStore(cctx, inSeqs, inSeqsSize, src, srcSize) , "");
+    cSize = ZSTD_compressSequences(&cctx->seqStore,
+                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
+                &cctx->appliedParams,
+                op + ZSTD_blockHeaderSize, dstCapacity - frameHeaderSize - ZSTD_blockHeaderSize,
+                srcSize,
+                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                cctx->bmi2);
+    FORWARD_IF_ERROR(cSize, "");
+
+    if (cSize == 0) {
+        /* ZSTD_compressSequences() returns 0 for "block not compressed" : store the input as a raw block */
+        RETURN_ERROR_IF(srcSize > dstCapacity - frameHeaderSize - ZSTD_blockHeaderSize, dstSize_tooSmall,
+                        "no room for raw block");
+        MEM_writeLE24(op, lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3));
+        if (srcSize) ZSTD_memcpy(op + ZSTD_blockHeaderSize, src, srcSize);
+        cSize = srcSize;
+    } else if (cSize == 1) {
+        /* RLE block : the size field carries the regenerated size, which is srcSize here */
+        MEM_writeLE24(op, lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3));
+    } else {
+        ZSTD_confirmRepcodesAndEntropyTables(cctx);
+        MEM_writeLE24(op, lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3));
+    }
+    cSize += ZSTD_blockHeaderSize + frameHeaderSize;   /* from here on, cSize counts from the start of dst */
+
+    cctx->consumedSrcSize += srcSize;
+    cctx->producedCSize += cSize;
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity - cSize < 4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "ZSTD_compressSequences_ext: write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32(ostart + cSize, checksum);
+        cSize += 4;
+    }
+
+    DEBUGLOG(4, "ZSTD_compressSequences_ext: total cSize: %u", (unsigned)cSize);
+    return cSize;
+}
+
+/*! ZSTD_compressSequences_ext() :
+ * Compresses `src` into `dst` as one frame made of one single block,
+ * using the caller-provided sequences `inSeqs` instead of running a match finder.
+ * The frame is always written with a content checksum.
+ * `dstCapacity` must be at least ZSTD_compressBound(srcSize).
+ * @return : size of the frame written into `dst`, or a ZSTD error code */
+size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
+                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                  const void* src, size_t srcSize, int compressionLevel)
+{
+    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+    size_t result;
+    RETURN_ERROR_IF(cctx == NULL, memory_allocation, "NULL pointer!");
+    result = ZSTD_compressSequences_ext_internal(cctx, dst, dstCapacity,
+                                inSeqs, inSeqsSize, src, srcSize, compressionLevel);
+    ZSTD_freeCCtx(cctx);   /* released on success and on error alike */
+    return result;
+}
+
 /*====== Finalize ======*/
 
 /*! ZSTD_flushStream() :
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 21f846bc..09fd0ecb 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -497,7 +497,6 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
             if (ret > 0)
                 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
         }
-
         ip += zfh.headerSize;
         remainingSize -= zfh.headerSize;
 
@@ -1617,6 +1616,7 @@ static size_t ZSTD_decompressContinueStream(
             zds->streamStage = zdss_flush;
         }
     } else {
+
         /* Write directly into the output buffer */
         size_t const dstSize = isSkipFrame ?
0 : (size_t)(oend - *op); size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index bec82e85..261a8f92 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -56,6 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { + printf("getcblockSize: srcSize: %u\n", srcSize); RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); { U32 const cBlockHeader = MEM_readLE24(src); @@ -419,6 +420,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, * our buffer to handle the over-write. */ { + U64 const add = 0x0101010101010101ull; size_t pos = 0; U64 sv = 0; @@ -1210,6 +1212,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; + printf("Last LL: %u\n", lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); @@ -1217,6 +1220,8 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, } } + printf("op - ostart: %u\n", (U32)(op-ostart)); + return op-ostart; } diff --git a/lib/zstd.h b/lib/zstd.h index c5cf51e2..0b97805c 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1328,7 +1328,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, */ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); ZSTDLIB_API size_t ZSTD_compressSequences_ext(void* dst, size_t dstSize, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize); + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize, int compressionLevel); /***************************************