Implement first pass at compressSequences()
This commit is contained in:
parent
a9327b1e9b
commit
086513b5b9
@ -1044,7 +1044,8 @@ size_t ZSTD_CCtx_refPrefix_advanced(
|
||||
|
||||
/*! ZSTD_CCtx_reset() :
|
||||
* Also dumps dictionary */
|
||||
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
|
||||
size_t
|
||||
ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
|
||||
{
|
||||
if ( (reset == ZSTD_reset_session_only)
|
||||
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
||||
@ -1687,7 +1688,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
|
||||
ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
|
||||
}
|
||||
|
||||
printf("Reserving space for seqs\n");
|
||||
/* sequences storage */
|
||||
ZSTD_referenceExternalSequences(zc, NULL, 0);
|
||||
zc->seqStore.maxNbSeq = maxNbSeq;
|
||||
@ -2333,7 +2334,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
||||
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
|
||||
if (cSize >= maxCSize) return 0; /* block not compressed */
|
||||
}
|
||||
|
||||
printf("compressSequences: %u\n", cSize);
|
||||
return cSize;
|
||||
}
|
||||
|
||||
@ -2650,6 +2651,13 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
||||
srcSize,
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||
zc->bmi2);
|
||||
printf("cSize compressed seqs: %u\n", cSize);
|
||||
|
||||
if (zc->seqCollector.collectSequences) {
|
||||
ZSTD_copyBlockSequences(zc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
if (frame &&
|
||||
/* We don't want to emit our first block as a RLE even if it qualifies because
|
||||
@ -2858,6 +2866,21 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
/*! printBits() :
 *  Debug helper : dumps the `size` bytes starting at `ptr` to stdout in binary.
 *  Bytes are printed one per line, from the highest address (ptr[size-1])
 *  down to ptr[0], each one most-significant bit first.
 *  An empty line terminates the dump; it is the only output when `size` is 0.
 *  The pointed-to memory is only read, never modified. */
void printBits(size_t const size, void const * const ptr)
{
    const unsigned char* const bytes = (const unsigned char*)ptr;
    size_t remaining;

    /* Count down on a size_t : an int index would be truncated for very large sizes */
    for (remaining = size; remaining > 0; remaining--) {
        unsigned const byte = bytes[remaining - 1];
        int bit;
        for (bit = 7; bit >= 0; bit--) {
            printf("%u", (byte >> bit) & 1u);
        }
        printf("\n");
    }
    puts("");
}
|
||||
|
||||
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
||||
const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
|
||||
@ -2873,12 +2896,14 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
||||
BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
|
||||
size_t pos=0;
|
||||
|
||||
printf("ZSTD_writeFrameHeader: pledgedSrcSize: %u\n", pledgedSrcSize);;
|
||||
assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
|
||||
RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
|
||||
"dst buf is too small to fit worst-case frame header size.");
|
||||
DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
|
||||
!params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
|
||||
|
||||
printf("dictIDSizeCodeLength: %u dictIDSizeCode: %u checksumFlag: %u, windowSize: %u singleSegment: %u windowLogByte: %u fcsCode: %u frameHeaderDescriptionByte: %u\n",
|
||||
dictIDSizeCodeLength, dictIDSizeCode, checksumFlag, windowSize, singleSegment, windowLogByte, fcsCode, frameHeaderDescriptionByte);
|
||||
if (params->format == ZSTD_f_zstd1) {
|
||||
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
|
||||
pos = 4;
|
||||
@ -2901,6 +2926,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
||||
case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
|
||||
case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
|
||||
}
|
||||
printBits(pos, op);
|
||||
return pos;
|
||||
}
|
||||
|
||||
@ -2919,6 +2945,7 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
|
||||
{
|
||||
RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
|
||||
@ -3388,7 +3415,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
|
||||
BYTE* op = ostart;
|
||||
size_t fhSize = 0;
|
||||
|
||||
DEBUGLOG(4, "ZSTD_writeEpilogue");
|
||||
DEBUGLOG(4, "ZSTD_writeEpilogue: dstCap: %u", dstCapacity);
|
||||
RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
|
||||
|
||||
/* special case : empty frame */
|
||||
@ -3401,6 +3428,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
|
||||
}
|
||||
|
||||
if (cctx->stage != ZSTDcs_ending) {
|
||||
printf("did this\n");
|
||||
/* write one last empty block, make it the "last" block */
|
||||
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
|
||||
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
|
||||
@ -3446,7 +3474,6 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
|
||||
return cSize + endResult;
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
@ -4461,6 +4488,145 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns 0 on success, otherwise ZSTD error code */
|
||||
static size_t ZSTD_copySequencesToSeqStore(ZSTD_CCtx* zc,
|
||||
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
||||
const void* src, size_t srcSize) {
|
||||
printf("ZSTD_copySequencesToSeqStore: numSeqs: %zu\n", inSeqsSize);
|
||||
size_t idx = 0;
|
||||
BYTE const* istart = (BYTE const*)src;
|
||||
BYTE const* ip = (BYTE const*)src;
|
||||
const BYTE* const iend = ip + srcSize;
|
||||
ZSTD_resetSeqStore(&zc->seqStore);
|
||||
|
||||
for (; idx < inSeqsSize; ++idx) {
|
||||
U32 litLength = inSeqs[idx].litLength;
|
||||
U32 matchLength = inSeqs[idx].matchLength;
|
||||
U32 offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;
|
||||
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small!");
|
||||
//printf("idx now: %zu, seq: (ll: %u, ml: %u, of: %u), at mp %u\n", idx, litLength, matchLength, offCode, (U32)(ip+litLength - istart));;
|
||||
|
||||
ZSTD_storeSeq(&zc->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
||||
ip += matchLength + litLength;
|
||||
}
|
||||
|
||||
/* Handle last literals */
|
||||
size_t consumedSize = (U32)(ip - istart);
|
||||
assert(consumedSize <= srcSize);
|
||||
size_t lastLLSize = srcSize - consumedSize;
|
||||
if (lastLLSize > 0) {
|
||||
printf("There are last literals\n");
|
||||
const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
|
||||
ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
|
||||
}
|
||||
|
||||
printf("ZSTD_copySequencesToSeqStore: done\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
|
||||
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
||||
const void* src, size_t srcSize, int compressionLevel) {
|
||||
printf("ZSTD_compressSequences_ext()\n");
|
||||
BYTE* op = (BYTE*)dst;
|
||||
ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
|
||||
{
|
||||
ZSTD_CCtx_params params = cctx->requestedParams;
|
||||
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
|
||||
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
|
||||
ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
|
||||
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
|
||||
if (cctx->cdict)
|
||||
params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
|
||||
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
|
||||
cctx->pledgedSrcSizePlusOne = srcSize + 1; /* auto-fix pledgedSrcSize */
|
||||
{
|
||||
size_t const dictSize = prefixDict.dict
|
||||
? prefixDict.dictSize
|
||||
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
|
||||
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1);
|
||||
params.cParams = ZSTD_getCParamsFromCCtxParams(
|
||||
¶ms, cctx->pledgedSrcSizePlusOne-1,
|
||||
dictSize, mode);
|
||||
}
|
||||
|
||||
U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
|
||||
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
||||
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
||||
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
|
||||
cctx->cdict,
|
||||
¶ms, pledgedSrcSize,
|
||||
ZSTDb_buffered) , "");
|
||||
assert(cctx->appliedParams.nbWorkers == 0);
|
||||
cctx->inToCompress = 0;
|
||||
cctx->inBuffPos = 0;
|
||||
/* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
|
||||
cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
|
||||
cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
|
||||
cctx->streamStage = zcss_load;
|
||||
cctx->frameEnded = 0;
|
||||
}
|
||||
|
||||
size_t cSize;
|
||||
|
||||
if (dstCapacity < ZSTD_compressBound(srcSize))
|
||||
RETURN_ERROR(dstSize_tooSmall, "Destination buffer too small!");
|
||||
printf("SeqStore: maxNbSeq: %u, maxNbLits: %u\n", cctx->seqStore.maxNbSeq, cctx->seqStore.maxNbLit);
|
||||
|
||||
size_t frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
|
||||
op += frameHeaderSize;
|
||||
printf("frame header size: %u\n", frameHeaderSize);
|
||||
|
||||
|
||||
if (cctx->appliedParams.ldmParams.enableLdm) {
|
||||
ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
|
||||
}
|
||||
|
||||
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
|
||||
XXH64_update(&cctx->xxhState, src, srcSize);
|
||||
}
|
||||
|
||||
ZSTD_copySequencesToSeqStore(cctx, inSeqs, inSeqsSize, src, srcSize);
|
||||
cSize = ZSTD_compressSequences(&cctx->seqStore,
|
||||
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
||||
&cctx->appliedParams,
|
||||
op + ZSTD_blockHeaderSize, dstCapacity - frameHeaderSize - ZSTD_blockHeaderSize,
|
||||
srcSize,
|
||||
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||
cctx->bmi2);
|
||||
|
||||
printf("Compressed sequences size is : %u\n", cSize);
|
||||
/* Error checking */
|
||||
if (!ZSTD_isError(cSize) && cSize > 1) {
|
||||
ZSTD_confirmRepcodesAndEntropyTables(cctx);
|
||||
}
|
||||
|
||||
U32 lastBlock = 1;
|
||||
|
||||
/* Write block header */
|
||||
U32 const cBlockHeader = cSize == 1 ?
|
||||
lastBlock + (((U32)bt_rle)<<1) + (U32)(cctx->blockSize << 3) :
|
||||
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||
MEM_writeLE24(op, cBlockHeader);
|
||||
cSize += ZSTD_blockHeaderSize + frameHeaderSize;
|
||||
|
||||
cctx->consumedSrcSize += srcSize;
|
||||
cctx->producedCSize += cSize;
|
||||
|
||||
if (cctx->appliedParams.fParams.checksumFlag) {
|
||||
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
|
||||
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
|
||||
DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
|
||||
MEM_writeLE32(dst + cSize, checksum);
|
||||
cSize += 4;
|
||||
}
|
||||
|
||||
printf("Total cSize: %u\n", cSize);
|
||||
return cSize;
|
||||
}
|
||||
|
||||
/*====== Finalize ======*/
|
||||
|
||||
/*! ZSTD_flushStream() :
|
||||
|
@ -497,7 +497,6 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
|
||||
if (ret > 0)
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
}
|
||||
|
||||
ip += zfh.headerSize;
|
||||
remainingSize -= zfh.headerSize;
|
||||
|
||||
@ -1617,6 +1616,7 @@ static size_t ZSTD_decompressContinueStream(
|
||||
zds->streamStage = zdss_flush;
|
||||
}
|
||||
} else {
|
||||
|
||||
/* Write directly into the output buffer */
|
||||
size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
|
||||
size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
|
||||
|
@ -56,6 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
||||
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
blockProperties_t* bpPtr)
|
||||
{
|
||||
printf("getcblockSize: srcSize: %u\n", srcSize);
|
||||
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
|
||||
|
||||
{ U32 const cBlockHeader = MEM_readLE24(src);
|
||||
@ -419,6 +420,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
||||
* our buffer to handle the over-write.
|
||||
*/
|
||||
{
|
||||
|
||||
U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
@ -1210,6 +1212,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
|
||||
/* last literal segment */
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
printf("Last LL: %u\n", lastLLSize);
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
||||
if (op != NULL) {
|
||||
ZSTD_memcpy(op, litPtr, lastLLSize);
|
||||
@ -1217,6 +1220,8 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
}
|
||||
}
|
||||
|
||||
printf("op - ostart: %u\n", (U32)(op-ostart));
|
||||
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
|
@ -1328,7 +1328,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
|
||||
/*! ZSTD_compressSequences_ext() :
 *  Compress `src` into `dst` as a single frame, using the sequences provided in
 *  `inSeqs` (`inSeqsSize` entries) rather than sequences found by the compressor.
 *  Input bytes not covered by any sequence are emitted as trailing literals.
 *  Every sequence must have a matchLength of at least the format's minimum match length.
 * `dstSize` is the capacity of `dst`; it must be at least ZSTD_compressBound(srcSize).
 * `compressionLevel` : requested compression level.
 *                      NOTE(review): confirm that the implementation honors it.
 * @return : size of the compressed frame written into `dst`,
 *           or an error code (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_compressSequences_ext(void* dst, size_t dstSize,
                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                  const void* src, size_t srcSize, int compressionLevel);
|
||||
|
||||
|
||||
/***************************************
|
||||
|
Loading…
Reference in New Issue
Block a user