Merge branch 'dev' of github.com:senhuang42/zstd into change_ldm_mt_config

This commit is contained in:
senhuang42 2020-10-14 10:17:50 -04:00
commit 467e4383b0
25 changed files with 845 additions and 190 deletions

View File

@ -0,0 +1,178 @@
/*
* Copyright (c) 2020, Martin Liska, SUSE, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stdio.h> // printf
#include <stdlib.h> // free
#include <string.h> // memset, strcat, strlen
#include <zstd.h> // presumes zstd library is installed
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
#include <pthread.h>
typedef struct compress_args
{
const char *fname;
char *outName;
int cLevel;
#if defined(ZSTD_STATIC_LINKING_ONLY)
ZSTD_threadPool *pool;
#endif
} compress_args_t;
static void *compressFile_orDie(void *data)
{
compress_args_t *args = (compress_args_t *)data;
fprintf (stderr, "Starting compression of %s with level %d\n", args->fname, args->cLevel);
/* Open the input and output files. */
FILE* const fin = fopen_orDie(args->fname, "rb");
FILE* const fout = fopen_orDie(args->outName, "wb");
/* Create the input and output buffers.
* They may be any size, but we recommend using these functions to size them.
* Performance will only suffer significantly for very tiny buffers.
*/
size_t const buffInSize = ZSTD_CStreamInSize();
void* const buffIn = malloc_orDie(buffInSize);
size_t const buffOutSize = ZSTD_CStreamOutSize();
void* const buffOut = malloc_orDie(buffOutSize);
/* Create the context. */
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
#if defined(ZSTD_STATIC_LINKING_ONLY)
size_t r = ZSTD_CCtx_refThreadPool(cctx, args->pool);
CHECK(r == 0, "ZSTD_CCtx_refThreadPool failed!");
#endif
/* Set any parameters you want.
* Here we set the compression level, and enable the checksum.
*/
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, args->cLevel) );
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 16);
/* This loop read from the input file, compresses that entire chunk,
* and writes all output produced to the output file.
*/
size_t const toRead = buffInSize;
for (;;) {
size_t read = fread_orDie(buffIn, toRead, fin);
/* Select the flush mode.
* If the read may not be finished (read == toRead) we use
* ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end.
* Zstd optimizes the case where the first flush mode is ZSTD_e_end,
* since it knows it is compressing the entire source in one pass.
*/
int const lastChunk = (read < toRead);
ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
/* Set the input buffer to what we just read.
* We compress until the input buffer is empty, each time flushing the
* output.
*/
ZSTD_inBuffer input = { buffIn, read, 0 };
int finished;
do {
/* Compress into the output buffer and write all of the output to
* the file so we can reuse the buffer next iteration.
*/
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode);
CHECK_ZSTD(remaining);
fwrite_orDie(buffOut, output.pos, fout);
/* If we're on the last chunk we're finished when zstd returns 0,
* which means its consumed all the input AND finished the frame.
* Otherwise, we're finished when we've consumed all the input.
*/
finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
} while (!finished);
CHECK(input.pos == input.size,
"Impossible: zstd only returns 0 when the input is completely consumed!");
if (lastChunk) {
break;
}
}
fprintf (stderr, "Finishing compression of %s\n", args->outName);
ZSTD_freeCCtx(cctx);
fclose_orDie(fout);
fclose_orDie(fin);
free(buffIn);
free(buffOut);
free(args->outName);
return NULL;
}
static char* createOutFilename_orDie(const char* filename)
{
size_t const inL = strlen(filename);
size_t const outL = inL + 5;
void* const outSpace = malloc_orDie(outL);
memset(outSpace, 0, outL);
strcat(outSpace, filename);
strcat(outSpace, ".zst");
return (char*)outSpace;
}
int main(int argc, const char** argv)
{
const char* const exeName = argv[0];
if (argc<=3) {
printf("wrong arguments\n");
printf("usage:\n");
printf("%s POOL_SIZE LEVEL FILES\n", exeName);
return 1;
}
int pool_size = atoi (argv[1]);
CHECK(pool_size != 0, "can't parse POOL_SIZE!");
int level = atoi (argv[2]);
CHECK(level != 0, "can't parse LEVEL!");
argc -= 3;
argv += 3;
#if defined(ZSTD_STATIC_LINKING_ONLY)
ZSTD_threadPool *pool = ZSTD_createThreadPool (pool_size);
CHECK(pool != NULL, "ZSTD_createThreadPool() failed!");
fprintf (stderr, "Using shared thread pool of size %d\n", pool_size);
#else
fprintf (stderr, "All threads use its own thread pool\n");
#endif
pthread_t *threads = malloc_orDie(argc * sizeof(pthread_t));
compress_args_t *args = malloc_orDie(argc * sizeof(compress_args_t));
for (unsigned i = 0; i < argc; i++)
{
args[i].fname = argv[i];
args[i].outName = createOutFilename_orDie(args[i].fname);
args[i].cLevel = level;
#if defined(ZSTD_STATIC_LINKING_ONLY)
args[i].pool = pool;
#endif
pthread_create (&threads[i], NULL, compressFile_orDie, &args[i]);
}
for (unsigned i = 0; i < argc; i++)
pthread_join (threads[i], NULL);
#if defined(ZSTD_STATIC_LINKING_ONLY)
ZSTD_freeThreadPool (pool);
#endif
return 0;
}

View File

@ -45,7 +45,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -105,6 +105,10 @@ static void* POOL_thread(void* opaque) {
assert(0); /* Unreachable */
}
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
return POOL_create (numThreads, 0);
}
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
@ -184,7 +188,9 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_customFree(ctx, ctx->customMem);
}
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
POOL_free (pool);
}
size_t POOL_sizeof(POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */

View File

@ -19,7 +19,7 @@
/*-*************************************
* Dependencies
***************************************/
#if !defined(ZSTD_NO_INTRINSICS) && defined(__aarch64__)
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "compiler.h"
@ -242,7 +242,7 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__aarch64__)
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
ZSTD_memcpy(dst, src, 8);
@ -251,7 +251,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__aarch64__)
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#else
ZSTD_memcpy(dst, src, 16);

View File

@ -64,6 +64,7 @@ size_t ZSTD_compressBound(size_t srcSize) {
struct ZSTD_CDict_s {
const void* dictContent;
size_t dictContentSize;
ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
ZSTD_cwksp workspace;
ZSTD_matchState_t matchState;
@ -877,7 +878,6 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
int const compressionLevel,
unsigned long long srcSizeHint,
size_t const dictSize);
static int ZSTD_dedicatedDictSearch_isSupported(
const ZSTD_compressionParameters* cParams);
@ -971,6 +971,14 @@ size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
return 0;
}
size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
{
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't ref a pool when ctx not in init stage.");
cctx->pool = pool;
return 0;
}
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
@ -1055,24 +1063,73 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
return hashLog - btScale;
}
/** ZSTD_dictAndWindowLog() :
* Returns an adjusted window log that is large enough to fit the source and the dictionary.
* The zstd format says that the entire dictionary is valid if one byte of the dictionary
* is within the window. So the hashLog and chainLog should be large enough to reference both
* the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
* the hashLog and windowLog.
* NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
*/
static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
{
const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
/* No dictionary ==> No change */
if (dictSize == 0) {
return windowLog;
}
assert(windowLog <= ZSTD_WINDOWLOG_MAX);
assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
{
U64 const windowSize = 1ULL << windowLog;
U64 const dictAndWindowSize = dictSize + windowSize;
/* If the window size is already large enough to fit both the source and the dictionary
* then just use the window size. Otherwise adjust so that it fits the dictionary and
* the window.
*/
if (windowSize >= dictSize + srcSize) {
return windowLog; /* Window size large enough already */
} else if (dictAndWindowSize >= maxWindowSize) {
return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
} else {
return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
}
}
}
/** ZSTD_adjustCParams_internal() :
* optimize `cPar` for a specified input (`srcSize` and `dictSize`).
* mostly downsize to reduce memory consumption and initialization latency.
* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
* `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
* note : `srcSize==0` means 0!
* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
unsigned long long srcSize,
size_t dictSize)
size_t dictSize,
ZSTD_cParamMode_e mode)
{
static const U64 minSrcSize = 513; /* (1<<9) + 1 */
static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
const U64 minSrcSize = 513; /* (1<<9) + 1 */
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
assert(ZSTD_checkCParams(cPar)==0);
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
srcSize = minSrcSize;
switch (mode) {
case ZSTD_cpm_noAttachDict:
case ZSTD_cpm_unknown:
case ZSTD_cpm_createCDict:
break;
case ZSTD_cpm_attachDict:
dictSize = 0;
break;
default:
assert(0);
break;
}
/* resize windowLog if input is small enough, to use less memory */
if ( (srcSize < maxWindowResize)
&& (dictSize < maxWindowResize) ) {
@ -1082,10 +1139,11 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
ZSTD_highbit32(tSize-1) + 1;
if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
}
if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
{ U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
if (cycleLog > cPar.windowLog)
cPar.chainLog -= (cycleLog - cPar.windowLog);
{ U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
if (cycleLog > dictAndWindowLog)
cPar.chainLog -= (cycleLog - dictAndWindowLog);
}
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
@ -1101,11 +1159,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
{
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
}
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static void ZSTD_overrideCParams(
ZSTD_compressionParameters* cParams,
@ -1121,18 +1179,18 @@ static void ZSTD_overrideCParams(
}
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
ZSTD_compressionParameters cParams;
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
srcSizeHint = CCtxParams->srcSizeHint;
}
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
assert(!ZSTD_checkCParams(cParams));
/* srcSizeHint == 0 means 0 */
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
}
static size_t
@ -1210,7 +1268,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
/* estimateCCtxSize is for one-shot compression. So no buffers should
@ -1228,7 +1286,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
return ZSTD_estimateCCtxSize_usingCParams(cParams);
}
@ -1247,7 +1305,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
{ ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
@ -1266,7 +1324,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
@ -1691,7 +1749,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
}
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 0);
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
cdict->dictContentSize, ZSTD_cpm_attachDict);
params.cParams.windowLog = windowLog;
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_makeClean, zbuff), "");
@ -2354,7 +2413,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
src, srcSize);
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
} else if (zc->appliedParams.ldmParams.enableLdm) {
rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
ldmSeqStore.seq = zc->ldmSequences;
ldmSeqStore.capacity = zc->maxNbLdmSequences;
@ -2371,6 +2430,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
assert(ldmSeqStore.pos == ldmSeqStore.size);
} else { /* not long range mode */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
@ -3180,7 +3240,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
cdict->dictContentSize, dictContentType, dtlm,
cdict->dictContentSize, cdict->dictContentType, dtlm,
cctx->entropyWorkspace)
: ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
@ -3227,7 +3287,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
ZSTD_CCtx_params const cctxParams =
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
@ -3361,7 +3421,7 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
assert(params.fParams.contentSizeFlag == 1);
@ -3416,7 +3476,7 @@ size_t ZSTD_estimateCDictSize_advanced(
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}
@ -3452,6 +3512,7 @@ static size_t ZSTD_initCDict_internal(
ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
}
cdict->dictContentSize = dictSize;
cdict->dictContentType = dictContentType;
cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
@ -3549,18 +3610,18 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
if (cctxParams.enableDedicatedDictSearch) {
cParams = ZSTD_dedicatedDictSearch_getCParams(
cctxParams.compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
cctxParams.compressionLevel, dictSize);
ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
} else {
cParams = ZSTD_getCParamsFromCCtxParams(
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
}
if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
/* Fall back to non-DDSS params */
cctxParams.enableDedicatedDictSearch = 0;
cParams = ZSTD_getCParamsFromCCtxParams(
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
}
cctxParams.cParams = cParams;
@ -3582,21 +3643,24 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byCopy, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
if (cdict)
cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
return cdict;
}
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
if (cdict)
cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
return cdict;
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@ -3780,32 +3844,12 @@ size_t ZSTD_CStreamOutSize(void)
return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
}
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
const ZSTD_CDict* const cdict,
ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_resetCStream_internal");
/* Finalize the compression parameters */
params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
dict, dictSize, dictContentType, ZSTD_dtlm_fast,
cdict,
&params, pledgedSrcSize,
ZSTDb_buffered) , "");
cctx->inToCompress = 0;
cctx->inBuffPos = 0;
cctx->inBuffTarget = cctx->blockSize
+ (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
cctx->streamStage = zcss_load;
cctx->frameEnded = 0;
return 0; /* ready to go */
if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
return ZSTD_cpm_attachDict;
else
return ZSTD_cpm_noAttachDict;
}
/* ZSTD_resetCStream():
@ -4120,10 +4164,19 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
if (cctx->cdict)
params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
params.cParams = ZSTD_getCParamsFromCCtxParams(
&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
{
size_t const dictSize = prefixDict.dict
? prefixDict.dictSize
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
params.cParams = ZSTD_getCParamsFromCCtxParams(
&params, cctx->pledgedSrcSizePlusOne-1,
dictSize, mode);
}
#ifdef ZSTD_MULTITHREAD
@ -4135,7 +4188,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
if (cctx->mtctx == NULL) {
DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
params.nbWorkers);
cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
}
/* mt compression */
@ -4148,12 +4201,21 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
cctx->appliedParams.nbWorkers = params.nbWorkers;
} else
#endif
{ FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
cctx->cdict,
params, cctx->pledgedSrcSizePlusOne-1) , "");
assert(cctx->streamStage == zcss_load);
{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
cctx->cdict,
&params, pledgedSrcSize,
ZSTDb_buffered) , "");
assert(cctx->appliedParams.nbWorkers == 0);
cctx->inToCompress = 0;
cctx->inBuffPos = 0;
/* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
cctx->streamStage = zcss_load;
cctx->frameEnded = 0;
} }
/* end of transparent initialization stage */
@ -4365,9 +4427,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
};
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize)
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
{
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
switch (cParams.strategy) {
case ZSTD_fast:
case ZSTD_dfast:
@ -4416,15 +4478,34 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
}
}
static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
switch (mode) {
case ZSTD_cpm_unknown:
case ZSTD_cpm_noAttachDict:
case ZSTD_cpm_createCDict:
break;
case ZSTD_cpm_attachDict:
dictSize = 0;
break;
default:
assert(0);
break;
}
{ int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
}
}
/*! ZSTD_getCParams_internal() :
* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
* Use dictSize == 0 for unknown or unused. */
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
* Use dictSize == 0 for unknown or unused.
* Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
int row = compressionLevel;
DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
@ -4434,7 +4515,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
/* refine parameters based on srcSize & dictSize */
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
}
}
@ -4444,16 +4525,16 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
/*! ZSTD_getParams() :
* same idea as ZSTD_getCParams()
* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
* Fields of `ZSTD_frameParameters` are set to default values */
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
ZSTD_parameters params;
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
ZSTD_memset(&params, 0, sizeof(params));
params.cParams = cParams;
@ -4467,5 +4548,5 @@ static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned lo
* Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize);
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}

View File

@ -82,10 +82,27 @@ typedef struct {
} ZSTD_entropyCTables_t;
typedef struct {
U32 off;
U32 len;
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
U32 len; /* Raw length of match */
} ZSTD_match_t;
typedef struct {
U32 offset; /* Offset of sequence */
U32 litLength; /* Length of literals prior to match */
U32 matchLength; /* Raw length of match */
} rawSeq;
typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The index in seq where reading stopped. pos <= size. */
size_t posInSequence; /* The position within the sequence at seq[pos] where reading
stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
int price;
U32 off;
@ -152,6 +169,7 @@ struct ZSTD_matchState_t {
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore;
};
typedef struct {
@ -183,19 +201,6 @@ typedef struct {
U32 windowLog; /* Window log for the LDM */
} ldmParams_t;
typedef struct {
U32 offset;
U32 litLength;
U32 matchLength;
} rawSeq;
typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The position where reading stopped. <= size. */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
typedef struct {
int collectSequences;
ZSTD_Sequence* seqStart;
@ -255,6 +260,7 @@ struct ZSTD_CCtx_s {
unsigned long long producedCSize;
XXH64_state_t xxhState;
ZSTD_customMem customMem;
ZSTD_threadPool* pool;
size_t staticSize;
SeqCollector seqCollector;
int isFirstBlock;
@ -301,6 +307,25 @@ typedef enum {
ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;
typedef enum {
ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
* In this mode we use both the srcSize and the dictSize
* when selecting and adjusting parameters.
*/
ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
* In this mode we only take the srcSize into account when selecting
* and adjusting parameters.
*/
ZSTD_cpm_createCDict = 2, /* Creating a CDict.
* In this mode we take both the source size and the dictionary size
* into account when selecting and adjusting the parameters.
*/
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
* We don't know what these parameters are for. We default to the legacy
* behavior of taking both the source size and the dict size into account
* when selecting and adjusting parameters.
*/
} ZSTD_cParamMode_e;
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -1089,7 +1114,7 @@ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
* Note: srcSizeHint == 0 means 0!
*/
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
/*! ZSTD_initCStream_internal() :
* Private use only. Init streaming operation.

View File

@ -27,13 +27,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (cParams->strategy >= ZSTD_btopt) {
/* Get out of the way of the optimal parser */
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
params->minMatchLength = minMatch;
}
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@ -562,6 +555,26 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
return sequence;
}
/* ZSTD_ldm_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
*/
static void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
if (currPos >= currSeq.litLength + currSeq.matchLength) {
currPos -= currSeq.litLength + currSeq.matchLength;
rawSeqStore->pos++;
} else {
rawSeqStore->posInSequence = currPos;
break;
}
}
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
rawSeqStore->posInSequence = 0;
}
}
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
@ -577,6 +590,15 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
BYTE const* ip = istart;
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
/* If using opt parser, use LDMs only as candidates rather than always accepting them */
if (cParams->strategy >= ZSTD_btopt) {
size_t lastLLSize;
ms->ldmSeqStore = rawSeqStore;
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
return lastLLSize;
}
assert(rawSeqStore->pos <= rawSeqStore->size);
assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */

View File

@ -764,6 +764,140 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
}
}
/*************************
* LDM helper functions *
*************************/
/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
rawSeqStore_t seqStore; /* External match candidates store for this block */
U32 startPosInBlock; /* Start position of the current match candidate */
U32 endPosInBlock; /* End position of the current match candidate */
U32 offset; /* Offset of the match candidate */
} ZSTD_optLdm_t;
/* ZSTD_optLdm_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
*/
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
if (currPos >= currSeq.litLength + currSeq.matchLength) {
currPos -= currSeq.litLength + currSeq.matchLength;
rawSeqStore->pos++;
} else {
rawSeqStore->posInSequence = currPos;
break;
}
}
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
rawSeqStore->posInSequence = 0;
}
}
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
* Calculates the beginning and end of the next match in the current block.
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
*/
static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32 blockBytesRemaining) {
rawSeq currSeq;
U32 currBlockEndPos;
U32 literalsBytesRemaining;
U32 matchBytesRemaining;
/* Setting match end position to MAX to ensure we never use an LDM during this block */
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
optLdm->startPosInBlock = UINT_MAX;
optLdm->endPosInBlock = UINT_MAX;
return;
}
/* Calculate appropriate bytes left in matchLength and litLength after adjusting
based on ldmSeqStore->posInSequence */
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
currBlockEndPos = currPosInBlock + blockBytesRemaining;
literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
0;
matchBytesRemaining = (literalsBytesRemaining == 0) ?
currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
currSeq.matchLength;
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */
if (literalsBytesRemaining >= blockBytesRemaining) {
optLdm->startPosInBlock = UINT_MAX;
optLdm->endPosInBlock = UINT_MAX;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
return;
}
/* Matches may be < MINMATCH by this process. In that case, we will reject them
when we are deciding whether or not to add the ldm */
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
optLdm->offset = currSeq.offset;
if (optLdm->endPosInBlock > currBlockEndPos) {
/* Match ends after the block ends, we can't use the whole match */
optLdm->endPosInBlock = currBlockEndPos;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
} else {
/* Consume nb of bytes equal to size of sequence left */
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
}
}
/* ZSTD_optLdm_maybeAddMatch():
* Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
* and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
*/
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
/* Ensure that current block position is not outside of the match */
if (currPosInBlock < optLdm->startPosInBlock
|| currPosInBlock >= optLdm->endPosInBlock
|| candidateMatchLength < MINMATCH) {
return;
}
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock);
matches[*nbMatches].len = candidateMatchLength;
matches[*nbMatches].off = candidateOffCode;
(*nbMatches)++;
}
}
/* ZSTD_optLdm_processMatchCandidate():
* Wrapper function to update ldm seq store and call ldm functions as necessary.
*/
static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
U32 currPosInBlock, U32 remainingBytes) {
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
return;
}
if (currPosInBlock >= optLdm->endPosInBlock) {
if (currPosInBlock > optLdm->endPosInBlock) {
/* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
* at the end of a match from the ldm seq store, and will often be some bytes
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
*/
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
}
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
}
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}
/*-*******************************
* Optimal parser
@ -817,6 +951,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm;
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@ -832,7 +971,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(ip-istart), (U32)(iend - ip));
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
@ -945,8 +1086,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb;
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(inr-istart), (U32)(iend-inr));
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
continue;

View File

@ -266,8 +266,6 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
/* ===== Seq Pool Wrapper ====== */
static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
@ -277,7 +275,7 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
static rawSeqStore_t bufferToSeq(buffer_t buffer)
{
rawSeqStore_t seq = {NULL, 0, 0, 0};
rawSeqStore_t seq = kNullRawSeqStore;
seq.seq = (rawSeq*)buffer.start;
seq.capacity = buffer.capacity / sizeof(rawSeq);
return seq;
@ -831,6 +829,7 @@ struct ZSTDMT_CCtx_s {
ZSTD_customMem cMem;
ZSTD_CDict* cdictLocal;
const ZSTD_CDict* cdict;
unsigned providedFactory: 1;
};
static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
@ -889,7 +888,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
}
MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
{
ZSTDMT_CCtx* mtctx;
U32 nbJobs = nbWorkers + 2;
@ -907,7 +906,14 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
mtctx->cMem = cMem;
mtctx->allJobsCompleted = 1;
mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
if (pool != NULL) {
mtctx->factory = pool;
mtctx->providedFactory = 1;
}
else {
mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
mtctx->providedFactory = 0;
}
mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
mtctx->jobIDMask = nbJobs - 1;
@ -924,20 +930,21 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
return mtctx;
}
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
{
#ifdef ZSTD_MULTITHREAD
return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
#else
(void)nbWorkers;
(void)cMem;
(void)pool;
return NULL;
#endif
}
ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
{
return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem, NULL);
}
@ -983,7 +990,8 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
{
if (mtctx==NULL) return 0; /* compatible with free on NULL */
POOL_free(mtctx->factory); /* stop and free worker threads */
if (!mtctx->providedFactory)
POOL_free(mtctx->factory); /* stop and free worker threads */
ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */
ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
ZSTDMT_freeBufferPool(mtctx->bufPool);
@ -1097,7 +1105,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
compressionLevel);
mtctx->params.compressionLevel = compressionLevel;
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
cParams.windowLog = saved_wlog;
mtctx->params.cParams = cParams;
}

View File

@ -60,7 +60,8 @@ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
ZSTD_customMem cMem);
ZSTD_customMem cMem,
ZSTD_threadPool *pool);
ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);

View File

@ -1280,7 +1280,11 @@ static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, si
* Basic Types
*********************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -89,7 +89,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -90,7 +90,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -52,7 +52,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -80,7 +80,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -82,7 +82,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -242,7 +242,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;

View File

@ -1409,6 +1409,21 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
ZSTD_compressionParameters cParams,
ZSTD_customMem customMem);
/* ! Thread pool :
* These prototypes make it possible to share a thread pool among multiple compression contexts.
* This can limit resources for applications with multiple threads where each one uses
* a threaded compression mode (via ZSTD_c_nbWorkers parameter).
* ZSTD_createThreadPool creates a new thread pool with a given number of threads.
* Note that the lifetime of such pool must exist while being used.
* ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
* to use an internal thread pool).
* ZSTD_freeThreadPool frees a thread pool.
*/
typedef struct POOL_ctx_s ZSTD_threadPool;
ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
/**
* This API is temporary and is expected to change or disappear in the future!
*/
@ -1424,8 +1439,6 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
ZSTD_dictContentType_e dictContentType,
ZSTD_customMem customMem);
/***************************************
* Advanced compression functions
***************************************/

View File

@ -331,6 +331,7 @@ struct FIO_ctx_s {
/* file i/o info */
int nbFilesTotal;
int hasStdinInput;
int hasStdoutOutput;
/* file i/o state */
int currFileIdx;
@ -388,6 +389,7 @@ FIO_ctx_t* FIO_createContext(void)
ret->currFileIdx = 0;
ret->hasStdinInput = 0;
ret->hasStdoutOutput = 0;
ret->nbFilesTotal = 1;
ret->nbFilesProcessed = 0;
ret->totalBytesInput = 0;
@ -536,6 +538,10 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
/* FIO_ctx_t functions */
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
fCtx->hasStdoutOutput = value;
}
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
{
fCtx->nbFilesTotal = value;
@ -841,6 +847,7 @@ static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
* If -q is specified with --rm, zstd will abort pre-emptively
* If neither flag is specified, zstd will prompt the user for confirmation to proceed.
* If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q).
* However, if the output is stdout, we will always abort rather than displaying the warning prompt.
*/
static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff)
{
@ -859,7 +866,12 @@ static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t*
}
DISPLAYLEVEL(2, "\nThe concatenated output CANNOT regenerate the original directory tree. ")
if (prefs->removeSrcFile) {
error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
if (fCtx->hasStdoutOutput) {
DISPLAYLEVEL(1, "\nAborting. Use -f if you really want to delete the files and output to stdout");
error = 1;
} else {
error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
}
}
}
DISPLAY("\n");
@ -1532,20 +1544,20 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
fCtx->totalBytesInput += (size_t)readsize;
fCtx->totalBytesOutput += (size_t)compressedfilesize;
DISPLAYLEVEL(2, "\r%79s\r", "");
if (g_display_prefs.displayLevel >= 2) {
if (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1) {
if (readsize == 0) {
DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n",
srcFileName,
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);
} else {
DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n",
srcFileName,
(double)compressedfilesize / readsize * 100,
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);
}
if (g_display_prefs.displayLevel >= 2 &&
!fCtx->hasStdoutOutput &&
(g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) {
if (readsize == 0) {
DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n",
srcFileName,
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);
} else {
DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n",
srcFileName,
(double)compressedfilesize / readsize * 100,
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);
}
}
@ -2112,19 +2124,21 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
/* Write block */
storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
frameSize += outBuff.pos;
if (fCtx->nbFilesTotal > 1) {
size_t srcFileNameSize = strlen(srcFileName);
if (srcFileNameSize > 18) {
const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: ...%s : %u MB... ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
if (!fCtx->hasStdoutOutput) {
if (fCtx->nbFilesTotal > 1) {
size_t srcFileNameSize = strlen(srcFileName);
if (srcFileNameSize > 18) {
const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: ...%s : %u MB... ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
} else {
DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: %s : %u MB... ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
}
} else {
DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: %s : %u MB... ",
fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ",
srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
}
} else {
DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ",
srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
}
if (inBuff.pos > 0) {

View File

@ -107,6 +107,7 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value);
/* FIO_ctx_t functions */
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value);
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value);
void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames);
/*-*************************************

View File

@ -28,7 +28,11 @@ extern "C" {
******************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint64_t PTime; /* Precise Time */
#else
typedef unsigned long long PTime; /* does not support compilers without long long support */

View File

@ -697,6 +697,7 @@ int main(int const argCount, const char* argv[])
int argNb,
followLinks = 0,
forceStdout = 0,
hasStdout = 0,
ldmFlag = 0,
main_pause = 0,
nbWorkers = 0,
@ -1282,9 +1283,12 @@ int main(int const argCount, const char* argv[])
}
/* No status message in pipe mode (stdin - stdout) */
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
hasStdout = outFileName && !strcmp(outFileName,stdoutmark);
if (hasStdout && (g_displayLevel==2)) g_displayLevel=1;
/* IO Stream/File */
FIO_setHasStdoutOutput(fCtx, hasStdout);
FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize);
FIO_determineHasStdinInput(fCtx, filenames);
FIO_setNotificationLevel(g_displayLevel);

View File

@ -232,23 +232,40 @@ local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize,
r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload);
ZSTD_freeCCtx(cctx);
return r;
}
static size_t
local_ZSTD_compress_generic_end(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
local_ZSTD_compress2(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
(void)payload;
return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
}
static size_t
local_ZSTD_compress_generic_continue(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
local_ZSTD_compressStream2_end(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
ZSTD_outBuffer buffOut;
ZSTD_inBuffer buffIn;
(void)payload;
buffOut.dst = dst;
buffOut.size = dstCapacity;
buffOut.pos = 0;
buffIn.src = src;
buffIn.size = srcSize;
buffIn.pos = 0;
ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end);
return buffOut.pos;
}
static size_t
local_ZSTD_compressStream2_continue(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
ZSTD_outBuffer buffOut;
ZSTD_inBuffer buffIn;
@ -437,11 +454,17 @@ static int benchMem(unsigned benchNb,
case 43:
benchFunction = local_ZSTD_compressStream_freshCCtx; benchName = "compressStream_freshCCtx";
break;
case 50:
benchFunction = local_ZSTD_compress2; benchName = "compress2";
break;
case 51:
benchFunction = local_ZSTD_compress_generic_continue; benchName = "compress_generic, continue";
benchFunction = local_ZSTD_compressStream2_end; benchName = "compressStream2, end";
break;
case 52:
benchFunction = local_ZSTD_compress_generic_end; benchName = "compress_generic, end";
benchFunction = local_ZSTD_compressStream2_end; benchName = "compressStream2, end & short";
break;
case 53:
benchFunction = local_ZSTD_compressStream2_continue; benchName = "compressStream2, continue";
break;
case 61:
benchFunction = local_ZSTD_compress_generic_T2_continue; benchName = "compress_generic, -T2, continue";
@ -573,6 +596,11 @@ static int benchMem(unsigned benchNb,
payload = &cparams;
break;
case 52 :
/* compressStream2, short dstCapacity */
dstBuffSize--;
break;
/* test functions */
/* convention: test functions have ID > 100 */

View File

@ -372,6 +372,19 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "%u (OK) \n", vn);
}
DISPLAYLEVEL(3, "test%3u : ZSTD_adjustCParams : ", testNb++);
{
ZSTD_compressionParameters params;
memset(&params, 0, sizeof(params));
params.windowLog = 10;
params.hashLog = 19;
params.chainLog = 19;
params = ZSTD_adjustCParams(params, 1000, 100000);
if (params.hashLog != 18) goto _output_error;
if (params.chainLog != 17) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
if (cctx==NULL) goto _output_error;
@ -780,6 +793,44 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : testing ldm no regressions in size for opt parser : ", testNb++);
{
size_t cSizeLdm;
size_t cSizeNoLdm;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
RDG_genBuffer(CNBuffer, CNBuffSize, 0.5, 0.5, seed);
/* Enable checksum to verify round trip. */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19));
/* Round trip once with ldm. */
cSizeLdm = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSizeLdm);
CHECK_Z(ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSizeLdm));
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 0));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19));
/* Round trip once without ldm. */
cSizeNoLdm = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSizeNoLdm);
CHECK_Z(ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSizeNoLdm));
if (cSizeLdm > cSizeNoLdm) {
DISPLAY("Using long mode should not cause regressions for btopt+\n");
testResult = 1;
goto _end;
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
/* Note: this test takes 0.5 seconds to run */
DISPLAYLEVEL(3, "test%3i : testing refPrefx vs refPrefx + ldm (size comparison) : ", testNb++);
{
@ -1971,34 +2022,53 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
/* Either operation is allowed to fail, but one must fail. */
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto);
if (!ZSTD_isError(ret)) {
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (!ZSTD_isError(ret)) goto _output_error;
}
}
DISPLAYLEVEL(3, "OK \n");
{ char* rawDictBuffer = (char*)malloc(dictSize);
assert(rawDictBuffer);
memcpy(rawDictBuffer, (char*)dictBuffer + 2, dictSize - 2);
memset(rawDictBuffer + dictSize - 2, 0, 2);
MEM_writeLE32((char*)rawDictBuffer, ZSTD_MAGIC_DICTIONARY);
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
if (ZSTD_isError(ret)) goto _output_error;
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (ZSTD_isError(ret)) goto _output_error;
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
{
size_t ret;
/* Either operation is allowed to fail, but one must fail. */
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
if (!ZSTD_isError(ret)) {
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (!ZSTD_isError(ret)) goto _output_error;
}
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
{
size_t ret;
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
if (ZSTD_isError(ret)) goto _output_error;
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (ZSTD_isError(ret)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Testing non-attached CDict with ZSTD_dct_rawContent : ", testNb++);
{ size_t const srcSize = MIN(CNBuffSize, 100);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
/* Force the dictionary to be reloaded in raw content mode */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceLoad));
CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize);
CHECK_Z(cSize);
}
DISPLAYLEVEL(3, "OK \n");
free(rawDictBuffer);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 ));
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );

View File

@ -396,9 +396,8 @@ cp tmp_rm3.zst tmp_rm4.zst
echo 'Y' | zstd -d tmp_rm3.zst tmp_rm4.zst -o tmp_rm_out --rm
test ! -f tmp_rm3.zst
test ! -f tmp_rm4.zst
echo 'yes' | zstd tmp_rm_out tmp_rm3 -c --rm > $INTOVOID
test ! -f tmp_rm_out
test ! -f tmp_rm3
echo 'yes' | zstd tmp_rm_out tmp_rm3 -c --rm && die "compressing multiple files to stdout with --rm should fail unless -f is specified"
echo 'yes' | zstd tmp_rm_out tmp_rm3 -c --rm -v && die "compressing multiple files to stdout with --rm should fail unless -f is specified"
println gooder > tmpexists1
zstd tmpexists1 tmpexists -c --rm -f > $INTOVOID
@ -1207,6 +1206,7 @@ roundTripTest -g1000K "1 --single-thread --long"
roundTripTest -g517K "6 --single-thread --long"
roundTripTest -g516K "16 --single-thread --long"
roundTripTest -g518K "19 --single-thread --long"
roundTripTest -g2M "22 --single-thread --ultra --long"
fileRoundTripTest -g5M "3 --single-thread --long"
@ -1216,6 +1216,7 @@ then
println "\n===> zstdmt round-trip tests "
roundTripTest -g4M "1 -T0"
roundTripTest -g8M "3 -T2"
roundTripTest -g8M "19 -T0 --long"
roundTripTest -g8000K "2 --threads=2"
fileRoundTripTest -g4M "19 -T2 -B1M"
@ -1334,6 +1335,28 @@ roundTripTest -g1M -P50 "1 --single-thread --long=29" " --long=28 --memory=512MB
roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB"
println "\n===> zstd long distance matching with optimal parser compressed size tests "
optCSize16=$(datagen -g511K | zstd -16 -c | wc -c)
longCSize16=$(datagen -g511K | zstd -16 --long -c | wc -c)
optCSize19=$(datagen -g2M | zstd -19 -c | wc -c)
longCSize19=$(datagen -g2M | zstd -19 --long -c | wc -c)
optCSize19wlog23=$(datagen -g2M | zstd -19 -c --zstd=wlog=23 | wc -c)
longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c)
optCSize22=$(datagen -g900K | zstd -22 --ultra -c | wc -c)
longCSize22=$(datagen -g900K | zstd -22 --ultra --long -c | wc -c)
if [ "$longCSize16" -gt "$optCSize16" ]; then
echo using --long on compression level 16 should not cause compressed size regression
exit 1
elif [ "$longCSize19" -gt "$optCSize19" ]; then
echo using --long on compression level 19 should not cause compressed size regression
exit 1
elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then
echo using --long on compression level 19 with wLog=23 should not cause compressed size regression
exit 1
elif [ "$longCSize22" -gt "$optCSize22" ]; then
echo using --long on compression level 22 should not cause compressed size regression
exit 1
fi
if [ "$1" != "--test-large-data" ]; then