zstd/lib/compress/zstd_compress.c

2964 lines
124 KiB
C
Raw Normal View History

/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
2015-10-22 14:31:46 +00:00
2017-05-12 22:31:53 +00:00
/*-*************************************
* Tuning parameters
***************************************/
/* Default compression level.
 * Can be overridden at build time by defining ZSTD_CLEVEL_DEFAULT beforehand. */
#ifndef ZSTD_CLEVEL_DEFAULT
#  define ZSTD_CLEVEL_DEFAULT 3
#endif
2016-02-11 23:07:30 +00:00
/*-*************************************
* Dependencies
***************************************/
#include <string.h> /* memset */
2015-11-11 20:38:21 +00:00
#include "mem.h"
2016-08-11 23:20:36 +00:00
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
2017-09-02 01:28:35 +00:00
#include "zstd_compress.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
2017-09-07 00:56:01 +00:00
#include "zstd_ldm.h"
2015-10-22 14:31:46 +00:00
2016-02-11 23:07:30 +00:00
/*-*************************************
* Helper functions
***************************************/
/*! ZSTD_compressBound() :
 *  @return : srcSize + (srcSize >> 8) + margin,
 *            where margin is only non-zero for inputs smaller than 256 KB
 *            and shrinks from 64 down to 0 as srcSize grows towards that limit. */
size_t ZSTD_compressBound(size_t srcSize)
{
    size_t const smallInputLimit = 256 KB;
    size_t extra = 0;
    if (srcSize < smallInputLimit) {
        extra = (smallInputLimit - srcSize) >> 12;   /* from 64 to 0 */
    }
    return srcSize + (srcSize >> 8) + extra;
}
2016-01-23 18:28:41 +00:00
2016-02-11 23:07:30 +00:00
/*-*************************************
* Sequence storage
***************************************/
/*! ZSTD_resetSeqStore() :
 *  Rewinds the literal and sequence cursors of `ssPtr` to the start of
 *  their respective buffers, and clears the long-length marker. */
static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->longLengthID = 0;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->lit = ssPtr->litStart;
}
2016-02-11 23:07:30 +00:00
/*-*************************************
* Context memory management
***************************************/
/* Compression dictionary object.
 * NOTE(review): the roles below are inferred from the field names only;
 * the code which fills them is not part of this section - confirm before relying on them. */
struct ZSTD_CDict_s {
    void* dictBuffer;          /* presumably the internally owned copy of the dictionary, if any */
    const void* dictContent;   /* presumably the dictionary bytes actually referenced */
    size_t dictContentSize;    /* presumably the size of dictContent, in bytes */
    ZSTD_CCtx* refContext;     /* presumably a context prepared with this dictionary */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
2015-10-22 14:31:46 +00:00
{
return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
2016-05-23 13:49:09 +00:00
}
/*! ZSTD_createCCtx_advanced() :
 *  Allocates a zero-initialized compression context through `customMem`.
 *  `customMem` must provide either both customAlloc and customFree, or neither.
 *  @return : the new context, with its requested compression level preset to
 *            ZSTD_CLEVEL_DEFAULT, or NULL on invalid allocator / allocation failure. */
ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    int const hasAlloc = (customMem.customAlloc != NULL);
    int const hasFree  = (customMem.customFree  != NULL);

    /* a half-defined allocator is rejected */
    if (hasAlloc != hasFree) return NULL;

    {   ZSTD_CCtx* const newCCtx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
        if (newCCtx == NULL) return NULL;

        newCCtx->customMem = customMem;
        newCCtx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        /* the remaining fields rely on the zeroed allocation :
         * check that zero means what is expected */
        ZSTD_STATIC_ASSERT(zcss_init==0);
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
        return newCCtx;
    }
}
/*! ZSTD_initStaticCCtx() :
 *  Builds a compression context in place, inside caller-provided `workspace`.
 *  The whole workspace is zeroed; the area following the ZSTD_CCtx header
 *  becomes the context's workSpace, and its start hosts the entropy tables.
 *  @return : the context (== workspace), or NULL if the workspace is
 *            too small or not aligned on 8 bytes. */
ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
{
    ZSTD_CCtx* const staticCCtx = (ZSTD_CCtx*) workspace;
    size_t const headerSize = sizeof(ZSTD_CCtx);

    if ( (workspaceSize <= headerSize)            /* minimum size */
      || (((size_t)workspace & 7) != 0) ) {       /* must be 8-aligned */
        return NULL;
    }

    memset(workspace, 0, workspaceSize);   /* may be a bit generous, could memset be smaller ? */
    staticCCtx->staticSize = workspaceSize;
    staticCCtx->workSpace = (void*)(staticCCtx+1);
    staticCCtx->workSpaceSize = workspaceSize - headerSize;

    /* entropy space (never moves) */
    if (staticCCtx->workSpaceSize < sizeof(ZSTD_entropyCTables_t)) return NULL;
    assert(((size_t)staticCCtx->workSpace & (sizeof(void*)-1)) == 0);   /* ensure correct alignment */
    staticCCtx->entropy = (ZSTD_entropyCTables_t*)staticCCtx->workSpace;

    return staticCCtx;
}
/*! ZSTD_freeCCtx() :
 *  Releases the workspace, the locally owned CDict, the multi-threading
 *  context (when compiled in), then the context itself.
 *  @return : 0, or a memory_allocation error when `cctx` is a static context
 *            (such a context is not freed here). NULL is accepted. */
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx == NULL) return 0;   /* support free on NULL */
    if (cctx->staticSize != 0) {
        return ERROR(memory_allocation);   /* not compatible with static CCtx */
    }

    {   ZSTD_customMem const cMem = cctx->customMem;

        ZSTD_free(cctx->workSpace, cMem);
        cctx->workSpace = NULL;

        ZSTD_freeCDict(cctx->cdictLocal);
        cctx->cdictLocal = NULL;

#ifdef ZSTD_MULTITHREAD
        ZSTDMT_freeCCtx(cctx->mtctx);
        cctx->mtctx = NULL;
#endif

        ZSTD_free(cctx, cMem);
    }
    return 0;   /* reserved as a potential error code in the future */
}
2015-10-22 14:31:46 +00:00
/*! ZSTD_sizeof_mtctx() :
 *  @return : size of the multi-threading context attached to `cctx`,
 *            or 0 when the library is built without ZSTD_MULTITHREAD. */
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
#ifndef ZSTD_MULTITHREAD
    (void) cctx;
    return 0;
#else
    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
#endif
}
2016-08-22 23:18:06 +00:00
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
2016-07-11 01:12:17 +00:00
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(3, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
DEBUGLOG(3, "workSpaceSize (including streaming buffers): %u", (U32)cctx->workSpaceSize);
DEBUGLOG(3, "inner cdict : %u", (U32)ZSTD_sizeof_CDict(cctx->cdictLocal));
DEBUGLOG(3, "inner MTCTX : %u", (U32)ZSTD_sizeof_mtctx(cctx));
return sizeof(*cctx) + cctx->workSpaceSize
+ ZSTD_sizeof_CDict(cctx->cdictLocal)
+ ZSTD_sizeof_mtctx(cctx);
2016-07-11 01:12:17 +00:00
}
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
return ZSTD_sizeof_CCtx(zcs); /* same object */
}
2017-05-12 22:31:53 +00:00
/*! ZSTD_getSeqStore() :
 *  private API call, for dictBuilder only.
 *  @return : address of the sequence store embedded in `ctx`. */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)
{
    return &ctx->seqStore;
}
2017-08-18 02:30:22 +00:00
/* Sentinel compression level : tells ZSTD_getCParamsFromCCtxParams() to use the
 * cParams stored in a ZSTD_CCtx_params as-is, instead of deriving them from a level. */
#define ZSTD_CLEVEL_CUSTOM 999
2017-05-12 22:31:53 +00:00
2017-08-29 02:25:17 +00:00
/*! ZSTD_getCParamsFromCCtxParams() :
 *  @return : `params.cParams` when the level is ZSTD_CLEVEL_CUSTOM,
 *            otherwise the parameters ZSTD_getCParams() associates with
 *            `params.compressionLevel`, `srcSizeHint` and `dictSize`. */
static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        ZSTD_CCtx_params params, U64 srcSizeHint, size_t dictSize)
{
    if (params.compressionLevel == ZSTD_CLEVEL_CUSTOM) {
        return params.cParams;
    }
    return ZSTD_getCParams(params.compressionLevel, srcSizeHint, dictSize);
}
2017-08-23 22:47:15 +00:00
/*! ZSTD_cLevelToCCtxParams_srcSize() :
 *  Replaces the compression level stored in `params` by explicit cParams
 *  (resolved for `srcSize`, without dictionary), then marks the level as
 *  ZSTD_CLEVEL_CUSTOM. No effect on cParams when the level is already custom. */
static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* params, U64 srcSize)
{
    ZSTD_compressionParameters const resolved =
            ZSTD_getCParamsFromCCtxParams(*params, srcSize, 0);
    params->cParams = resolved;
    params->compressionLevel = ZSTD_CLEVEL_CUSTOM;
}
/*! ZSTD_cLevelToCParams() :
 *  Converts the requested compression level of `cctx` into explicit cParams,
 *  using (pledgedSrcSizePlusOne - 1) as source size. */
static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_params* const requested = &cctx->requestedParams;
    ZSTD_cLevelToCCtxParams_srcSize(requested, cctx->pledgedSrcSizePlusOne-1);
}
/*! ZSTD_cLevelToCCtxParams() :
 *  Same as ZSTD_cLevelToCCtxParams_srcSize(), with a source size of 0. */
static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* params)
{
    U64 const noSrcSize = 0;
    ZSTD_cLevelToCCtxParams_srcSize(params, noSrcSize);
}
2017-08-21 17:09:06 +00:00
/*! ZSTD_makeCCtxParamsFromCParams() :
 *  @return : a ZSTD_CCtx_params with every field zeroed, except `cParams`
 *            (copied from the argument) and the level (ZSTD_CLEVEL_CUSTOM). */
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
        ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params result;
    memset(&result, 0, sizeof(result));
    result.compressionLevel = ZSTD_CLEVEL_CUSTOM;
    result.cParams = cParams;
    return result;
}
/*! ZSTD_createCCtxParams_advanced() :
 *  Allocates a zero-initialized ZSTD_CCtx_params through `customMem`,
 *  which must provide both customAlloc and customFree, or neither.
 *  @return : the new object, with its level preset to ZSTD_CLEVEL_DEFAULT,
 *            or NULL on invalid allocator / allocation failure. */
static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
        ZSTD_customMem customMem)
{
    int const hasAlloc = (customMem.customAlloc != NULL);
    int const hasFree  = (customMem.customFree  != NULL);
    if (hasAlloc != hasFree) return NULL;

    {   ZSTD_CCtx_params* const newParams =
                (ZSTD_CCtx_params*)ZSTD_calloc(sizeof(ZSTD_CCtx_params), customMem);
        if (newParams == NULL) return NULL;
        newParams->customMem = customMem;
        newParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;
        return newParams;
    }
}
/*! ZSTD_createCCtxParams() :
 *  Creates a ZSTD_CCtx_params object using the default allocator (ZSTD_defaultCMem). */
ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
    return params;
}
/*! ZSTD_freeCCtxParams() :
 *  Releases `params` with the allocator recorded inside it. NULL is accepted.
 *  @return : always 0. */
size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params != NULL) {
        ZSTD_free(params, params->customMem);
    }
    return 0;
}
2017-08-21 05:55:07 +00:00
/*! ZSTD_resetCCtxParams() :
 *  Re-initializes `params` with ZSTD_CLEVEL_DEFAULT.
 *  @return : result of ZSTD_initCCtxParams(). */
size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params)
{
    size_t const initResult = ZSTD_initCCtxParams(params, ZSTD_CLEVEL_DEFAULT);
    return initResult;
}
/*! ZSTD_initCCtxParams() :
 *  Zeroes the whole `cctxParams` object, then stores `compressionLevel` in it.
 *  @return : 0, or a GENERIC error when `cctxParams` is NULL. */
size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel)
{
    if (cctxParams == NULL) {
        return ERROR(GENERIC);
    }
    memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    return 0;
}
/*! ZSTD_initCCtxParams_advanced() :
 *  Validates `params.cParams`, then zeroes `cctxParams` and installs the
 *  compression and frame parameters, marking the level as ZSTD_CLEVEL_CUSTOM.
 *  `cctxParams` is left untouched when validation fails.
 *  @return : 0, a GENERIC error when `cctxParams` is NULL,
 *            or the error reported by ZSTD_checkCParams(). */
size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    if (cctxParams == NULL) {
        return ERROR(GENERIC);
    }
    CHECK_F( ZSTD_checkCParams(params.cParams) );

    memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM;
    cctxParams->fParams = params.fParams;
    cctxParams->cParams = params.cParams;
    return 0;
}
/*! ZSTD_assignParamsToCCtxParams() :
 *  @return : a copy of `cctxParams` in which the compression and frame
 *            parameters are replaced by those of `params`.
 *  The level of the copy is set to ZSTD_CLEVEL_CUSTOM : without it,
 *  ZSTD_getCParamsFromCCtxParams() would ignore the assigned cParams and
 *  derive parameters from whatever level `cctxParams` carried.
 *  This matches ZSTD_makeCCtxParamsFromCParams() and ZSTD_initCCtxParams_advanced(). */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
        ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
{
    ZSTD_CCtx_params ret = cctxParams;
    ret.cParams = params.cParams;
    ret.fParams = params.fParams;
    ret.compressionLevel = ZSTD_CLEVEL_CUSTOM;   /* explicit cParams take precedence over any level */
    return ret;
}
/* CLAMPCHECK() :
 * Makes the enclosing function return ERROR(parameter_outOfBound)
 * when `val` is strictly below `min` or strictly above `max`.
 * Note : `val` is evaluated twice, and the expansion contains a `return`,
 * so it is only usable inside functions able to return an ERROR() value. */
#define CLAMPCHECK(val,min,max) { \
    if (((val)<(min)) | ((val)>(max))) { \
        return ERROR(parameter_outOfBound); \
} }
2017-05-12 22:31:53 +00:00
/*! ZSTD_CCtx_setParameter() :
 *  Sets one compression parameter in the requested parameters of `cctx`.
 *  Only allowed while the context is in init stage.
 *  Each case below only performs the context-level checks and side effects;
 *  the value itself is validated and stored by ZSTD_CCtxParam_setParameter().
 *  @return : 0, or an error code (stage_wrong, parameter_unsupported,
 *            or whatever ZSTD_CCtxParam_setParameter() reports). */
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
{
    ZSTD_CCtx_params* const requested = &cctx->requestedParams;

    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);

    switch(param)
    {
    /* value-carrying parameters, not adjustable once a cdict is referenced */
    case ZSTD_p_compressionLevel:
    case ZSTD_p_ldmHashLog:
    case ZSTD_p_ldmMinMatch:
        if (value == 0) return 0;   /* special value : 0 means "don't change anything" */
        if (cctx->cdict) return ERROR(stage_wrong);
        break;

    /* explicit compression parameters : the level is first converted into cParams */
    case ZSTD_p_windowLog:
    case ZSTD_p_hashLog:
    case ZSTD_p_chainLog:
    case ZSTD_p_searchLog:
    case ZSTD_p_minMatch:
    case ZSTD_p_targetLength:
    case ZSTD_p_compressionStrategy:
        if (value == 0) return 0;   /* special value : 0 means "don't change anything" */
        if (cctx->cdict) return ERROR(stage_wrong);
        ZSTD_cLevelToCParams(cctx);   /* Can optimize if srcSize is known */
        break;

    /* parameters forwarded without any context-level check */
    case ZSTD_p_contentSizeFlag:
    case ZSTD_p_checksumFlag:
    case ZSTD_p_dictIDFlag:
    case ZSTD_p_jobSize:
        break;

    case ZSTD_p_forceMaxWindow :   /* Force back-references to remain < windowSize,
                                    * even when referencing into Dictionary content
                                    * default : 0 when using a CDict, 1 when using a Prefix */
        cctx->loadedDictEnd = 0;
        break;

    case ZSTD_p_nbThreads:
        if (value==0) return 0;
        DEBUGLOG(5, " setting nbThreads : %u", value);
        if (value > 1 && cctx->staticSize) {
            return ERROR(parameter_unsupported);   /* MT not compatible with static alloc */
        }
        break;

    case ZSTD_p_overlapSizeLog:
        DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->requestedParams.nbThreads);
        break;

    case ZSTD_p_enableLongDistanceMatching:
        if (cctx->cdict) return ERROR(stage_wrong);
        if (value != 0) {
            ZSTD_cLevelToCParams(cctx);
        }
        break;

    case ZSTD_p_ldmBucketSizeLog:
    case ZSTD_p_ldmHashEveryLog:
        if (cctx->cdict) return ERROR(stage_wrong);
        break;

    default: return ERROR(parameter_unsupported);
    }

    return ZSTD_CCtxParam_setParameter(requested, param, value);
}
2017-08-18 02:30:22 +00:00
/*! ZSTD_CCtxParam_setParameter() :
 *  Validates `value` and stores it into `params` for the selected `param`.
 *  For compression level, the explicit compression parameters and the ldm
 *  hashLog / minMatch, a value of 0 means "don't change anything".
 *  Setting any explicit compression parameter (or enabling long distance
 *  matching) first converts the stored level into explicit cParams.
 *  @return : 0, or an error code (parameter_outOfBound, parameter_unsupported,
 *            or the result of the multi-threading / ldm helper being called). */
size_t ZSTD_CCtxParam_setParameter(
        ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value)
{
    switch(param)
    {
    case ZSTD_p_compressionLevel :
        if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel();   /* cap to the maximum level */
        if (value != 0) {
            params->compressionLevel = value;
        }
        break;

    case ZSTD_p_windowLog :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.windowLog = value;
        }
        break;

    case ZSTD_p_hashLog :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.hashLog = value;
        }
        break;

    case ZSTD_p_chainLog :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.chainLog = value;
        }
        break;

    case ZSTD_p_searchLog :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.searchLog = value;
        }
        break;

    case ZSTD_p_minMatch :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.searchLength = value;
        }
        break;

    case ZSTD_p_targetLength :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.targetLength = value;
        }
        break;

    case ZSTD_p_compressionStrategy :
        if (value != 0) {
            CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.strategy = (ZSTD_strategy)value;
        }
        break;

    case ZSTD_p_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(5, "set content size flag = %u", (value>0));
        params->fParams.contentSizeFlag = value > 0;
        break;

    case ZSTD_p_checksumFlag :
        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
        params->fParams.checksumFlag = value > 0;
        break;

    case ZSTD_p_dictIDFlag :   /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(5, "set dictIDFlag = %u", (value>0));
        params->fParams.noDictIDFlag = (value == 0);   /* stored inverted */
        break;

    case ZSTD_p_forceMaxWindow :
        params->forceWindow = value > 0;
        break;

    case ZSTD_p_nbThreads :
        if (value == 0) break;
#ifdef ZSTD_MULTITHREAD
        return ZSTDMT_initializeCCtxParameters(params, value);
#else
        if (value > 1) return ERROR(parameter_unsupported);
        break;
#endif

    case ZSTD_p_jobSize :
#ifdef ZSTD_MULTITHREAD
        if (params->nbThreads <= 1) return ERROR(parameter_unsupported);
        return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_sectionSize, value);
#else
        return ERROR(parameter_unsupported);
#endif

    case ZSTD_p_overlapSizeLog :
#ifdef ZSTD_MULTITHREAD
        if (params->nbThreads <= 1) return ERROR(parameter_unsupported);
        return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value);
#else
        return ERROR(parameter_unsupported);
#endif

    case ZSTD_p_enableLongDistanceMatching :
        if (value != 0) {
            /* enabling ldm also forces the window log to ZSTD_LDM_WINDOW_LOG */
            ZSTD_cLevelToCCtxParams(params);
            params->cParams.windowLog = ZSTD_LDM_WINDOW_LOG;
        }
        return ZSTD_ldm_initializeParameters(&params->ldmParams, value);

    case ZSTD_p_ldmHashLog :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
            params->ldmParams.hashLog = value;
        }
        break;

    case ZSTD_p_ldmMinMatch :
        if (value != 0) {
            CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX);
            params->ldmParams.minMatchLength = value;
        }
        break;

    case ZSTD_p_ldmBucketSizeLog :
        if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) return ERROR(parameter_outOfBound);
        params->ldmParams.bucketSizeLog = value;
        break;

    case ZSTD_p_ldmHashEveryLog :
        if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) return ERROR(parameter_outOfBound);
        params->ldmParams.hashEveryLog = value;
        break;

    default: return ERROR(parameter_unsupported);
    }

    return 0;
}
2017-08-21 08:59:08 +00:00
/**
* This function should be updated whenever ZSTD_CCtx_params is updated.
* Parameters are copied manually before the dictionary is loaded.
* The multithreading parameters jobSize and overlapSizeLog are set only if
2017-08-26 00:58:28 +00:00
* nbThreads > 1.
2017-08-21 17:09:06 +00:00
*
* Pledged srcSize is treated as unknown.
2017-08-21 08:59:08 +00:00
*/
2017-08-30 01:03:06 +00:00
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
2017-08-18 18:20:08 +00:00
{
2017-08-29 23:18:21 +00:00
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
if (cctx->cdict) return ERROR(stage_wrong);
2017-08-18 20:01:55 +00:00
/* Assume the compression and frame parameters are validated */
cctx->requestedParams.cParams = params->cParams;
cctx->requestedParams.fParams = params->fParams;
cctx->requestedParams.compressionLevel = params->compressionLevel;
/* Set force window explicitly since it sets cctx->loadedDictEnd */
CHECK_F( ZSTD_CCtx_setParameter(
cctx, ZSTD_p_forceMaxWindow, params->forceWindow) );
/* Set multithreading parameters explicitly */
CHECK_F( ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbThreads, params->nbThreads) );
2017-08-21 05:55:07 +00:00
if (params->nbThreads > 1) {
CHECK_F( ZSTD_CCtx_setParameter(cctx, ZSTD_p_jobSize, params->jobSize) );
CHECK_F( ZSTD_CCtx_setParameter(
2017-08-18 20:01:55 +00:00
cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) );
2017-08-21 05:55:07 +00:00
}
/* Copy long distance matching parameters */
cctx->requestedParams.ldmParams = params->ldmParams;
/* customMem is used only for create/free params and can be ignored */
2017-08-18 18:20:08 +00:00
return 0;
}
2017-05-12 22:31:53 +00:00
/*! ZSTD_CCtx_setPledgedSrcSize() :
 *  Records the announced source size for the next compression
 *  (stored internally as pledgedSrcSize+1).
 *  @return : 0, or stage_wrong when the context is not in init stage. */
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(5, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize);
    if (cctx->streamStage == zcss_init) {
        cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
        return 0;
    }
    return ERROR(stage_wrong);
}
/*! ZSTD_CCtx_loadDictionary_advanced() :
 *  Replaces the locally owned CDict of `cctx` by one built from `dict`.
 *  Any previous local CDict is freed first.
 *  A NULL `dict` or a `dictSize` of 0 selects "no dictionary" mode.
 *  The CDict is created with the compression parameters currently requested
 *  on the context (source size considered unknown).
 *  @return : 0, or an error code :
 *            stage_wrong when the context is not in init stage,
 *            memory_allocation for a static context or when creation fails. */
size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode)
{
    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
    if (cctx->staticSize) return ERROR(memory_allocation);   /* no malloc for static CCtx */
    DEBUGLOG(5, "load dictionary of size %u", (U32)dictSize);

    ZSTD_freeCDict(cctx->cdictLocal);   /* in case one already exists */

    if ((dict != NULL) && (dictSize != 0)) {
        ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, 0, dictSize);
        cctx->cdictLocal = ZSTD_createCDict_advanced(
                                dict, dictSize,
                                dictLoadMethod, dictMode,
                                cParams, cctx->customMem);
        cctx->cdict = cctx->cdictLocal;
        if (cctx->cdictLocal == NULL) {
            return ERROR(memory_allocation);
        }
        return 0;
    }

    /* no dictionary mode */
    cctx->cdictLocal = NULL;
    cctx->cdict = NULL;
    return 0;
}
/*! ZSTD_CCtx_loadDictionary_byReference() :
 *  Same as ZSTD_CCtx_loadDictionary_advanced(),
 *  using load method ZSTD_dlm_byRef and dictionary mode ZSTD_dm_auto. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    size_t const loadResult = ZSTD_CCtx_loadDictionary_advanced(
                                    cctx, dict, dictSize,
                                    ZSTD_dlm_byRef, ZSTD_dm_auto);
    return loadResult;
}
/*! ZSTD_CCtx_loadDictionary() :
 *  Same as ZSTD_CCtx_loadDictionary_advanced(),
 *  using load method ZSTD_dlm_byCopy and dictionary mode ZSTD_dm_auto. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    size_t const loadResult = ZSTD_CCtx_loadDictionary_advanced(
                                    cctx, dict, dictSize,
                                    ZSTD_dlm_byCopy, ZSTD_dm_auto);
    return loadResult;
}
2017-06-27 22:49:12 +00:00
/*! ZSTD_CCtx_refCDict() :
 *  Makes `cctx` reference `cdict`, and forgets any previously referenced
 *  prefix (both are mutually exclusive).
 *  @return : 0, or stage_wrong when the context is not in init stage. */
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    if (cctx->streamStage != zcss_init) {
        return ERROR(stage_wrong);
    }
    memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* exclusive */
    cctx->cdict = cdict;
    return 0;
}
2017-06-27 22:49:12 +00:00
/*! ZSTD_CCtx_refPrefix() :
 *  Same as ZSTD_CCtx_refPrefix_advanced(), with dictionary mode ZSTD_dm_rawContent. */
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    size_t const refResult =
            ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dm_rawContent);
    return refResult;
}
/*! ZSTD_CCtx_refPrefix_advanced() :
 *  Records `prefix` (address, size and dictionary mode) in the context,
 *  and drops any previously referenced cdict.
 *  @return : 0, or stage_wrong when the context is not in init stage. */
size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode)
{
    if (cctx->streamStage != zcss_init) {
        return ERROR(stage_wrong);
    }
    cctx->prefixDict.dictMode = dictMode;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dict = prefix;
    cctx->cdict = NULL;   /* prefix discards any prior cdict */
    return 0;
}
2017-06-16 21:00:46 +00:00
/*! ZSTD_startNewCompression() :
 *  Brings the context back to init stage and clears the pledged source size. */
static void ZSTD_startNewCompression(ZSTD_CCtx* cctx)
{
    cctx->pledgedSrcSizePlusOne = 0;
    cctx->streamStage = zcss_init;
}
2017-06-16 21:00:46 +00:00
/*! ZSTD_CCtx_reset() :
 *  Prepares the context for a new compression (init stage, no pledged size).
 *  Also dumps dictionary : the cdict reference is cleared. */
void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
{
    cctx->cdict = NULL;
    ZSTD_startNewCompression(cctx);
}
2015-11-04 11:05:27 +00:00
/** ZSTD_checkCParams() :
 *  Controls that every CParam value remains within its authorized range.
 *  @return : 0, or an error code if one value is beyond authorized range
 *            (parameter_outOfBound for the numeric fields,
 *             parameter_unsupported for an unknown strategy). */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    /* numeric fields : each one must lie within [MIN, MAX] */
    CLAMPCHECK(cParams.windowLog,    ZSTD_WINDOWLOG_MIN,    ZSTD_WINDOWLOG_MAX);
    CLAMPCHECK(cParams.chainLog,     ZSTD_CHAINLOG_MIN,     ZSTD_CHAINLOG_MAX);
    CLAMPCHECK(cParams.hashLog,      ZSTD_HASHLOG_MIN,      ZSTD_HASHLOG_MAX);
    CLAMPCHECK(cParams.searchLog,    ZSTD_SEARCHLOG_MIN,    ZSTD_SEARCHLOG_MAX);
    CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
    CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);

    /* strategy : ZSTD_btultra is the last accepted value */
    {   U32 const strategyID = (U32)(cParams.strategy);
        if (strategyID > (U32)ZSTD_btultra) {
            return ERROR(parameter_unsupported);
        }
    }
    return 0;
}
/** ZSTD_clampCParams() :
 *  Brings every CParam value back within its valid range :
 *  values below the minimum are raised to it, values above the maximum are
 *  lowered to it, and an unknown strategy becomes ZSTD_btultra.
 *  @return : valid CParams */
static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
    /* note : CLAMP() remains defined after this function */
#   define CLAMP(val,min,max) {      \
        if (val<min) val=min;        \
        else if (val>max) val=max;   \
    }
    CLAMP(cParams.windowLog,    ZSTD_WINDOWLOG_MIN,    ZSTD_WINDOWLOG_MAX);
    CLAMP(cParams.chainLog,     ZSTD_CHAINLOG_MIN,     ZSTD_CHAINLOG_MAX);
    CLAMP(cParams.hashLog,      ZSTD_HASHLOG_MIN,      ZSTD_HASHLOG_MAX);
    CLAMP(cParams.searchLog,    ZSTD_SEARCHLOG_MIN,    ZSTD_SEARCHLOG_MAX);
    CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
    CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);

    {   U32 const strategyID = (U32)(cParams.strategy);
        if (strategyID > (U32)ZSTD_btultra) {
            cParams.strategy = ZSTD_btultra;
        }
    }
    return cParams;
}
2016-02-10 12:37:52 +00:00
2016-12-11 23:47:30 +00:00
/** ZSTD_cycleLog() :
 *  @return : `hashLog`, reduced by 1 for strategies >= ZSTD_btlazy2.
 *  condition for correct operation : hashLog > 1 */
static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
    if ((U32)strat >= (U32)ZSTD_btlazy2) {
        return hashLog - 1;
    }
    return hashLog;
}
/** ZSTD_adjustCParams_internal() :
 *  Optimizes `cPar` for a given input (`srcSize` and `dictSize`),
 *  mostly downsizing to reduce memory consumption and initialization.
 *  Both `srcSize` and `dictSize` are optional (use 0 if unknown),
 *  but if both are 0, no optimization can be done.
 *  Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that.
 *  @return : the adjusted parameters. */
ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
{
    assert(ZSTD_checkCParams(cPar)==0);
    if (srcSize+dictSize == 0) return cPar;   /* no size information available : no adjustment */

    /* resize params, to use less memory when necessary */
    {   U32 const minSrcSize = (srcSize==0) ? 500 : 0;   /* padding applied when only the dictionary size is known */
        U64 const rSize = srcSize + dictSize + minSrcSize;
        U64 const maxWindowSize = (U64)1 << ZSTD_WINDOWLOG_MAX;
        if (rSize < maxWindowSize) {
            /* number of bits required to address rSize bytes */
            U32 const rSizeLog = (rSize==1) ? 1 : ZSTD_highbit32((U32)(rSize)-1) + 1;
            U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, rSizeLog);
            if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
        }
    }

    /* tables never need to be larger than the window */
    if (cPar.hashLog > cPar.windowLog) {
        cPar.hashLog = cPar.windowLog;
    }
    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cycleLog > cPar.windowLog) {
            U32 const excess = cycleLog - cPar.windowLog;
            cPar.chainLog -= excess;
        }
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) {
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* required for frame header */
    }

    return cPar;
}
/*! ZSTD_adjustCParams() :
 *  Clamps `cPar` into its valid range, then optimizes it for
 *  `srcSize` and `dictSize` through ZSTD_adjustCParams_internal(). */
ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
{
    ZSTD_compressionParameters const validCPar = ZSTD_clampCParams(cPar);
    return ZSTD_adjustCParams_internal(validCPar, srcSize, dictSize);
}
size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params)
2016-03-19 15:09:09 +00:00
{
/* Estimate CCtx size is supported for single-threaded compression only. */
if (params->nbThreads > 1) { return ERROR(GENERIC); }
2017-08-29 02:25:17 +00:00
{ ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(*params, 0, 0);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
size_t const chainSize =
(cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
size_t const hSize = ((size_t)1) << cParams.hashLog;
U32 const hashLog3 = (cParams.searchLength>3) ?
0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
size_t const h3Size = ((size_t)1) << hashLog3;
size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
2016-07-11 01:12:17 +00:00
size_t const optBudget =
((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
size_t const ldmSpace = params->ldmParams.enableLdm ?
ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
2017-09-03 04:10:36 +00:00
params->ldmParams.bucketSizeLog) : 0;
size_t const neededSpace = entropySpace + tableSpace + tokenSpace +
optSpace + ldmSpace;
2016-07-11 01:12:17 +00:00
DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
return sizeof(ZSTD_CCtx) + neededSpace;
}
}
2016-07-11 01:12:17 +00:00
/*! ZSTD_estimateCCtxSize_advanced_usingCParams() :
 *  Wraps `cParams` into a ZSTD_CCtx_params (custom level, everything else zeroed),
 *  then forwards to ZSTD_estimateCCtxSize_advanced_usingCCtxParams(). */
size_t ZSTD_estimateCCtxSize_advanced_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const wrapped = ZSTD_makeCCtxParamsFromCParams(cParams);
    size_t const estimate = ZSTD_estimateCCtxSize_advanced_usingCCtxParams(&wrapped);
    return estimate;
}
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
return ZSTD_estimateCCtxSize_advanced_usingCParams(cParams);
}
size_t ZSTD_estimateCStreamSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params)
{
if (params->nbThreads > 1) { return ERROR(GENERIC); }
{ size_t const CCtxSize = ZSTD_estimateCCtxSize_advanced_usingCCtxParams(params);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog);
size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize;
size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
size_t const streamingSize = inBuffSize + outBuffSize;
return CCtxSize + streamingSize;
}
}
/*! ZSTD_estimateCStreamSize_advanced_usingCParams() :
 *  Wraps `cParams` into a ZSTD_CCtx_params (custom level, everything else zeroed),
 *  then forwards to ZSTD_estimateCStreamSize_advanced_usingCCtxParams(). */
size_t ZSTD_estimateCStreamSize_advanced_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const wrapped = ZSTD_makeCCtxParamsFromCParams(cParams);
    size_t const estimate = ZSTD_estimateCStreamSize_advanced_usingCCtxParams(&wrapped);
    return estimate;
}
2017-06-26 23:02:25 +00:00
size_t ZSTD_estimateCStreamSize(int compressionLevel) {
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
return ZSTD_estimateCStreamSize_advanced_usingCParams(cParams);
2017-06-26 23:02:25 +00:00
}
/*! ZSTD_equivalentCParams() :
 *  @return : 1 when both parameter sets lead to the same block size,
 *            the same hashLog and chainLog, the same strategy,
 *            and agree on whether searchLength is 3; 0 otherwise. */
static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
                                  ZSTD_compressionParameters cParams2)
{
    U32 const blockLog1 = MIN(cParams1.windowLog, ZSTD_BLOCKSIZELOG_MAX);
    U32 const blockLog2 = MIN(cParams2.windowLog, ZSTD_BLOCKSIZELOG_MAX);

    U32 const sameBlockSize = (blockLog1 == blockLog2);
    U32 const sameHashLog   = (cParams1.hashLog == cParams2.hashLog);
    U32 const sameChainLog  = (cParams1.chainLog == cParams2.chainLog);
    U32 const sameStrategy  = (cParams1.strategy == cParams2.strategy);   /* opt parser space */
    U32 const sameHash3     = ((cParams1.searchLength==3) == (cParams2.searchLength==3));   /* hashlog3 space */

    return sameBlockSize & sameHashLog & sameChainLog & sameStrategy & sameHash3;
}
/** ZSTD_equivalentLdmParams() :
 *  The parameters are equivalent if ldm is not enabled in both sets or
 *  all the parameters are equivalent.
 *  @return : 1 when equivalent, 0 otherwise. */
static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
                                    ldmParams_t ldmParams2)
{
    /* ldm disabled on both sides : remaining fields are irrelevant */
    if (!ldmParams1.enableLdm && !ldmParams2.enableLdm) return 1;

    if (ldmParams1.enableLdm != ldmParams2.enableLdm) return 0;
    if (ldmParams1.hashLog != ldmParams2.hashLog) return 0;
    if (ldmParams1.bucketSizeLog != ldmParams2.bucketSizeLog) return 0;
    if (ldmParams1.minMatchLength != ldmParams2.minMatchLength) return 0;
    if (ldmParams1.hashEveryLog != ldmParams2.hashEveryLog) return 0;
    return 1;
}
/** ZSTD_equivalentParams() :
 *  Equivalence for resetCCtx purposes :
 *  both the compression parameters and the ldm parameters must be equivalent.
 *  @return : 1 when equivalent, 0 otherwise. */
static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
                                 ZSTD_CCtx_params params2)
{
    if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) return 0;
    if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) return 0;
    return 1;
}
/*! ZSTD_continueCCtx() :
* reuse CCtx without reset (note : requires no dictionary) */
2017-08-18 18:20:08 +00:00
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
{
U32 const end = (U32)(cctx->nextSrc - cctx->base);
DEBUGLOG(5, "continue mode");
cctx->appliedParams = params;
2017-06-16 21:00:46 +00:00
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
cctx->consumedSrcSize = 0;
2017-06-16 21:00:46 +00:00
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
cctx->appliedParams.fParams.contentSizeFlag = 0;
2017-06-16 21:00:46 +00:00
DEBUGLOG(5, "pledged content size : %u ; flag : %u",
(U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
cctx->lowLimit = end;
cctx->dictLimit = end;
cctx->nextToUpdate = end+1;
cctx->stage = ZSTDcs_init;
cctx->dictID = 0;
cctx->loadedDictEnd = 0;
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = repStartValue[i]; }
cctx->optState.litLengthSum = 0; /* force reset of btopt stats */
2016-09-06 07:54:22 +00:00
XXH64_reset(&cctx->xxhState, 0);
return 0;
}
/* Reset policy given to ZSTD_resetCCtx_internal() :
 * with ZSTDcrp_continue, the context is reused through ZSTD_continueCCtx()
 * whenever the new parameters are equivalent to the applied ones.
 * NOTE(review): the effect of ZSTDcrp_noMemset is not visible in this section - confirm. */
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
2017-07-03 22:21:24 +00:00
/* Buffering policy given to ZSTD_resetCCtx_internal() :
 * input and output buffer sizes are non-zero only for ZSTDb_buffered. */
typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
/*! ZSTD_resetCCtx_internal() :
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
2017-08-18 18:20:08 +00:00
ZSTD_CCtx_params params, U64 pledgedSrcSize,
ZSTD_compResetPolicy_e const crp,
ZSTD_buffered_policy_e const zbuff)
{
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
if (crp == ZSTDcrp_continue) {
if (ZSTD_equivalentParams(params, zc->appliedParams)) {
DEBUGLOG(5, "ZSTD_equivalentParams()==1");
assert(!(params.ldmParams.enableLdm &&
2017-09-07 00:56:01 +00:00
params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET));
zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
zc->entropy->offcode_repeatMode = FSE_repeat_none;
zc->entropy->matchlength_repeatMode = FSE_repeat_none;
zc->entropy->litlength_repeatMode = FSE_repeat_none;
return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
} }
if (params.ldmParams.enableLdm) {
2017-09-03 04:10:36 +00:00
/* Adjust long distance matching parameters */
2017-09-07 00:56:01 +00:00
ZSTD_ldm_adjustParameters(&params.ldmParams, params.cParams.windowLog);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashEveryLog < 32);
zc->ldmState.hashPower =
ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
}
{ size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
0 : (1 << params.cParams.chainLog);
size_t const hSize = ((size_t)1) << params.cParams.hashLog;
U32 const hashLog3 = (params.cParams.searchLength>3) ?
0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
size_t const h3Size = ((size_t)1) << hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
void* ptr;
/* Check if workSpace is large enough, alloc a new one if needed */
{ size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+ (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
|| (params.cParams.strategy == ZSTD_btultra)) ?
optPotentialSpace : 0;
size_t const bufferSpace = buffInSize + buffOutSize;
2017-09-06 15:39:46 +00:00
size_t const ldmSpace = params.ldmParams.enableLdm
? ZSTD_ldm_getTableSize(params.ldmParams.hashLog, params.ldmParams.bucketSizeLog)
: 0;
size_t const neededSpace = entropySpace + optSpace + ldmSpace +
tableSpace + tokenSpace + bufferSpace;
if (zc->workSpaceSize < neededSpace) { /* too small : resize */
DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n",
(unsigned)zc->workSpaceSize>>10,
(unsigned)neededSpace>>10);
/* static cctx : no resize, error out */
if (zc->staticSize) return ERROR(memory_allocation);
zc->workSpaceSize = 0;
ZSTD_free(zc->workSpace, zc->customMem);
zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
if (zc->workSpace == NULL) return ERROR(memory_allocation);
zc->workSpaceSize = neededSpace;
ptr = zc->workSpace;
/* entropy space */
assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
assert(zc->workSpaceSize >= sizeof(ZSTD_entropyCTables_t));
zc->entropy = (ZSTD_entropyCTables_t*)zc->workSpace;
} }
2016-03-19 15:09:09 +00:00
/* init params */
zc->appliedParams = params;
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
zc->consumedSrcSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
zc->appliedParams.fParams.contentSizeFlag = 0;
2017-06-16 21:00:46 +00:00
DEBUGLOG(5, "pledged content size : %u ; flag : %u",
(U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
zc->blockSize = blockSize;
XXH64_reset(&zc->xxhState, 0);
zc->stage = ZSTDcs_init;
zc->dictID = 0;
zc->loadedDictEnd = 0;
zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
zc->entropy->offcode_repeatMode = FSE_repeat_none;
zc->entropy->matchlength_repeatMode = FSE_repeat_none;
zc->entropy->litlength_repeatMode = FSE_repeat_none;
zc->nextToUpdate = 1;
zc->nextSrc = NULL;
zc->base = NULL;
zc->dictBase = NULL;
zc->dictLimit = 0;
zc->lowLimit = 0;
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->seqStore.rep[i] = repStartValue[i]; }
zc->hashLog3 = hashLog3;
zc->optState.litLengthSum = 0;
ptr = zc->entropy + 1;
/* opt parser space */
2017-03-09 19:44:25 +00:00
if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
DEBUGLOG(5, "reserving optimal parser space");
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->optState.litFreq = (U32*)ptr;
zc->optState.litLengthFreq = zc->optState.litFreq + (1<<Litbits);
zc->optState.matchLengthFreq = zc->optState.litLengthFreq + (MaxLL+1);
zc->optState.offCodeFreq = zc->optState.matchLengthFreq + (MaxML+1);
ptr = zc->optState.offCodeFreq + (MaxOff+1);
zc->optState.matchTable = (ZSTD_match_t*)ptr;
ptr = zc->optState.matchTable + ZSTD_OPT_NUM+1;
zc->optState.priceTable = (ZSTD_optimal_t*)ptr;
ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1;
}
2017-09-06 15:39:46 +00:00
/* ldm hash table */
/* initialize bucketOffsets table later for pointer alignment */
if (params.ldmParams.enableLdm) {
size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
2017-09-06 15:39:46 +00:00
memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t));
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->ldmState.hashTable = (ldmEntry_t*)ptr;
ptr = zc->ldmState.hashTable + ldmHSize;
}
/* table Space */
if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->hashTable = (U32*)(ptr);
zc->chainTable = zc->hashTable + hSize;
zc->hashTable3 = zc->chainTable + chainSize;
ptr = zc->hashTable3 + h3Size;
/* sequences storage */
zc->seqStore.sequencesStart = (seqDef*)ptr;
ptr = zc->seqStore.sequencesStart + maxNbSeq;
zc->seqStore.llCode = (BYTE*) ptr;
zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
ptr = zc->seqStore.litStart + blockSize;
2017-09-06 15:39:46 +00:00
/* ldm bucketOffsets table */
if (params.ldmParams.enableLdm) {
size_t const ldmBucketSize =
((size_t)1) << (params.ldmParams.hashLog -
params.ldmParams.bucketSizeLog);
memset(ptr, 0, ldmBucketSize);
zc->ldmState.bucketOffsets = (BYTE*)ptr;
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
}
/* buffers */
zc->inBuffSize = buffInSize;
zc->inBuff = (char*)ptr;
zc->outBuffSize = buffOutSize;
zc->outBuff = zc->inBuff + buffInSize;
return 0;
}
2015-10-22 14:31:46 +00:00
}
/* ZSTD_invalidateRepCodes() :
 * zeroes every repcode stored in the context, so that the next compression
 * will not use repcodes from the previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
    int idx = 0;
    while (idx < ZSTD_REP_NUM) {
        cctx->seqStore.rep[idx] = 0;
        idx++;
    }
}
2015-10-25 13:06:35 +00:00
/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  The "context", in this case, refers to the hash and chain tables, entropy
 *  tables, and dictionary offsets.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
 *  `dstCCtx` is reset with its own requested parameters, except for cParams
 *  (taken from `srcCCtx`, since they define table sizes) and fParams.
 * @return : 0, or an error code
 *           (stage_wrong if `srcCCtx` is not at init stage,
 *            or any error reported while resetting `dstCCtx`) */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            unsigned long long pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);

    memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        params.fParams = fParams;
        /* The reset can fail (workspace allocation) : in that case the
         * destination tables are not usable and must not be written to. */
        {   size_t const resetError = ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
                                                              ZSTDcrp_noMemset, zbuff);
            if (ZSTD_isError(resetError)) return resetError;
        }
    }

    /* copy tables */
    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
        size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
        assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize);  /* chainTable must follow hashTable */
        assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
        memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace);   /* presumes all tables follow each other */
    }

    /* copy dictionary offsets */
    dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
    dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
    dstCCtx->nextSrc      = srcCCtx->nextSrc;
    dstCCtx->base         = srcCCtx->base;
    dstCCtx->dictBase     = srcCCtx->dictBase;
    dstCCtx->dictLimit    = srcCCtx->dictLimit;
    dstCCtx->lowLimit     = srcCCtx->lowLimit;
    dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
    dstCCtx->dictID       = srcCCtx->dictID;

    /* copy entropy tables */
    memcpy(dstCCtx->entropy, srcCCtx->entropy, sizeof(ZSTD_entropyCTables_t));

    return 0;
}
/*! ZSTD_copyCCtx() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  pledgedSrcSize==0 means "unknown" : the content size flag is only set
 *  when pledgedSrcSize > 0.
 *  The destination gets buffers only if the source context has an input buffer.
 * @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
    ZSTD_frameParameters frame = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    ZSTD_buffered_policy_e const bufferPolicy =
            (srcCCtx->inBuffSize > 0) ? ZSTDb_buffered : ZSTDb_not_buffered;

    frame.contentSizeFlag = (pledgedSrcSize > 0);

    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, frame, pledgedSrcSize, bufferPolicy);
}
2016-01-26 14:58:49 +00:00
2016-03-20 15:20:06 +00:00
/*! ZSTD_reduceTable() :
* reduce table indexes by `reducerValue` */
2016-03-20 15:20:06 +00:00
static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
{
2016-03-20 15:20:06 +00:00
U32 u;
for (u=0 ; u < size ; u++) {
if (table[u] < reducerValue) table[u] = 0;
else table[u] -= reducerValue;
}
}
/*! ZSTD_ldm_reduceTable() :
 *  lowers the `offset` field of each of the `size` entries of `table` by
 *  `reducerValue`; offsets below `reducerValue` become 0. */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 pos;
    for (pos = 0; pos != size; ++pos) {
        table[pos].offset = (table[pos].offset < reducerValue) ?
                            0 : table[pos].offset - reducerValue;
    }
}
2016-03-20 15:20:06 +00:00
/*! ZSTD_reduceIndex() :
 *  rescale all indexes to avoid future overflow (indexes are U32).
 *  Applies `reducerValue` to the hash table, the chain table (skipped for
 *  ZSTD_fast, which has none), the 3-bytes hash table (skipped when
 *  hashLog3==0) and, when long distance matching is enabled, the ldm hash table. */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
{
    {   U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
        ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }

    {   U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((U32)1 << zc->appliedParams.cParams.chainLog);
        ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }

    {   U32 const h3Size = (zc->hashLog3) ? (U32)1 << zc->hashLog3 : 0;
        ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }

    if (zc->appliedParams.ldmParams.enableLdm) {
        U32 const ldmHSize = (U32)1 << zc->appliedParams.ldmParams.hashLog;
        ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue);
    }
}
2016-01-28 16:56:33 +00:00
/*-*******************************************************
*  Block entropic compression
*********************************************************/
/* See doc/zstd_compression_format.md for detailed format description */
2015-11-11 20:38:21 +00:00
2016-03-15 00:24:33 +00:00
size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-11-11 20:38:21 +00:00
{
2016-03-15 00:24:33 +00:00
if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
2015-11-11 20:38:21 +00:00
return ZSTD_blockHeaderSize+srcSize;
}
2016-03-15 00:24:33 +00:00
static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-11-11 20:38:21 +00:00
{
BYTE* const ostart = (BYTE* const)dst;
2016-07-27 19:05:12 +00:00
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
2015-11-11 20:38:21 +00:00
2016-03-15 00:24:33 +00:00
if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
2015-11-11 20:38:21 +00:00
2016-01-23 18:28:41 +00:00
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
2016-07-23 14:31:49 +00:00
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
2016-01-23 18:28:41 +00:00
break;
case 2: /* 2 - 2 - 12 */
2016-07-23 14:31:49 +00:00
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
2016-01-23 18:28:41 +00:00
break;
case 3: /* 2 - 2 - 20 */
2016-07-23 14:31:49 +00:00
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
2016-01-23 18:28:41 +00:00
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
2016-01-23 18:28:41 +00:00
}
memcpy(ostart + flSize, src, srcSize);
return srcSize + flSize;
2015-11-11 20:38:21 +00:00
}
2016-03-15 00:24:33 +00:00
/*! ZSTD_compressRleLiteralsBlock() :
 *  Stores literals as a single repeated byte (set_rle) : a 1 to 3 bytes
 *  header carrying `srcSize`, followed by the first byte of `src`.
 *  `dstCapacity` is not checked here (see note in the body).
 * @return : nb of bytes written into `dst` (header size + 1) */
static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);

    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */

    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
            break;
        case 3: /* 2 - 2 - 20 */
            /* 4 bytes are written; the 4th one is overwritten by the RLE byte below */
            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
            break;
        default:   /* not necessary : flSize is {1,2,3} */
            assert(0);
    }

    ostart[flSize] = *(const BYTE*)src;
    return flSize+1;
}
2016-01-23 18:28:41 +00:00
2016-03-20 00:09:18 +00:00
/* ZSTD_minGain() :
 * @return : (srcSize / 64) + 2, used by the literals compressor as the
 *           minimum saving required to keep a compressed result */
static size_t ZSTD_minGain(size_t srcSize)
{
    size_t const proportionalPart = srcSize >> 6;
    return proportionalPart + 2;
}
2015-11-11 20:38:21 +00:00
/*! ZSTD_compressLiterals() :
 *  Compresses the literals section using Huffman coding, possibly reusing the
 *  Huffman table stored in `entropy`.
 *  Falls back to raw literals when the input is too small, when compression
 *  fails or does not save at least ZSTD_minGain(srcSize) bytes, and to RLE
 *  literals when the Huffman stage reports a size of 1.
 *  `entropy->hufCTable_repeatMode` is updated to reflect whether the stored
 *  table may be reused for the next block.
 * @return : nb of bytes written into `dst` (header included),
 *           or an error code (dstSize_tooSmall) */
static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
                                     ZSTD_strategy strategy,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    size_t const minGain = ZSTD_minGain(srcSize);
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE*  const ostart = (BYTE*)dst;
    U32 singleStream = srcSize < 256;
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;


    /* small ? don't even attempt compression (speed opt) */
#   define LITERAL_NOENTROPY 63
    {   size_t const minLitSize = entropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
    {   HUF_repeat repeat = entropy->hufCTable_repeatMode;
        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
                                      entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat)
                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
                                      entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat);
        if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
        else { entropy->hufCTable_repeatMode = HUF_repeat_check; }       /* now have a table to reuse */
    }

    /* not compressible enough, or error : store raw, and forget the table */
    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
        entropy->hufCTable_repeatMode = HUF_repeat_none;
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }
    if (cLitSize==1) {
        entropy->hufCTable_repeatMode = HUF_repeat_none;
        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
    }

    /* Build header */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            /* the upper bits of cLitSize do not fit in the first 4 bytes */
            ostart[4] = (BYTE)(cLitSize >> 10);
            break;
        }
    default:  /* not possible : lhSize is {3,4,5} */
        assert(0);
    }
    return lhSize+cLitSize;
}
2016-07-29 19:22:17 +00:00
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
2016-07-29 19:22:17 +00:00
BYTE const LL_deltaCode = 19;
BYTE const ML_deltaCode = 36;
2016-07-29 22:55:13 +00:00
const seqDef* const sequences = seqStorePtr->sequencesStart;
2016-07-29 19:22:17 +00:00
BYTE* const llCodeTable = seqStorePtr->llCode;
BYTE* const ofCodeTable = seqStorePtr->ofCode;
BYTE* const mlCodeTable = seqStorePtr->mlCode;
2016-07-29 22:55:13 +00:00
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
2016-07-29 19:22:17 +00:00
U32 u;
for (u=0; u<nbSeq; u++) {
U32 const llv = sequences[u].litLength;
U32 const mlv = sequences[u].matchLength;
llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
2016-07-29 19:22:17 +00:00
ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
2016-04-07 15:19:00 +00:00
}
2016-07-29 19:22:17 +00:00
if (seqStorePtr->longLengthID==1)
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
if (seqStorePtr->longLengthID==2)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
/*! ZSTD_selectEncodingType() :
 *  Chooses how a symbol table will be encoded, and updates `*repeatMode` :
 *  - set_rle        : every one of the (more than 2) sequences uses the same symbol;
 *  - set_repeat     : previous table is valid and there are fewer than
 *                     MAX_SEQ_FOR_STATIC_FSE sequences (`*repeatMode` unchanged);
 *  - set_basic      : too few sequences, or the most frequent symbol is rare
 *                     enough relative to `defaultNormLog`;
 *  - set_compressed : all other cases.
 * @return : the selected encoding type */
MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
        size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
{
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
    symbolEncodingType_e selected;

    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
        *repeatMode = FSE_repeat_check;
        selected = set_rle;
    } else if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
        selected = set_repeat;
    } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
        *repeatMode = FSE_repeat_valid;
        selected = set_basic;
    } else {
        *repeatMode = FSE_repeat_check;
        selected = set_compressed;
    }
    return selected;
}
/* ZSTD_buildCTable() :
 * Builds the FSE compression table `CTable` for one symbol type according to
 * the selected encoding `type`, and writes the table description (if any)
 * into `dst`.
 *  - set_rle        : writes the single symbol codeTable[0] (1 byte)
 *  - set_repeat     : nothing to build, nothing to write
 *  - set_basic      : builds the table from the default distribution
 *  - set_compressed : normalizes `count`, writes the normalized counts header
 *                     and builds the table from them
 * `count` / `max` are the symbol histogram and its highest symbol value;
 * `count` may be modified. `workspace` is scratch memory for the FSE builder.
 * @return : nb of bytes written into `dst` (0 for set_repeat and set_basic),
 *           or an error code (dstSize_tooSmall when `dst` cannot hold the
 *           table description).
 */
MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
        FSE_CTable* CTable, U32 FSELog, symbolEncodingType_e type,
        U32* count, U32 max,
        BYTE const* codeTable, size_t nbSeq,
        S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
        void* workspace, size_t workspaceSize)
{
    BYTE* op = (BYTE*)dst;
    BYTE const* const oend = op + dstCapacity;

    switch (type) {
    case set_rle:
        /* the RLE description is one byte : make sure it fits before writing,
         * since the caller only guarantees dstCapacity >= 0 at this point */
        if (dstCapacity == 0) return ERROR(dstSize_tooSmall);
        *op = codeTable[0];
        CHECK_F(FSE_buildCTable_rle(CTable, (BYTE)max));
        return 1;
    case set_repeat:
        return 0;
    case set_basic:
        CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));
        return 0;
    case set_compressed: {
        S16 norm[MaxSeq + 1];
        size_t nbSeq_1 = nbSeq;
        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
        /* Discount one occurrence of the last sequence's symbol (when it has
         * more than one) : ZSTD_encodeSequences() uses that symbol to
         * initialize the FSE state instead of encoding it. */
        if (count[codeTable[nbSeq-1]] > 1) {
            count[codeTable[nbSeq-1]]--;
            nbSeq_1--;
        }
        CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
            if (FSE_isError(NCountSize)) return NCountSize;
            CHECK_F(FSE_buildCTable_wksp(CTable, norm, max, tableLog, workspace, workspaceSize));
            return NCountSize;
        }
    }
    default: return assert(0), ERROR(GENERIC);
    }
}
/* ZSTD_encodeSequences() :
 * Writes the sequences bitstream into `dst`.
 * Sequences are processed from the last one (nbSeq-1) down to the first :
 * the last sequence initializes the three FSE states, every earlier sequence
 * is encoded with FSE_encodeSymbol(). For each sequence, the raw extra bits
 * of literal length, match length and offset are appended with BIT_addBits().
 * `*CodeTable` hold one symbol per sequence (see ZSTD_seqToCodes()).
 * `longOffsets` : when non-zero, the offset bits beyond
 *                 STREAM_ACCUMULATOR_MIN-1 are written and flushed separately
 *                 before the remaining ones.
 * Requires nbSeq >= 1 (index nbSeq-1 is read unconditionally).
 * @return : size of the bitstream, or an error code
 *           (dstSize_tooSmall when the stream cannot be opened or closed).
 */
MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    BIT_CStream_t blockStream;
    FSE_CState_t stateMatchLength;
    FSE_CState_t stateOffsetBits;
    FSE_CState_t stateLitLength;
    CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
    /* first symbols */
    /* the last sequence seeds the FSE states; only its extra bits are written */
    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
    FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
    FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
    /* 32-bit targets flush after each field (presumably a narrower bit container) */
    if (MEM_32bits()) BIT_flushBits(&blockStream);
    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
    if (MEM_32bits()) BIT_flushBits(&blockStream);
    if (longOffsets) {
        /* split the offset : low `extraBits` bits first, flushed, then the rest */
        U32 const ofBits = ofCodeTable[nbSeq-1];
        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
        if (extraBits) {
            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
            BIT_flushBits(&blockStream);
        }
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
            ofBits - extraBits);
    } else {
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
    }
    BIT_flushBits(&blockStream);
    /* remaining sequences, from nbSeq-2 down to 0.
     * n is unsigned : decrementing past 0 wraps to a value >= nbSeq, which ends the loop
     * (also makes the loop a no-op when nbSeq == 1). */
    {   size_t n;
        for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
            BYTE const llCode = llCodeTable[n];
            BYTE const ofCode = ofCodeTable[n];
            BYTE const mlCode = mlCodeTable[n];
            U32 const llBits = LL_bits[llCode];
            U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
            U32 const mlBits = ML_bits[mlCode];
            /* (7)*/ /* (7)*/
            /* state transitions : offset, match length, then literal length */
            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
            if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
            /* flush early when the upcoming extra bits could exceed what is left of a 64-bit budget */
            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
                BIT_flushBits(&blockStream); /* (7)*/
            /* extra bits : literal length, match length, then offset */
            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
            if (longOffsets) {
                /* same two-step offset write as for the last sequence */
                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                if (extraBits) {
                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
                    BIT_flushBits(&blockStream); /* (7)*/
                }
                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
                    ofBits - extraBits); /* 31 */
            } else {
                BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
            }
            BIT_flushBits(&blockStream); /* (7)*/
    }   }
    /* write the final FSE states : match length, offset, literal length */
    FSE_flushCState(&blockStream, &stateMatchLength);
    FSE_flushCState(&blockStream, &stateOffsetBits);
    FSE_flushCState(&blockStream, &stateLitLength);
    {   size_t const streamSize = BIT_closeCStream(&blockStream);
        if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
        return streamSize;
    }
}
/* ZSTD_compressSequences_internal() :
 * Serializes the content of the sequence store into `dst`, in this order :
 *   1. compressed literals (ZSTD_compressLiterals())
 *   2. number of sequences (1 to 3 bytes)
 *   3. one flag byte describing the encoding type of the 3 symbol tables
 *   4. table descriptions for literal lengths, offsets and match lengths
 *   5. the sequences bitstream (ZSTD_encodeSequences())
 * Steps 3-5 are skipped when the store holds no sequence.
 * The repeat modes and tables inside `entropy` are updated along the way.
 * @return : nb of bytes written into `dst`, or an error code.
 */
MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                              ZSTD_entropyCTables_t* entropy,
                              ZSTD_compressionParameters const* cParams,
                              void* dst, size_t dstCapacity)
{
    const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
    U32 histogram[MaxSeq+1];
    FSE_CTable* const llCTable = entropy->litlengthCTable;
    FSE_CTable* const ofCTable = entropy->offcodeCTable;
    FSE_CTable* const mlCTable = entropy->matchlengthCTable;
    U32 llMode, ofMode, mlMode;   /* compressed, raw or rle */
    const seqDef* const seqs = seqStorePtr->sequencesStart;
    const BYTE* const ofCodes = seqStorePtr->ofCode;
    const BYTE* const llCodes = seqStorePtr->llCode;
    const BYTE* const mlCodes = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    BYTE* seqHead;

    ZSTD_STATIC_ASSERT(sizeof(entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));

    /* 1. literals */
    {   const BYTE* const litStart = seqStorePtr->litStart;
        size_t const litBytes = seqStorePtr->lit - litStart;
        size_t const litCSize = ZSTD_compressLiterals(
                entropy, cParams->strategy, op, dstCapacity, litStart, litBytes);
        if (ZSTD_isError(litCSize))
            return litCSize;
        op += litCSize;
    }

    /* 2. number of sequences */
    if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
    if (nbSeq < 0x7F) {
        /* NOTE(review): nbSeq == 127 falls through to the 2-byte form below */
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op += 2;
    } else {
        op[0] = 0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op += 3;
    }
    if (nbSeq==0) return op - ostart;

    /* 3. reserve the flag byte, filled once the 3 modes are known */
    seqHead = op++;

    /* convert lengths and offsets into symbol codes */
    ZSTD_seqToCodes(seqStorePtr);

    /* 4a. literal lengths table */
    {   U32 maxSymbol = MaxLL;
        size_t const topCount = FSE_countFast_wksp(histogram, &maxSymbol, llCodes, nbSeq, entropy->workspace);
        size_t tableSize;
        llMode = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, topCount, nbSeq, LL_defaultNormLog);
        tableSize = ZSTD_buildCTable(op, oend - op, llCTable, LLFSELog, (symbolEncodingType_e)llMode,
                        histogram, maxSymbol, llCodes, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                        entropy->workspace, sizeof(entropy->workspace));
        if (ZSTD_isError(tableSize)) return tableSize;
        op += tableSize;
    }

    /* 4b. offsets table */
    {   U32 maxSymbol = MaxOff;
        size_t const topCount = FSE_countFast_wksp(histogram, &maxSymbol, ofCodes, nbSeq, entropy->workspace);
        size_t tableSize;
        ofMode = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, topCount, nbSeq, OF_defaultNormLog);
        tableSize = ZSTD_buildCTable(op, oend - op, ofCTable, OffFSELog, (symbolEncodingType_e)ofMode,
                        histogram, maxSymbol, ofCodes, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
                        entropy->workspace, sizeof(entropy->workspace));
        if (ZSTD_isError(tableSize)) return tableSize;
        op += tableSize;
    }

    /* 4c. match lengths table */
    {   U32 maxSymbol = MaxML;
        size_t const topCount = FSE_countFast_wksp(histogram, &maxSymbol, mlCodes, nbSeq, entropy->workspace);
        size_t tableSize;
        mlMode = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, topCount, nbSeq, ML_defaultNormLog);
        tableSize = ZSTD_buildCTable(op, oend - op, mlCTable, MLFSELog, (symbolEncodingType_e)mlMode,
                        histogram, maxSymbol, mlCodes, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                        entropy->workspace, sizeof(entropy->workspace));
        if (ZSTD_isError(tableSize)) return tableSize;
        op += tableSize;
    }

    /* flag byte : 2 bits per table, literal lengths in the top bits */
    *seqHead = (BYTE)((llMode<<6) + (ofMode<<4) + (mlMode<<2));

    /* 5. sequences bitstream */
    {   size_t const bitstreamSize = ZSTD_encodeSequences(op, oend - op,
                                        mlCTable, mlCodes,
                                        ofCTable, ofCodes,
                                        llCTable, llCodes,
                                        seqs, nbSeq, longOffsets);
        if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
        op += bitstreamSize;
    }

    return op - ostart;
}
2015-11-11 20:38:21 +00:00
/* ZSTD_compressSequences() :
 * Wrapper around ZSTD_compressSequences_internal() which decides whether the
 * compressed form is worth keeping.
 * @return : compressed size,
 *           or 0 when the block should be stored uncompressed (not enough
 *           gain, or `dst` ran out of space while a raw block would fit);
 *           in that case all entropy repeat modes are reset to "none",
 *           or an error code.
 * On success, the store's repcodes are confirmed (rep[] <- repToConfirm[]).
 */
MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
                              ZSTD_entropyCTables_t* entropy,
                              ZSTD_compressionParameters const* cParams,
                              void* dst, size_t dstCapacity,
                              size_t srcSize)
{
    size_t const result = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams,
                                                          dst, dstCapacity);
    size_t const requiredGain = ZSTD_minGain(srcSize);
    size_t const sizeCeiling = srcSize - requiredGain;
    /* Running out of space while srcSize <= dstCapacity means a raw block
     * would have fit : the data is treated as not compressible rather than
     * reported as an error. */
    int const rawFallback = (result == ERROR(dstSize_tooSmall)) && (srcSize <= dstCapacity);

    if (ZSTD_isError(result) && !rawFallback) {
        return result;
    }

    /* not compressible enough : give up and forget entropy statistics */
    if (rawFallback || (result >= sizeCeiling)) {
        entropy->hufCTable_repeatMode = HUF_repeat_none;
        entropy->offcode_repeatMode = FSE_repeat_none;
        entropy->matchlength_repeatMode = FSE_repeat_none;
        entropy->litlength_repeatMode = FSE_repeat_none;
        return 0;
    }
    assert(!ZSTD_isError(result));

    /* the block is kept : its repcodes become the reference for the next one */
    {   int r = 0;
        while (r < ZSTD_REP_NUM) {
            seqStorePtr->rep[r] = seqStorePtr->repToConfirm[r];
            r++;
        }
    }
    return result;
}
/* ZSTD_selectBlockCompressor() :
2017-09-07 00:56:01 +00:00
* Not static, but internal use only (used by long distance matcher)
* assumption : strat is a valid strategy */
typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
2017-09-07 00:56:01 +00:00
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2015-10-31 11:57:14 +00:00
{
static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
};
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
assert((U32)strat >= (U32)ZSTD_fast);
assert((U32)strat <= (U32)ZSTD_btultra);
return blockCompressor[extDict!=0][(U32)strat];
2015-10-31 11:57:14 +00:00
}
/* ZSTD_storeLastLiterals() :
 * Appends the `lastLLSize` bytes starting at `anchor` to the literals buffer
 * of the sequence store and advances its write position. */
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    BYTE* const writePos = seqStorePtr->lit;

    memcpy(writePos, anchor, lastLLSize);
    seqStorePtr->lit = writePos + lastLLSize;
}
2015-10-31 11:57:14 +00:00
2016-03-15 00:24:33 +00:00
/* ZSTD_compressBlock_internal() :
 * Compresses one block : runs the match finder selected by the applied
 * parameters (long-distance matcher when enabled, strategy-specific one
 * otherwise), stores the trailing literals, then entropy-codes the result.
 * @return : compressed size,
 *           or 0 when the block is too small to attempt compression or is
 *           deemed not compressible by ZSTD_compressSequences(),
 *           or an error code.
 */
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    const BYTE* const blockStart = (const BYTE*)src;
    U32 const blockIdx = (U32)(blockStart - zc->base);
    U32 const useExtDict = zc->lowLimit < zc->dictLimit;
    ZSTD_blockCompressor matchFinder;
    size_t tailLitSize;

    if (zc->appliedParams.ldmParams.enableLdm) {
        matchFinder = useExtDict ? ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm;
    } else {
        matchFinder = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, useExtDict);
    }

    /* don't even attempt compression below a certain srcSize */
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;

    ZSTD_resetSeqStore(&(zc->seqStore));

    /* limited update after finding a very long match :
     * when the tables lag more than 384 positions behind, skip ahead so that
     * at most 192 positions before the block remain to be inserted */
    if (blockIdx > zc->nextToUpdate + 384) {
        U32 const lagBeyondLimit = (U32)(blockIdx - zc->nextToUpdate - 384);
        zc->nextToUpdate = blockIdx - MIN(192, lagBeyondLimit);
    }

    /* the match finder returns the nb of bytes left uncovered at block end */
    tailLitSize = matchFinder(zc, src, srcSize);
    ZSTD_storeLastLiterals(&zc->seqStore, blockStart + srcSize - tailLitSize, tailLitSize);

    return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize);
}
/*! ZSTD_compress_frameChunk() :
 *  Compress a chunk of data into one or multiple blocks.
 *  All blocks will be terminated, all input will be consumed.
 *  Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *  Frame is supposed already started (header already produced)
 *  `lastFrameChunk` : when non-zero, the final block of this chunk is flagged
 *                     as last block, and the context moves to ZSTDcs_ending
 *                     (if anything was written).
 *  When the checksum flag is set, `src` is also fed to the XXH64 state.
 *  @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    /* unsigned shift : stays well-defined whatever the windowLog bound is */
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;

    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
        size_t cSize;

        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
            return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
        if (remaining < blockSize) blockSize = remaining;

        /* preemptive overflow correction :
         * once indexes get high, rebase everything by `correction` so that
         * the current index restarts just above one window size */
        if (cctx->lowLimit > (3U<<29)) {
            U32 const cycleMask = ((U32)1 << ZSTD_cycleLog(cctx->appliedParams.cParams.hashLog, cctx->appliedParams.cParams.strategy)) - 1;
            U32 const current = (U32)(ip - cctx->base);
            U32 const newCurrent = (current & cycleMask) + maxDist;
            U32 const correction = current - newCurrent;
            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
            ZSTD_reduceIndex(cctx, correction);
            cctx->base += correction;
            cctx->dictBase += correction;
            cctx->lowLimit -= correction;
            cctx->dictLimit -= correction;
            if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
            else cctx->nextToUpdate -= correction;
        }

        if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
            /* enforce maxDist : nothing further than one window behind the
             * end of this block may be referenced */
            U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
            if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
            if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
        }

        /* block content is written after the (yet unknown) block header */
        cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
        if (ZSTD_isError(cSize)) return cSize;

        if (cSize == 0) {  /* block is not compressible : store it raw */
            U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
            if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
            MEM_writeLE32(op, cBlockHeader24);   /* no pb, 4th byte will be overwritten */
            memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
            cSize = ZSTD_blockHeaderSize+blockSize;
        } else {
            U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
            MEM_writeLE24(op, cBlockHeader24);
            cSize += ZSTD_blockHeaderSize;
        }

        remaining -= blockSize;
        dstCapacity -= cSize;
        ip += blockSize;
        op += cSize;
    }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return op-ostart;
}
2016-04-12 13:52:33 +00:00
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2017-08-18 18:20:08 +00:00
ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID)
2016-04-12 13:52:33 +00:00
{ BYTE* const op = (BYTE*)dst;
U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
2016-07-27 19:05:12 +00:00
U32 const checksumFlag = params.fParams.checksumFlag>0;
U32 const windowSize = 1U << params.cParams.windowLog;
U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2016-07-27 19:05:12 +00:00
BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
U32 const fcsCode = params.fParams.contentSizeFlag ?
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
2016-07-27 19:05:12 +00:00
BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2016-05-29 03:01:04 +00:00
size_t pos;
if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
!params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
2016-04-12 13:52:33 +00:00
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
op[4] = frameHeaderDecriptionByte; pos=5;
if (!singleSegment) op[pos++] = windowLogByte;
2016-05-29 03:01:04 +00:00
switch(dictIDSizeCode)
{
default: assert(0); /* impossible */
2016-05-29 03:01:04 +00:00
case 0 : break;
case 1 : op[pos] = (BYTE)(dictID); pos++; break;
2016-07-27 19:21:36 +00:00
case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
2016-05-29 03:01:04 +00:00
case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
}
switch(fcsCode)
2016-04-12 13:52:33 +00:00
{
default: assert(0); /* impossible */
case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
2016-05-29 03:01:04 +00:00
case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
2016-04-12 13:52:33 +00:00
}
2016-05-29 03:01:04 +00:00
return pos;
2016-04-12 13:52:33 +00:00
}
2016-08-02 12:26:00 +00:00
/* ZSTD_compressContinue_internal() :
 * Common entry point for streaming-by-chunk and single-block compression.
 * `frame` : non-zero => frame mode : the frame header is emitted on the first
 *           call after init, and `src` is cut into blocks by
 *           ZSTD_compress_frameChunk() ;
 *           zero => `src` is compressed as one headerless block.
 * `lastFrameChunk` : forwarded to ZSTD_compress_frameChunk().
 * Also maintains the window : input which does not directly follow the
 * previous one turns the previous segment into an external dictionary.
 * @return : nb of bytes written into `dst` (frame header included),
 *           or an error code (stage_wrong if the context was never initialized).
 */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    const BYTE* const input = (const BYTE*) src;
    size_t headerSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal");
    DEBUGLOG(5, "stage: %u", cctx->stage);
    if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */

    /* first chunk of a frame : emit the frame header */
    if (frame && (cctx->stage==ZSTDcs_init)) {
        headerSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        if (ZSTD_isError(headerSize)) return headerSize;
        dstCapacity -= headerSize;
        dst = (char*)dst + headerSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    /* Check if blocks follow each other */
    if (src != cctx->nextSrc) {
        /* not contiguous : previous prefix becomes the external dictionary,
         * and `base` is shifted so that indexes keep growing across the gap */
        ptrdiff_t const gap = cctx->nextSrc - input;
        cctx->lowLimit = cctx->dictLimit;
        cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
        cctx->dictBase = cctx->base;
        cctx->base -= gap;
        cctx->nextToUpdate = cctx->dictLimit;
        if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) {
            cctx->lowLimit = cctx->dictLimit;   /* too small extDict */
        }
    }

    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ((input+srcSize > cctx->dictBase + cctx->lowLimit) & (input < cctx->dictBase + cctx->dictLimit)) {
        ptrdiff_t const inputEndIdx = (input + srcSize) - cctx->dictBase;
        U32 const raisedLowLimit = (inputEndIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)inputEndIdx;
        cctx->lowLimit = raisedLowLimit;
    }

    cctx->nextSrc = input + srcSize;

    /* nothing to compress : only the header (if any) was produced */
    if (!srcSize) return headerSize;

    {   size_t const bodySize = frame ?
                ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(bodySize)) return bodySize;
        cctx->consumedSrcSize += srcSize;
        return bodySize + headerSize;
    }
}
2016-07-27 23:17:22 +00:00
/* ZSTD_compressContinue() :
 * Compresses `src` as the next chunk of the current frame (frame mode,
 * not flagged as last chunk).
 * @return : nb of bytes written into `dst`, or an error code. */
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    U32 const frameMode = 1;
    U32 const isLastChunk = 0;

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize,
                                          frameMode, isLastChunk);
}
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
2016-01-09 00:08:23 +00:00
{
ZSTD_compressionParameters const cParams =
2017-08-29 02:25:17 +00:00
ZSTD_getCParamsFromCCtxParams(cctx->appliedParams, 0, 0);
return MIN (ZSTD_BLOCKSIZE_MAX, 1 << cParams.windowLog);
2016-07-18 14:52:10 +00:00
}
/* ZSTD_compressBlock() :
 * Compresses `src` as a single block, without any frame header
 * (block mode of ZSTD_compressContinue_internal()).
 * @return : nb of bytes written into `dst`,
 *           or srcSize_wrong when `srcSize` exceeds ZSTD_getBlockSize(),
 *           or another error code. */
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    if (srcSize > ZSTD_getBlockSize(cctx)) {
        return ERROR(srcSize_wrong);
    }
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize,
                                          0 /* frame mode */, 0 /* last chunk */);
}
/*! ZSTD_loadDictionaryContent() :
* @return : 0, or an error code
*/
2016-01-26 02:14:20 +00:00
static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
/* input becomes current prefix */
zc->lowLimit = zc->dictLimit;
zc->dictLimit = (U32)(zc->nextSrc - zc->base);
zc->dictBase = zc->base;
zc->base += ip - zc->nextSrc;
zc->nextToUpdate = zc->dictLimit;
2017-08-18 18:43:31 +00:00
zc->loadedDictEnd = zc->appliedParams.forceWindow ? 0 : (U32)(iend - zc->base);
zc->nextSrc = iend;
2016-07-27 19:05:12 +00:00
if (srcSize <= HASH_READ_SIZE) return 0;
switch(zc->appliedParams.cParams.strategy)
{
case ZSTD_fast:
ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
break;
2016-07-12 07:47:31 +00:00
case ZSTD_dfast:
ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
2016-07-12 07:47:31 +00:00
break;
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
if (srcSize >= HASH_READ_SIZE)
ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
break;
case ZSTD_btlazy2:
2016-02-15 06:21:54 +00:00
case ZSTD_btopt:
2017-03-09 19:44:25 +00:00
case ZSTD_btultra:
if (srcSize >= HASH_READ_SIZE)
ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
break;
default:
assert(0); /* not possible : not a valid strategy id */
}
[zstdmt] Fix MSAN failure with ZSTD_p_forceWindow Reproduction steps: ``` make zstreamtest CC=clang CFLAGS="-O3 -g -fsanitize=memory -fsanitize-memory-track-origins" ./zstreamtest -vv -t4178 -i4178 -s4531 ``` How to get to the error in gdb (may be a more efficient way): * 2 breaks at zstd_compress.c:2418 -- in ZSTD_compressContinue_internal() * 2 breaks at zstd_compress.c:2276 -- in ZSTD_compressBlock_internal() * 1 break at zstd_compress.c:1547 Why the error occurred: When `zc->forceWindow == 1`, after calling `ZSTD_loadDictionaryContent()` we have `zc->loadedDictEnd == zc->nextToUpdate == 0`. But, we've really loaded up to `iend` into the dictionary. Then in `ZSTD_compressBlock_internal()` we see that `current > zc->nextToUpdate + 384`, so we load the last 192 bytes a second time. In this case the bytes we are loading are a block of all 0s, starting in the previous block. So when we are loading the last 192 bytes, we find a `match` in the future, 183 bytes beyond `ip`. Since the block is all 0s, the match extends to the end of the block. But in `ZSTD_count()` we only check that `pIn < pInLoopLimit`, but since `pMatch > pIn`, `pMatch` eventually points past the end of the buffer, causing the MSAN failure. The fix: The line changed sets sets `zc->nextToUpdate` to the end of the dictionary. This is the behavior that existed before `ZSTD_p_forceWindow` was introduced. This fixes the exposing test case. Since the code doesn't fail without `zc->forceWindow`, it makes sense that this works. I've run the command `./zstreamtest -T2mn` 64 times without failures. CI should also verify nothing obvious broke.
2017-02-14 02:27:34 +00:00
zc->nextToUpdate = (U32)(iend - zc->base);
return 0;
}
2015-10-22 14:31:46 +00:00
/* Dictionaries that assign zero probability to symbols that show up causes problems
when FSE encoding. Refuse dictionaries that assign zero probability to symbols
that we may encounter during compression.
NOTE: This behavior is not standard and could be improved in the future. */
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
U32 s;
if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
for (s = 0; s <= maxSymbolValue; ++s) {
if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
}
return 0;
}
2016-01-26 02:14:20 +00:00
/* Dictionary format :
* See :
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
*/
/*! ZSTD_loadZstdDictionary() :
* @return : 0, or an error code
* assumptions : magic number supposed already checked
* dictSize supposed > 8
*/
static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
2016-01-26 02:14:20 +00:00
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff;
ZSTD_STATIC_ASSERT(sizeof(cctx->entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
2016-05-31 16:13:56 +00:00
dictPtr += 4; /* skip magic number */
cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
dictPtr += 4;
{ size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)cctx->entropy->hufCTable, 255, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
2016-05-31 16:13:56 +00:00
}
{ unsigned offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
CHECK_E( FSE_buildCTable_wksp(cctx->entropy->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
dictionary_corrupted);
dictPtr += offcodeHeaderSize;
2016-05-31 16:13:56 +00:00
}
{ short matchlengthNCount[MaxML+1];
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
CHECK_E( FSE_buildCTable_wksp(cctx->entropy->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
2016-05-31 16:13:56 +00:00
}
{ short litlengthNCount[MaxLL+1];
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
CHECK_E( FSE_buildCTable_wksp(cctx->entropy->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
dictionary_corrupted);
dictPtr += litlengthHeaderSize;
2016-05-31 16:13:56 +00:00
}
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
cctx->seqStore.rep[0] = MEM_readLE32(dictPtr+0);
cctx->seqStore.rep[1] = MEM_readLE32(dictPtr+4);
cctx->seqStore.rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
U32 offcodeMax = MaxOff;
if (dictContentSize <= ((U32)-1) - 128 KB) {
U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
2016-10-24 21:11:27 +00:00
}
/* All offset values <= dictContentSize + 128 KB must be representable */
CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
/* All repCodes must be <= dictContentSize and != 0*/
{ U32 u;
for (u=0; u<3; u++) {
if (cctx->seqStore.rep[u] == 0) return ERROR(dictionary_corrupted);
if (cctx->seqStore.rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
} }
cctx->entropy->hufCTable_repeatMode = HUF_repeat_valid;
cctx->entropy->offcode_repeatMode = FSE_repeat_valid;
cctx->entropy->matchlength_repeatMode = FSE_repeat_valid;
cctx->entropy->litlength_repeatMode = FSE_repeat_valid;
return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
}
2016-01-26 02:14:20 +00:00
}
2016-03-15 00:24:33 +00:00
/** ZSTD_compress_insertDictionary() :
 *  Loads `dict` into `cctx`, interpreting it according to `dictMode` :
 *  - NULL dictionary, or dictSize <= 8 : nothing is loaded.
 *  - ZSTD_dm_rawContent : always loaded as plain content.
 *  - buffer starting with ZSTD_MAGIC_DICTIONARY : loaded as a full zstd dictionary.
 *  - no magic + ZSTD_dm_auto : loaded as plain content.
 *  - no magic + ZSTD_dm_fullDict : rejected with dictionary_wrong.
 * @return : 0, or an error code */
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
                                             const void* dict, size_t dictSize,
                                             ZSTD_dictMode_e dictMode)
{
    DEBUGLOG(5, "ZSTD_compress_insertDictionary");

    /* nothing usable to insert */
    if ((dict == NULL) || (dictSize <= 8)) {
        return 0;
    }

    /* caller forces raw-content interpretation : magic number is not inspected */
    if (dictMode == ZSTD_dm_rawContent) {
        return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
    }

    /* magic number present : structured zstd dictionary */
    if (MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY) {
        return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
    }

    /* from here on : magic number is missing */
    if (dictMode == ZSTD_dm_auto) {
        DEBUGLOG(5, "raw content dictionary detected");
        return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
    }
    if (dictMode == ZSTD_dm_fullDict) {
        return ERROR(dictionary_wrong);
    }

    assert(0);   /* impossible */
    /* when assert() is compiled out, fall back to full-dictionary loading */
    return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
}
2016-04-01 13:48:48 +00:00
/*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2016-01-26 15:31:22 +00:00
const void* dict, size_t dictSize,
ZSTD_dictMode_e dictMode,
const ZSTD_CDict* cdict,
2017-08-18 18:20:08 +00:00
ZSTD_CCtx_params params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
2015-10-22 14:31:46 +00:00
{
DEBUGLOG(4, "ZSTD_compressBegin_internal");
2017-07-03 22:52:19 +00:00
DEBUGLOG(4, "dict ? %s", dict ? "dict" : (cdict ? "cdict" : "none"));
DEBUGLOG(4, "dictMode : %u", (U32)dictMode);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
if (cdict && cdict->dictContentSize>0) {
return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
params.fParams, pledgedSrcSize,
zbuff);
}
CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_continue, zbuff) );
return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
2015-10-22 14:31:46 +00:00
}
size_t ZSTD_compressBegin_advanced_internal(
2017-08-21 05:55:07 +00:00
ZSTD_CCtx* cctx,
2017-08-18 23:17:24 +00:00
const void* dict, size_t dictSize,
ZSTD_dictMode_e dictMode,
2017-08-18 23:17:24 +00:00
ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize)
{
/* compression parameters verification and optimization */
CHECK_F( ZSTD_checkCParams(params.cParams) );
return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictMode, NULL,
2017-08-18 23:17:24 +00:00
params, pledgedSrcSize,
ZSTDb_not_buffered);
}
2015-10-25 13:06:35 +00:00
2016-04-01 13:48:48 +00:00
/*! ZSTD_compressBegin_advanced() :
 *  Merges `params` into the context's requested parameters, then starts a
 *  frame with `dict` interpreted in ZSTD_dm_auto mode.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                                   const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    return ZSTD_compressBegin_advanced_internal(
                cctx,
                dict, dictSize, ZSTD_dm_auto,
                ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params),
                pledgedSrcSize);
}
/* ZSTD_compressBegin_usingDict() :
 * Derives parameters from `compressionLevel` and `dictSize` (source size
 * hint is 0), then starts a non-buffered frame with `dict` in ZSTD_dm_auto
 * mode and a pledged source size of 0. */
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters const levelParams = ZSTD_getParams(compressionLevel, 0, dictSize);
    ZSTD_CCtx_params const beginParams =
            ZSTD_assignParamsToCCtxParams(cctx->requestedParams, levelParams);

    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, ZSTD_dm_auto,
                                       NULL /* cdict */,
                                       beginParams, 0 /* pledgedSrcSize */,
                                       ZSTDb_not_buffered);
}
2015-11-25 13:42:45 +00:00
/* ZSTD_compressBegin() :
 * Dictionary-less variant : forwards to ZSTD_compressBegin_usingDict()
 * with a NULL dictionary of size 0. */
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    const void* const noDict = NULL;
    size_t const noDictSize = 0;
    return ZSTD_compressBegin_usingDict(cctx, noDict, noDictSize, compressionLevel);
}
2016-07-28 13:29:08 +00:00
/*! ZSTD_writeEpilogue() :
* Ends a frame.
2015-11-25 13:42:45 +00:00
* @return : nb of bytes written into dst (or an error code) */
2016-07-28 13:29:08 +00:00
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
2016-07-27 22:55:43 +00:00
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
2016-04-12 13:52:33 +00:00
size_t fhSize = 0;
DEBUGLOG(5, "ZSTD_writeEpilogue");
if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
/* special case : empty frame */
2016-07-27 22:55:43 +00:00
if (cctx->stage == ZSTDcs_init) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
2016-04-12 13:52:33 +00:00
if (ZSTD_isError(fhSize)) return fhSize;
dstCapacity -= fhSize;
op += fhSize;
2016-07-27 19:05:12 +00:00
cctx->stage = ZSTDcs_ongoing;
2016-01-07 14:35:18 +00:00
}
2016-07-27 22:55:43 +00:00
if (cctx->stage != ZSTDcs_ending) {
/* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, cBlockHeader24);
op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize;
}
if (cctx->appliedParams.fParams.checksumFlag) {
2016-07-27 22:55:43 +00:00
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, checksum);
op += 4;
2016-05-31 16:13:56 +00:00
}
2016-07-27 19:05:12 +00:00
cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
2016-07-27 22:55:43 +00:00
return op-ostart;
}
2016-07-28 13:29:08 +00:00
/* ZSTD_compressEnd() :
 * Compresses the final chunk `src` in frame mode, then appends the frame
 * epilogue right after it in `dst`.
 * When contentSizeFlag is set, the amount of consumed input is checked
 * against the pledged size; a mismatch yields srcSize_wrong.
 * @return : total nb of bytes written into dst (or an error code) */
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
{
    size_t const bodySize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    if (ZSTD_isError(bodySize)) {
        return bodySize;
    }

    {   size_t const epilogueSize = ZSTD_writeEpilogue(cctx,
                                        (char*)dst + bodySize, dstCapacity-bodySize);
        if (ZSTD_isError(epilogueSize)) {
            return epilogueSize;
        }

        if (cctx->appliedParams.fParams.contentSizeFlag) {  /* control src size */
            DEBUGLOG(5, "end of frame : controlling src size");
            if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
                DEBUGLOG(5, "error : pledgedSrcSize = %u, while realSrcSize = %u",
                    (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
                return ERROR(srcSize_wrong);
            }
        }

        return bodySize + epilogueSize;
    }
}
2016-07-27 23:25:46 +00:00
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2016-03-15 00:24:33 +00:00
void* dst, size_t dstCapacity,
2015-11-25 13:42:45 +00:00
const void* src, size_t srcSize,
2015-12-18 00:26:48 +00:00
const void* dict,size_t dictSize,
2015-11-25 13:42:45 +00:00
ZSTD_parameters params)
2015-10-22 14:31:46 +00:00
{
ZSTD_CCtx_params const cctxParams =
ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
return ZSTD_compress_advanced_internal(cctx,
dst, dstCapacity,
src, srcSize,
dict, dictSize,
cctxParams);
2015-10-22 14:31:46 +00:00
}
/* ZSTD_compress_advanced() :
 * One-shot compression with explicit parameters.
 * params.cParams are validated first; a validation error is returned as is. */
size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    size_t const checkResult = ZSTD_checkCParams(params.cParams);
    if (ZSTD_isError(checkResult)) {
        return checkResult;
    }
    return ZSTD_compress_internal(ctx,
                                  dst, dstCapacity,
                                  src, srcSize,
                                  dict, dictSize,
                                  params);
}
2017-08-18 23:17:24 +00:00
/* Internal */
size_t ZSTD_compress_advanced_internal(
2017-08-21 05:55:07 +00:00
ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_CCtx_params params)
2017-08-18 23:17:24 +00:00
{
CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
2017-08-18 23:17:24 +00:00
params, srcSize, ZSTDb_not_buffered) );
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
/* ZSTD_compress_usingDict() :
 * One-shot compression at `compressionLevel` with an optional dictionary.
 * Parameters are derived from srcSize and, only when `dict` is non-NULL,
 * from dictSize. The frame always requests the content size field. */
size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
                               const void* dict, size_t dictSize, int compressionLevel)
{
    size_t const dictSizeHint = dict ? dictSize : 0;   /* ignore dictSize without a dictionary */
    ZSTD_parameters levelParams = ZSTD_getParams(compressionLevel, srcSize, dictSizeHint);

    levelParams.fParams.contentSizeFlag = 1;
    return ZSTD_compress_internal(ctx,
                                  dst, dstCapacity,
                                  src, srcSize,
                                  dict, dictSize,
                                  levelParams);
}
2016-03-15 00:24:33 +00:00
/* ZSTD_compressCCtx() :
 * One-shot compression with a caller-provided context and no dictionary :
 * forwards to ZSTD_compress_usingDict() with a NULL dictionary of size 0. */
size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
{
    const void* const noDict = NULL;
    size_t const noDictSize = 0;
    return ZSTD_compress_usingDict(ctx,
                                   dst, dstCapacity,
                                   src, srcSize,
                                   noDict, noDictSize,
                                   compressionLevel);
}
2016-03-15 00:24:33 +00:00
size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2015-10-22 14:31:46 +00:00
{
2015-10-29 21:02:40 +00:00
size_t result;
ZSTD_CCtx ctxBody;
2015-10-29 17:41:45 +00:00
memset(&ctxBody, 0, sizeof(ctxBody));
ctxBody.customMem = ZSTD_defaultCMem;
2016-03-15 00:24:33 +00:00
result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
2015-10-29 21:02:40 +00:00
return result;
2015-10-22 14:31:46 +00:00
}
2015-12-17 22:50:15 +00:00
/* ===== Dictionary API ===== */
/*! ZSTD_estimateCDictSize_advanced() :
* Estimate amount of memory that will be needed to create a dictionary with following arguments */
2017-08-29 18:55:02 +00:00
size_t ZSTD_estimateCDictSize_advanced(
size_t dictSize, ZSTD_compressionParameters cParams,
ZSTD_dictLoadMethod_e dictLoadMethod)
{
DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
DEBUGLOG(5, "CCtx estimate : %u",
(U32)ZSTD_estimateCCtxSize_advanced_usingCParams(cParams));
return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_advanced_usingCParams(cParams)
2017-08-29 18:55:02 +00:00
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
}
/* ZSTD_estimateCDictSize() :
 * Level-based estimate : cParams are derived from `compressionLevel` and
 * `dictSize` (source size hint 0), and the dictionary is assumed to be
 * copied (ZSTD_dlm_byCopy). */
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    return ZSTD_estimateCDictSize_advanced(
                dictSize,
                ZSTD_getCParams(compressionLevel, 0, dictSize),
                ZSTD_dlm_byCopy);
}
size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
2016-12-23 21:25:03 +00:00
return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
}
2017-05-26 01:05:49 +00:00
static size_t ZSTD_initCDict_internal(
ZSTD_CDict* cdict,
const void* dictBuffer, size_t dictSize,
2017-08-29 18:55:02 +00:00
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictMode_e dictMode,
2017-05-26 01:05:49 +00:00
ZSTD_compressionParameters cParams)
{
DEBUGLOG(5, "ZSTD_initCDict_internal, mode %u", (U32)dictMode);
2017-08-29 18:55:02 +00:00
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
2017-05-26 01:05:49 +00:00
cdict->dictBuffer = NULL;
cdict->dictContent = dictBuffer;
} else {
void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
cdict->dictBuffer = internalBuffer;
cdict->dictContent = internalBuffer;
if (!internalBuffer) return ERROR(memory_allocation);
memcpy(internalBuffer, dictBuffer, dictSize);
2017-05-26 01:05:49 +00:00
}
cdict->dictContentSize = dictSize;
{ ZSTD_CCtx_params cctxParams = cdict->refContext->requestedParams;
cctxParams.cParams = cParams;
CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
cdict->dictContent, dictSize, dictMode,
NULL,
cctxParams, ZSTD_CONTENTSIZE_UNKNOWN,
ZSTDb_not_buffered) );
2017-05-26 01:05:49 +00:00
}
return 0;
}
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
2017-08-29 18:55:02 +00:00
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
{
DEBUGLOG(5, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode);
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
{ ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2016-12-21 15:20:11 +00:00
if (!cdict || !cctx) {
ZSTD_free(cdict, customMem);
2017-02-21 17:59:56 +00:00
ZSTD_freeCCtx(cctx);
return NULL;
}
2017-05-26 01:05:49 +00:00
cdict->refContext = cctx;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dictBuffer, dictSize,
2017-08-29 18:55:02 +00:00
dictLoadMethod, dictMode,
2017-05-26 01:05:49 +00:00
cParams) )) {
ZSTD_freeCDict(cdict);
return NULL;
}
2016-12-21 15:20:11 +00:00
return cdict;
}
}
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
2017-08-29 18:55:02 +00:00
ZSTD_dlm_byCopy, ZSTD_dm_auto,
cParams, ZSTD_defaultCMem);
2016-12-21 15:20:11 +00:00
}
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
2017-08-29 18:55:02 +00:00
ZSTD_dlm_byRef, ZSTD_dm_auto,
cParams, ZSTD_defaultCMem);
}
/* ZSTD_freeCDict() :
 * Releases the reference context, the owned dictionary copy (if any) and
 * the cdict itself, using the allocator recorded in the reference context.
 * @return : always 0; NULL is accepted */
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict != NULL) {
        /* read the allocator first : it lives inside the context freed below */
        ZSTD_customMem const allocator = cdict->refContext->customMem;
        ZSTD_freeCCtx(cdict->refContext);
        ZSTD_free(cdict->dictBuffer, allocator);
        ZSTD_free(cdict, allocator);
    }
    return 0;   /* support free on NULL */
}
2017-05-26 01:05:49 +00:00
/*! ZSTD_initStaticCDict_advanced() :
* Generate a digested dictionary in provided memory area.
* workspace: The memory area to emplace the dictionary into.
* Provided pointer must 8-bytes aligned.
* It must outlive dictionary usage.
* workspaceSize: Use ZSTD_estimateCDictSize()
* to determine how large workspace must be.
* cParams : use ZSTD_getCParams() to transform a compression level
* into its relevants cParams.
* @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
* Note : there is no corresponding "free" function.
* Since workspace was allocated externally, it must be freed externally.
*/
ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
const void* dict, size_t dictSize,
2017-08-29 18:55:02 +00:00
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictMode_e dictMode,
2017-05-26 01:05:49 +00:00
ZSTD_compressionParameters cParams)
{
size_t const cctxSize = ZSTD_estimateCCtxSize_advanced_usingCParams(cParams);
2017-08-29 18:55:02 +00:00
size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
2017-05-26 01:05:49 +00:00
+ cctxSize;
ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
void* ptr;
DEBUGLOG(5, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
2017-05-26 01:05:49 +00:00
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
DEBUGLOG(5, "(workspaceSize < neededSize) : (%u < %u) => %u",
2017-05-26 01:05:49 +00:00
(U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
if (workspaceSize < neededSize) return NULL;
2017-08-29 18:55:02 +00:00
if (dictLoadMethod == ZSTD_dlm_byCopy) {
2017-05-26 01:05:49 +00:00
memcpy(cdict+1, dict, dictSize);
dict = cdict+1;
ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
} else {
ptr = cdict+1;
}
cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
2017-08-29 18:55:02 +00:00
ZSTD_dlm_byRef, dictMode,
cParams) ))
2017-05-26 01:05:49 +00:00
return NULL;
return cdict;
}
2017-08-26 00:58:28 +00:00
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) {
return cdict->refContext->appliedParams.cParams;
}
/* ZSTD_compressBegin_usingCDict_advanced() :
 * Starts a non-buffered frame from a digested dictionary.
 * Compression parameters are taken from `cdict`, frame parameters from
 * `fParams`; remaining settings come from the context's requested parameters.
 * cdict must be != NULL : a NULL cdict is rejected with dictionary_wrong. */
size_t ZSTD_compressBegin_usingCDict_advanced(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    ZSTD_CCtx_params beginParams;

    if (cdict == NULL) {
        return ERROR(dictionary_wrong);
    }

    beginParams = cctx->requestedParams;
    beginParams.cParams = ZSTD_getCParamsFromCDict(cdict);
    beginParams.fParams = fParams;
    DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced");
    return ZSTD_compressBegin_internal(cctx,
                                       NULL, 0, ZSTD_dm_auto,   /* no raw dictionary */
                                       cdict,
                                       beginParams, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
/* ZSTD_compressBegin_usingCDict() :
 * Starts a frame from a digested dictionary with all frame parameters
 * cleared (contentSizeFlag, checksumFlag and noDictIDFlag are 0) and a
 * pledgedSrcSize of 0 forwarded to the _advanced variant, which also
 * handles a NULL cdict. */
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const defaultFParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    unsigned long long const pledgedSize = 0;

    DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !defaultFParams.noDictIDFlag);
    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, defaultFParams, pledgedSize);
}
/* ZSTD_compress_usingCDict_advanced() :
 * One-shot compression with a digested dictionary and explicit frame
 * parameters. `srcSize` is pledged when the frame is started; a NULL cdict
 * is rejected by the begin step. */
size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    size_t const beginResult =
            ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize);   /* will check if cdict != NULL */
    if (ZSTD_isError(beginResult)) {
        return beginResult;
    }
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
/*! ZSTD_compress_usingCDict() :
* Compression using a digested Dictionary.
* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
* Note that compression parameters are decided at CDict creation time
* while frame parameters are hardcoded */
2016-09-15 12:54:07 +00:00
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict)
{
ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}
2016-08-12 11:04:27 +00:00
/* ******************************************************************
* Streaming
********************************************************************/
2016-08-11 23:20:36 +00:00
/* ZSTD_createCStream() :
 * Creates a compression stream using the default allocator. */
ZSTD_CStream* ZSTD_createCStream(void)
{
    ZSTD_CStream* const zcs = ZSTD_createCStream_advanced(ZSTD_defaultCMem);
    return zcs;
}
/* ZSTD_initStaticCStream() :
 * Sets up a compression stream inside caller-provided memory, by
 * delegating to ZSTD_initStaticCCtx() with the same workspace. */
ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
{
    ZSTD_CStream* const zcs = ZSTD_initStaticCCtx(workspace, workspaceSize);
    return zcs;
}
2016-08-11 23:20:36 +00:00
/* ZSTD_createCStream_advanced() :
 * Creates a compression stream with a custom allocator.
 * CStream and CCtx are now same object, so this is plain CCtx creation. */
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
{
    ZSTD_CStream* const zcs = ZSTD_createCCtx_advanced(customMem);
    return zcs;
}
/* ZSTD_freeCStream() :
 * Releases a compression stream; since CStream and CCtx are the same
 * object, the result of ZSTD_freeCCtx() is returned as is. */
size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    size_t const freeResult = ZSTD_freeCCtx(zcs);   /* same object */
    return freeResult;
}
2016-08-12 11:04:27 +00:00
/*====== Initialization ======*/
/* ZSTD_CStreamInSize() :
 * Streaming input buffer size reported to callers : one full block (ZSTD_BLOCKSIZE_MAX). */
size_t ZSTD_CStreamInSize(void)
{
    size_t const inSize = ZSTD_BLOCKSIZE_MAX;
    return inSize;
}
2016-08-11 23:20:36 +00:00
/* ZSTD_CStreamOutSize() :
 * Streaming output buffer size reported to callers : compression bound of
 * one full block, plus one block header, plus 4 bytes for the 32-bits hash. */
size_t ZSTD_CStreamOutSize(void)
{
    size_t const blockBound = ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX);
    size_t const hashSize = 4;   /* 32-bits hash */
    return blockBound + ZSTD_blockHeaderSize + hashSize;
}
2016-08-11 23:20:36 +00:00
/* ZSTD_resetCStream_internal() :
 * Starts a new buffered frame on `zcs` (from `dict` or `cdict`, never both),
 * then rewinds all streaming bookkeeping so that the stream is back in its
 * loading stage. `params` are expected to be validated (only asserted here).
 * @return : 0, or an error code from the frame start */
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
                    const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_resetCStream_internal");
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));   /* pre-validated params */
    assert(!((dict) && (cdict)));   /* mutually exclusive dictionary sources */

    {   size_t const beginResult = ZSTD_compressBegin_internal(zcs,
                                            dict, dictSize, dictMode,
                                            cdict,
                                            params, pledgedSrcSize,
                                            ZSTDb_buffered);
        if (ZSTD_isError(beginResult)) {
            return beginResult;
        }
    }

    /* input side */
    zcs->inToCompress = 0;
    zcs->inBuffPos = 0;
    zcs->inBuffTarget = zcs->blockSize;
    /* output side */
    zcs->outBuffFlushedSize = 0;
    zcs->outBuffContentSize = 0;
    /* state machine */
    zcs->streamStage = zcss_load;
    zcs->frameEnded = 0;

    return 0;   /* ready to go */
}
/* ZSTD_resetCStream() :
 * Restarts streaming with the parameters and cdict already attached to `zcs`.
 * contentSizeFlag is enabled only for a non-zero pledgedSrcSize, and cParams
 * are recomputed from the requested parameters for that source size. */
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params streamParams = zcs->requestedParams;

    streamParams.fParams.contentSizeFlag = (pledgedSrcSize != 0);
    streamParams.cParams = ZSTD_getCParamsFromCCtxParams(streamParams, pledgedSrcSize, 0);
    DEBUGLOG(5, "ZSTD_resetCStream");
    return ZSTD_resetCStream_internal(zcs,
                                      NULL, 0, ZSTD_dm_auto,   /* no raw dictionary */
                                      zcs->cdict,
                                      streamParams, pledgedSrcSize);
}
/*! ZSTD_initCStream_internal() :
 *  Note : not static, but hidden (not exposed). Used by zstdmt_compress.c
 *  Assumption 1 : params are valid
 *  Assumption 2 : either dict, or cdict, is defined, not both
 *  - `dict` with dictSize >= 8 : digested into a stream-owned cdict
 *    (refused with memory_allocation on a static context).
 *  - otherwise : any stream-owned cdict is released and `cdict` (possibly
 *    NULL) is referenced; when present, it dictates cParams.
 *  The resulting params (level forced to ZSTD_CLEVEL_CUSTOM) are recorded as
 *  the stream's requested params before the stream is reset. */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
{
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */

    if ((dict == NULL) || (dictSize < 8)) {
        /* no dictionary buffer to digest : rely on the external cdict, if any */
        if (cdict != NULL) {
            params.cParams = ZSTD_getCParamsFromCDict(cdict);  /* cParams are enforced from cdict */
        }
        ZSTD_freeCDict(zcs->cdictLocal);
        zcs->cdictLocal = NULL;
        zcs->cdict = cdict;
    } else {
        DEBUGLOG(5, "loading dictionary of size %u", (U32)dictSize);
        if (zcs->staticSize) {
            /* static CCtx : never uses malloc,
             * hence incompatible with internal cdict creation */
            return ERROR(memory_allocation);
        }
        ZSTD_freeCDict(zcs->cdictLocal);
        {   ZSTD_CDict* const digested = ZSTD_createCDict_advanced(dict, dictSize,
                                                ZSTD_dlm_byCopy, ZSTD_dm_auto,
                                                params.cParams, zcs->customMem);
            zcs->cdictLocal = digested;
            zcs->cdict = digested;
            if (digested == NULL) {
                return ERROR(memory_allocation);
            }
        }
    }

    params.compressionLevel = ZSTD_CLEVEL_CUSTOM;
    zcs->requestedParams = params;

    return ZSTD_resetCStream_internal(zcs,
                                      NULL, 0, ZSTD_dm_auto,
                                      zcs->cdict,
                                      params, pledgedSrcSize);
}
/* ZSTD_initCStream_usingCDict_advanced() :
 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
 * cParams are taken from `cdict`; a NULL cdict is rejected with
 * dictionary_wrong (the function would not know what to do with it). */
size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                            const ZSTD_CDict* cdict,
                                            ZSTD_frameParameters fParams,
                                            unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params streamParams;

    if (cdict == NULL) {
        return ERROR(dictionary_wrong);
    }

    streamParams = zcs->requestedParams;
    streamParams.cParams = ZSTD_getCParamsFromCDict(cdict);
    streamParams.fParams = fParams;
    return ZSTD_initCStream_internal(zcs,
                                     NULL, 0, cdict,
                                     streamParams, pledgedSrcSize);
}
/* ZSTD_initCStream_usingCDict() :
 * Initializes streaming from a digested dictionary, with all frame
 * parameters cleared and a pledgedSrcSize of 0.
 * note : cdict must outlive compression session */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const defaultFParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ };
    unsigned long long const pledgedSize = 0;

    /* note : the _advanced variant will check that cdict != NULL */
    return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, defaultFParams, pledgedSize);
}
/* ZSTD_initCStream_advanced() :
 * Initializes streaming with explicit parameters and an optional dictionary
 * buffer (no cdict). params.cParams are validated before initialization;
 * a validation error is returned as is. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params const streamParams =
            ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
    size_t const checkResult = ZSTD_checkCParams(params.cParams);

    if (ZSTD_isError(checkResult)) {
        return checkResult;
    }
    return ZSTD_initCStream_internal(zcs,
                                     dict, dictSize, NULL /* cdict */,
                                     streamParams, pledgedSrcSize);
}
2016-08-11 23:20:36 +00:00
/* ZSTD_initCStream_usingDict() :
 * initializes `zcs` with a raw dictionary (`dict`, `dictSize`).
 * Parameters are obtained from ZSTD_getParams(compressionLevel, 0, dictSize),
 * i.e. without any source size hint, then merged into zcs->requestedParams.
 * The pledged source size forwarded to ZSTD_initCStream_internal() is 0.
 * @return : result of ZSTD_initCStream_internal() */
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
    ZSTD_CCtx_params const cctxParams =
            ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
    return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, 0);
}
/* ZSTD_initCStream_srcSize() :
 * initializes `zcs` without dictionary, for a given `compressionLevel`,
 * using `pledgedSrcSize` as source size hint when selecting parameters.
 * The contentSize frame flag is enabled only when `pledgedSrcSize` is non-zero.
 * @return : result of ZSTD_initCStream_internal() */
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
    cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
    cctxParams.fParams.contentSizeFlag = (pledgedSrcSize>0);
    return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize);
}
/* ZSTD_initCStream() :
 * simplest initializer : no dictionary, pledged source size of 0.
 * @return : result of ZSTD_initCStream_srcSize() */
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    return ZSTD_initCStream_srcSize(zcs, compressionLevel, 0);
}

/*======   Compression   ======*/

/* ZSTD_limitCopy() :
 * copies at most `dstCapacity` bytes from `src` into `dst`.
 * @return : nb of bytes actually copied, i.e. MIN(dstCapacity, srcSize) */
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize)
{
    size_t const length = MIN(dstCapacity, srcSize);
    /* skip memcpy() entirely when there is nothing to copy :
     * callers may provide a NULL `src` together with srcSize==0
     * (e.g. an empty ZSTD_inBuffer), and memcpy() requires valid pointers
     * even for a zero-sized copy */
    if (length) memcpy(dst, src, length);
    return length;
}
/** ZSTD_compressStream_generic():
 *  internal function for all *compressStream*() variants and *compress_generic()
 *
 *  Runs the streaming state machine selected by zcs->streamStage :
 *  - zcss_init  : stream was not initialized => ERROR(init_missing)
 *  - zcss_load  : when ending the frame with an empty internal input buffer and
 *                 enough room in `output`, compresses `input` straight into `output`.
 *                 Otherwise, accumulates input into zcs->inBuff, then compresses
 *                 the pending data, either directly into `output`
 *                 (when it can hold ZSTD_compressBound() of the pending data)
 *                 or into zcs->outBuff, in which case the flush stage follows.
 *  - zcss_flush : copies pending content of zcs->outBuff into `output`.
 *
 *  `flushMode` : ZSTD_e_continue stops as soon as the input buffer is not full,
 *                ZSTD_e_flush compresses whatever is buffered,
 *                ZSTD_e_end additionally closes the frame once all input is consumed.
 *
 *  input->pos and output->pos are updated on exit (not on error returns).
 * @return : hint size for next input (0 when the frame is completed),
 *           or an error code */
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                   ZSTD_outBuffer* output,
                                   ZSTD_inBuffer* input,
                                   ZSTD_EndDirective const flushMode)
{
    const char* const istart = (const char*)input->src;
    const char* const iend = istart + input->size;
    const char* ip = istart + input->pos;
    char* const ostart = (char*)output->dst;
    char* const oend = ostart + output->size;
    char* op = ostart + output->pos;
    U32 someMoreWork = 1;

    /* check expectations */
    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode);
    assert(zcs->inBuff != NULL);
    assert(zcs->inBuffSize>0);
    assert(zcs->outBuff!= NULL);
    assert(zcs->outBuffSize>0);
    assert(output->pos <= output->size);
    assert(input->pos <= input->size);

    while (someMoreWork) {
        switch(zcs->streamStage)
        {
        case zcss_init:
            /* call ZSTD_initCStream() first ! */
            return ERROR(init_missing);

        case zcss_load:
            if ( (flushMode == ZSTD_e_end)
              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
              && (zcs->inBuffPos == 0) ) {
                /* shortcut to compression pass directly into output buffer */
                size_t const cSize = ZSTD_compressEnd(zcs,
                                                op, oend-op, ip, iend-ip);
                DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize);
                if (ZSTD_isError(cSize)) return cSize;
                ip = iend;
                op += cSize;
                zcs->frameEnded = 1;
                ZSTD_startNewCompression(zcs);
                someMoreWork = 0; break;
            }
            /* complete loading into inBuffer */
            {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                size_t const loaded = ZSTD_limitCopy(
                                        zcs->inBuff + zcs->inBuffPos, toLoad,
                                        ip, iend-ip);
                zcs->inBuffPos += loaded;
                ip += loaded;
                if ( (flushMode == ZSTD_e_continue)
                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
                    /* not enough input to fill full block : stop here */
                    someMoreWork = 0; break;
                }
                if ( (flushMode == ZSTD_e_flush)
                  && (zcs->inBuffPos == zcs->inToCompress) ) {
                    /* empty */
                    someMoreWork = 0; break;
                }
            }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            DEBUGLOG(5, "stream compression stage (flushMode==%u)", (U32)flushMode);
            {   void* cDst;
                size_t cSize;
                size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                size_t oSize = oend-op;
                /* frame is closed only when ending AND all input has been consumed */
                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                if (oSize >= ZSTD_compressBound(iSize)) {
                    cDst = op;   /* compress into output buffer, to skip flush stage */
                } else {
                    cDst = zcs->outBuff;
                    oSize = zcs->outBuffSize;
                }
                cSize = lastBlock ?
                        ZSTD_compressEnd(zcs, cDst, oSize,
                                    zcs->inBuff + zcs->inToCompress, iSize) :
                        ZSTD_compressContinue(zcs, cDst, oSize,
                                    zcs->inBuff + zcs->inToCompress, iSize);
                if (ZSTD_isError(cSize)) return cSize;
                zcs->frameEnded = lastBlock;
                /* prepare next block */
                zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                if (zcs->inBuffTarget > zcs->inBuffSize) {
                    /* next block would not fit : restart from the beginning of inBuff */
                    zcs->inBuffPos = 0;
                    zcs->inBuffTarget = zcs->blockSize;
                }
                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
                         (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize);
                if (!lastBlock)
                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
                zcs->inToCompress = zcs->inBuffPos;
                if (cDst == op) {  /* no need to flush */
                    op += cSize;
                    if (zcs->frameEnded) {
                        DEBUGLOG(5, "Frame completed directly in outBuffer");
                        someMoreWork = 0;
                        ZSTD_startNewCompression(zcs);
                    }
                    break;
                }
                zcs->outBuffContentSize = cSize;
                zcs->outBuffFlushedSize = 0;
                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
            }
            /* fall-through */
        case zcss_flush:
            DEBUGLOG(5, "flush stage");
            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
                size_t const flushed = ZSTD_limitCopy(op, oend-op,
                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                            (U32)toFlush, (U32)(oend-op), (U32)flushed);
                op += flushed;
                zcs->outBuffFlushedSize += flushed;
                if (toFlush!=flushed) {
                    /* flush not fully completed, presumably because dst is too small */
                    assert(op==oend);
                    someMoreWork = 0;
                    break;
                }
                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
                if (zcs->frameEnded) {
                    DEBUGLOG(5, "Frame completed on flush");
                    someMoreWork = 0;
                    ZSTD_startNewCompression(zcs);
                    break;
                }
                zcs->streamStage = zcss_load;
                break;
            }

        default: /* impossible */
            assert(0);
        }
    }

    input->pos = ip - istart;
    output->pos = op - ostart;
    if (zcs->frameEnded) return 0;
    {   size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
        if (hintInSize==0) hintInSize = zcs->blockSize;
        return hintInSize;
    }
}
/* ZSTD_compressStream() :
 * public streaming entry point : consumes `input` and produces `output`
 * in ZSTD_e_continue mode (no forced flush, frame is not closed).
 * @return : result of ZSTD_compressStream_generic(),
 *           or ERROR(GENERIC) if a buffer position exceeds its buffer size */
size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    /* check conditions */
    if (output->pos > output->size) return ERROR(GENERIC);
    if (input->pos  > input->size)  return ERROR(GENERIC);

    return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue);
}
/* ZSTD_compress_generic() :
 * streaming compression with transparent initialization.
 * When cctx->streamStage is zcss_init, the context is first (re)initialized from
 * cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, the prefix dictionary
 * (consumed : cctx->prefixDict is cleared afterwards) and cctx->cdict.
 * With ZSTD_MULTITHREAD defined and nbThreads > 1, work is delegated to the
 * ZSTDMT_* functions through cctx->mtctx, which is (re)created when missing
 * or when the number of threads changed.
 * @return : amount of data remaining to flush
 *           (in multi-threading mode : value returned by ZSTDMT_compressStream_generic()),
 *           or an error code */
size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
                              ZSTD_outBuffer* output,
                              ZSTD_inBuffer* input,
                              ZSTD_EndDirective endOp)
{
    /* check conditions */
    if (output->pos > output->size) return ERROR(GENERIC);
    if (input->pos  > input->size)  return ERROR(GENERIC);
    assert(cctx!=NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
        ZSTD_CCtx_params params = cctx->requestedParams;
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
        memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));  /* single usage */
        assert(prefixDict.dict==NULL || cctx->cdict==NULL);   /* only one can be set */

#ifdef ZSTD_MULTITHREAD
        if (params.nbThreads > 1) {
            if (cctx->mtctx == NULL || cctx->appliedParams.nbThreads != params.nbThreads) {
                ZSTDMT_freeCCtx(cctx->mtctx);
                cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
                if (cctx->mtctx == NULL) return ERROR(memory_allocation);
            }
            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads);
            /* NOTE(review): the prefix is always passed as ZSTD_dm_rawContent here,
             * whereas the single-thread path below forwards prefixDict.dictMode - confirm this is intended */
            CHECK_F( ZSTDMT_initCStream_internal(
                        cctx->mtctx,
                        prefixDict.dict, prefixDict.dictSize, ZSTD_dm_rawContent,
                        cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
            cctx->streamStage = zcss_load;
            cctx->appliedParams.nbThreads = params.nbThreads;
        } else
#endif
        {
            CHECK_F( ZSTD_resetCStream_internal(
                            cctx, prefixDict.dict, prefixDict.dictSize,
                            prefixDict.dictMode, cctx->cdict, params,
                            cctx->pledgedSrcSizePlusOne-1) );
    }   }

    /* compression stage */
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbThreads > 1) {
        size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
        DEBUGLOG(5, "ZSTDMT_compressStream_generic : %u", (U32)flushMin);
        if ( ZSTD_isError(flushMin)
          || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
            ZSTD_startNewCompression(cctx);
        }
        return flushMin;
    }
#endif

    CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
    DEBUGLOG(5, "completed ZSTD_compress_generic");
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}
/* ZSTD_compress_generic_simpleArgs() :
 * same as ZSTD_compress_generic(), but using plain arguments instead of
 * ZSTD_outBuffer / ZSTD_inBuffer structures.
 * `*dstPos` and `*srcPos` are read on entry as current positions,
 * and always written back on exit, including when an error is returned.
 * @return : value returned by ZSTD_compress_generic() */
size_t ZSTD_compress_generic_simpleArgs (
                            ZSTD_CCtx* cctx,
                            void* dst, size_t dstCapacity, size_t* dstPos,
                      const void* src, size_t srcSize, size_t* srcPos,
                            ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
    size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp);
    *dstPos = output.pos;
    *srcPos = input.pos;
    return cErr;
}

/*======   Finalize   ======*/

/*! ZSTD_flushStream() :
* @return : amount of data remaining to flush */
2016-08-16 23:39:22 +00:00
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
2016-08-11 23:20:36 +00:00
{
2017-05-31 16:59:22 +00:00
ZSTD_inBuffer input = { NULL, 0, 0 };
if (output->pos > output->size) return ERROR(GENERIC);
CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) );
return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
2016-08-11 23:20:36 +00:00
}
2016-08-16 23:39:22 +00:00
/*! ZSTD_endStream() :
 *  requests the end of the current frame (ZSTD_e_end, no new input)
 *  and transfers as much as possible into `output`.
 * @return : amount of data remaining to flush.
 *           When the frame is not ended yet, this estimate adds, on top of the
 *           content still held in the internal output buffer,
 *           ZSTD_BLOCKHEADERSIZE for the last block,
 *           plus 4 bytes when the checksum flag is set.
 *           Returns ERROR(GENERIC) if output->pos exceeds output->size */
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };   /* empty input : nothing new to consume */
    if (output->pos > output->size) return ERROR(GENERIC);
    CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) );
    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
        size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
        size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize;
        DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u",
                (unsigned)toFlush);
        return toFlush;
    }
}

/*-=====  Pre-defined compression levels  =====-*/

#define ZSTD_MAX_CLEVEL     22

/*! ZSTD_maxCLevel() :
 * @return : the highest compression level value, ZSTD_MAX_CLEVEL */
int ZSTD_maxCLevel(void)
{
    return ZSTD_MAX_CLEVEL;
}
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - guarantees a monotonically increasing memory budget */
2016-04-09 18:32:00 +00:00
/* W, C, H, S, L, TL, strat */
2016-08-26 18:02:49 +00:00
{ 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
2016-07-13 12:56:24 +00:00
{ 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
{ 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
{ 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3 */
{ 20, 17, 18, 1, 5, 16, ZSTD_dfast }, /* level 4 */
{ 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */
{ 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
{ 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */
2016-08-26 18:02:49 +00:00
{ 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
{ 21, 19, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
2016-07-13 12:56:24 +00:00
{ 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
{ 22, 21, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
2016-07-13 12:56:24 +00:00
{ 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
{ 23, 22, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
{ 23, 22, 22, 5, 4, 32, ZSTD_btopt }, /* level 18 */
2016-08-26 18:02:49 +00:00
{ 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
2017-05-11 00:48:42 +00:00
{ 25, 25, 23, 7, 3, 64, ZSTD_btultra }, /* level 20 */
{ 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */
2017-05-11 00:48:42 +00:00
{ 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
2016-08-26 18:02:49 +00:00
{ 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
2016-08-24 17:42:15 +00:00
{ 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
2016-08-24 12:22:26 +00:00
{ 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
{ 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
{ 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
{ 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
{ 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
{ 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
{ 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
{ 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
{ 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
2016-04-08 10:36:19 +00:00
{ 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
2016-08-24 12:22:26 +00:00
{ 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
{ 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
2016-04-08 10:36:19 +00:00
{ 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
{ 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
2017-05-11 00:48:42 +00:00
{ 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/
{ 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/
{ 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
2016-07-22 12:36:46 +00:00
{ 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
{ 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
{ 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
{ 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
{ 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
{ 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
{ 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
{ 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
{ 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
{ 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
{ 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
{ 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
{ 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
{ 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
{ 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
2017-05-11 00:48:42 +00:00
{ 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
{ 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
{ 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
{ 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
2016-07-17 14:21:37 +00:00
{ 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
{ 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
{ 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
{ 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
{ 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
{ 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
{ 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
{ 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
{ 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
{ 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
{ 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
{ 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
{ 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
{ 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
{ 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
{ 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
{ 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
2017-05-11 00:48:42 +00:00
{ 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
{ 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
{ 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/
},
};
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
/* ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget() :
 * debug-only sanity check of ZSTD_defaultCParameters[0] :
 * from one level to the next, windowLog must never decrease,
 * and neither must the combined cost of the hash and chain tables.
 * Triggered once, on first ZSTD_getCParams() usage, if ZSTD_DEBUG is enabled.
 */
MEM_STATIC void ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget(void)
{
#   define ZSTD_TABLECOST(h,c) ((1<<(h)) + (1<<(c)))
    int const lastLevel = ZSTD_maxCLevel();
    int lvl = 1;
    while (lvl < lastLevel) {
        ZSTD_compressionParameters const lower  = ZSTD_defaultCParameters[0][lvl];
        ZSTD_compressionParameters const higher = ZSTD_defaultCParameters[0][lvl+1];
        assert(lower.windowLog <= higher.windowLog);
        assert(ZSTD_TABLECOST(lower.hashLog, lower.chainLog) <= ZSTD_TABLECOST(higher.hashLog, higher.chainLog));
        lvl++;
    }
}
#endif
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
* Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
size_t const addedSize = srcSizeHint ? 0 : 500;
U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
static int g_monotonicTest = 1;
if (g_monotonicTest) {
ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget();
g_monotonicTest=0;
}
#endif
if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */
if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
2017-06-21 00:44:55 +00:00
{ ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
2017-08-29 02:25:17 +00:00
}
/*! ZSTD_getParams() :
 *  variant of ZSTD_getCParams() returning a complete `ZSTD_parameters` object :
 *  the structure is zeroed first, so all fields of `ZSTD_frameParameters`
 *  are left at their default (0); only `cParams` is filled,
 *  with the result of ZSTD_getCParams() for the same arguments */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
    ZSTD_parameters result;
    memset(&result, 0, sizeof(result));
    result.cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
    return result;
}