zstd/lib/compress/zstd_compress.c

3552 lines
151 KiB
C
Raw Normal View History

2016-08-30 17:04:33 +00:00
/**
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
2015-10-22 14:31:46 +00:00
2016-02-11 23:07:30 +00:00
/*-*************************************
2016-02-03 01:46:46 +00:00
* Dependencies
2015-10-22 14:31:46 +00:00
***************************************/
#include <string.h> /* memset */
2015-11-11 20:38:21 +00:00
#include "mem.h"
2016-08-11 23:20:36 +00:00
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "zstd_internal.h" /* includes zstd.h */
2015-10-22 14:31:46 +00:00
2016-02-11 23:07:30 +00:00
/*-*************************************
2015-11-11 20:38:21 +00:00
* Constants
2015-10-22 14:31:46 +00:00
***************************************/
2016-03-19 14:18:42 +00:00
static const U32 g_searchStrength = 8; /* control skip over incompressible data */
2016-07-27 19:05:12 +00:00
#define HASH_READ_SIZE 8
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
2015-10-22 14:31:46 +00:00
/* entropy tables always have same size */
static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
2015-10-22 14:31:46 +00:00
2016-02-11 23:07:30 +00:00
/*-*************************************
2016-01-23 18:28:41 +00:00
* Helper functions
***************************************/
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG==1)
# include <assert.h>
#else
# define assert(condition) ((void)0)
#endif
2016-12-11 23:25:07 +00:00
#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
size_t ZSTD_compressBound(size_t srcSize) {
size_t const lowLimit = 256 KB;
size_t const margin = (srcSize < lowLimit) ? (lowLimit-srcSize) >> 12 : 0; /* from 64 to 0 */
return srcSize + (srcSize >> 8) + margin;
}
2016-01-23 18:28:41 +00:00
2016-02-11 23:07:30 +00:00
/*-*************************************
2015-11-11 20:38:21 +00:00
* Sequence storage
***************************************/
static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
ssPtr->lit = ssPtr->litStart;
2016-07-29 22:55:13 +00:00
ssPtr->sequences = ssPtr->sequencesStart;
2016-04-07 15:19:00 +00:00
ssPtr->longLengthID = 0;
2015-11-11 20:38:21 +00:00
}
2016-02-11 23:07:30 +00:00
/*-*************************************
2015-11-11 20:38:21 +00:00
* Context memory management
2015-10-22 14:31:46 +00:00
***************************************/
2016-12-23 21:25:03 +00:00
struct ZSTD_CCtx_s {
const BYTE* nextSrc; /* next block here to continue on current prefix */
2015-10-22 15:55:40 +00:00
const BYTE* base; /* All regular indexes relative to this position */
const BYTE* dictBase; /* extDict indexes relative to this position */
2015-10-22 14:31:46 +00:00
U32 dictLimit; /* below that point, need extDict */
2015-10-22 15:55:40 +00:00
U32 lowLimit; /* below that point, no more data */
2015-10-22 14:31:46 +00:00
U32 nextToUpdate; /* index from which to continue dictionary update */
2016-02-19 09:09:35 +00:00
U32 nextToUpdate3; /* index from which to continue dictionary update */
2016-03-23 14:53:38 +00:00
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32 loadedDictEnd; /* index of end of dictionary */
U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
2016-07-27 19:05:12 +00:00
ZSTD_compressionStage_e stage;
2016-06-13 23:49:25 +00:00
U32 rep[ZSTD_REP_NUM];
2017-01-20 01:33:37 +00:00
U32 repToConfirm[ZSTD_REP_NUM];
2016-05-29 03:01:04 +00:00
U32 dictID;
ZSTD_parameters params;
2015-10-29 17:41:45 +00:00
void* workSpace;
size_t workSpaceSize;
size_t blockSize;
U64 frameContentSize;
U64 consumedSrcSize;
2016-05-31 16:13:56 +00:00
XXH64_state_t xxhState;
ZSTD_customMem customMem;
2016-01-07 14:35:18 +00:00
2015-10-29 17:41:45 +00:00
seqStore_t seqStore; /* sequences storage ptrs */
2015-10-25 13:06:35 +00:00
U32* hashTable;
2016-02-19 09:09:35 +00:00
U32* hashTable3;
2016-04-04 11:49:18 +00:00
U32* chainTable;
HUF_repeat hufCTable_repeatMode;
HUF_CElt* hufCTable;
U32 fseCTables_ready;
FSE_CTable* offcodeCTable;
FSE_CTable* matchlengthCTable;
FSE_CTable* litlengthCTable;
unsigned* entropyScratchSpace;
2015-10-22 14:31:46 +00:00
};
ZSTD_CCtx* ZSTD_createCCtx(void)
2015-10-22 14:31:46 +00:00
{
return ZSTD_createCCtx_advanced(defaultCustomMem);
2016-05-23 13:49:09 +00:00
}
ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
2016-07-14 14:52:45 +00:00
ZSTD_CCtx* cctx;
2016-05-23 14:24:52 +00:00
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree) return NULL;
2016-05-23 13:49:09 +00:00
cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
2016-07-14 14:52:45 +00:00
if (!cctx) return NULL;
memset(cctx, 0, sizeof(ZSTD_CCtx));
cctx->customMem = customMem;
2016-07-14 14:52:45 +00:00
return cctx;
2015-10-22 14:31:46 +00:00
}
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
2015-10-25 13:06:35 +00:00
{
if (cctx==NULL) return 0; /* support free on NULL */
ZSTD_free(cctx->workSpace, cctx->customMem);
ZSTD_free(cctx, cctx->customMem);
return 0; /* reserved as a potential error code in the future */
2015-10-25 13:06:35 +00:00
}
2015-10-22 14:31:46 +00:00
2016-08-22 23:18:06 +00:00
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
2016-07-11 01:12:17 +00:00
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
2016-07-11 01:12:17 +00:00
return sizeof(*cctx) + cctx->workSpaceSize;
}
size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
{
switch(param)
{
case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
default: return ERROR(parameter_unknown);
}
}
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
2016-02-11 23:07:30 +00:00
{
return &(ctx->seqStore);
2016-02-11 23:07:30 +00:00
}
static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx)
{
return cctx->params;
}
2015-11-04 11:05:27 +00:00
/** ZSTD_checkParams() :
ensure param values remain within authorized range.
@return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
2016-09-07 12:54:23 +00:00
# define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
2016-04-04 11:49:18 +00:00
CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
2016-10-25 10:25:07 +00:00
if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported);
return 0;
}
2016-02-10 12:37:52 +00:00
2016-12-11 23:47:30 +00:00
/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
return hashLog - btScale;
}
/** ZSTD_adjustCParams() :
optimize `cPar` for a given input (`srcSize` and `dictSize`).
mostly downsizing to reduce memory consumption and initialization.
Both `srcSize` and `dictSize` are optional (use 0 if unknown),
but if both are 0, no optimization can be done.
Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
{
if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */
2015-11-04 11:05:27 +00:00
2016-03-19 17:08:32 +00:00
/* resize params, to use less memory when necessary */
2016-03-30 18:06:26 +00:00
{ U32 const minSrcSize = (srcSize==0) ? 500 : 0;
U64 const rSize = srcSize + dictSize + minSrcSize;
2016-04-04 12:53:16 +00:00
if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
} }
if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
2016-12-11 23:47:30 +00:00
{ U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
}
2016-03-19 16:18:00 +00:00
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
return cPar;
2015-11-04 11:05:27 +00:00
}
2016-07-14 15:05:38 +00:00
size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
2016-03-19 15:09:09 +00:00
{
2016-07-27 19:05:12 +00:00
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
2016-07-11 01:12:17 +00:00
2016-07-27 19:05:12 +00:00
size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
size_t const hSize = ((size_t)1) << cParams.hashLog;
U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
size_t const h3Size = ((size_t)1) << hashLog3;
size_t const entropySpace = hufCTable_size + litlengthCTable_size
+ offcodeCTable_size + matchlengthCTable_size
+ entropyScratchSpace_size;
2016-07-27 19:05:12 +00:00
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
2016-07-11 01:12:17 +00:00
size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
size_t const neededSpace = entropySpace + tableSpace + tokenSpace
2016-10-25 10:25:07 +00:00
+ (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
2016-07-11 01:12:17 +00:00
return sizeof(ZSTD_CCtx) + neededSpace;
}
static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
{
return (param1.cParams.hashLog == param2.cParams.hashLog)
& (param1.cParams.chainLog == param2.cParams.chainLog)
2016-09-06 12:30:57 +00:00
& (param1.cParams.strategy == param2.cParams.strategy)
& ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3));
}
/*! ZSTD_continueCCtx() :
reuse CCtx without reset (note : requires no dictionary) */
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
{
U32 const end = (U32)(cctx->nextSrc - cctx->base);
cctx->params = params;
cctx->frameContentSize = frameContentSize;
cctx->consumedSrcSize = 0;
cctx->lowLimit = end;
cctx->dictLimit = end;
cctx->nextToUpdate = end+1;
cctx->stage = ZSTDcs_init;
cctx->dictID = 0;
cctx->loadedDictEnd = 0;
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
2016-09-06 07:54:22 +00:00
cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
XXH64_reset(&cctx->xxhState, 0);
return 0;
}
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
/*! ZSTD_resetCCtx_internal() :
note : `params` must be validated */
static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc,
2016-07-14 20:43:12 +00:00
ZSTD_parameters params, U64 frameContentSize,
2016-09-15 12:54:07 +00:00
ZSTD_compResetPolicy_e const crp)
{
2016-09-15 12:54:07 +00:00
if (crp == ZSTDcrp_continue)
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
if (ZSTD_equivalentParams(params, zc->params)) {
zc->fseCTables_ready = 0;
zc->hufCTable_repeatMode = HUF_repeat_none;
return ZSTD_continueCCtx(zc, params, frameContentSize);
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
}
{ size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
size_t const hSize = ((size_t)1) << params.cParams.hashLog;
U32 const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
size_t const h3Size = ((size_t)1) << hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
void* ptr;
/* Check if workSpace is large enough, alloc a new one if needed */
{ size_t const entropySpace = hufCTable_size + litlengthCTable_size
+ offcodeCTable_size + matchlengthCTable_size
+ entropyScratchSpace_size;
size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+ (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
size_t const optSpace = ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optPotentialSpace : 0;
size_t const neededSpace = entropySpace + optSpace + tableSpace + tokenSpace;
if (zc->workSpaceSize < neededSpace) {
zc->workSpaceSize = 0;
ZSTD_free(zc->workSpace, zc->customMem);
zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
if (zc->workSpace == NULL) return ERROR(memory_allocation);
zc->workSpaceSize = neededSpace;
ptr = zc->workSpace;
/* entropy space */
zc->hufCTable = (HUF_CElt*)ptr;
ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */
zc->offcodeCTable = (FSE_CTable*) ptr;
ptr = (char*)ptr + offcodeCTable_size;
zc->matchlengthCTable = (FSE_CTable*) ptr;
ptr = (char*)ptr + matchlengthCTable_size;
zc->litlengthCTable = (FSE_CTable*) ptr;
ptr = (char*)ptr + litlengthCTable_size;
assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */
zc->entropyScratchSpace = (unsigned*) ptr;
} }
2016-03-19 15:09:09 +00:00
/* init params */
zc->params = params;
zc->blockSize = blockSize;
zc->frameContentSize = frameContentSize;
zc->consumedSrcSize = 0;
XXH64_reset(&zc->xxhState, 0);
zc->stage = ZSTDcs_init;
zc->dictID = 0;
zc->loadedDictEnd = 0;
zc->fseCTables_ready = 0;
zc->hufCTable_repeatMode = HUF_repeat_none;
zc->nextToUpdate = 1;
zc->nextSrc = NULL;
zc->base = NULL;
zc->dictBase = NULL;
zc->dictLimit = 0;
zc->lowLimit = 0;
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
zc->hashLog3 = hashLog3;
zc->seqStore.litLengthSum = 0;
/* ensure entropy tables are close together at the beginning */
assert((void*)zc->hufCTable == zc->workSpace);
assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
/* opt parser space */
2016-10-25 10:25:07 +00:00
if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->seqStore.litFreq = (U32*)ptr;
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
}
/* table Space */
if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->hashTable = (U32*)(ptr);
zc->chainTable = zc->hashTable + hSize;
zc->hashTable3 = zc->chainTable + chainSize;
ptr = zc->hashTable3 + h3Size;
/* sequences storage */
zc->seqStore.sequencesStart = (seqDef*)ptr;
ptr = zc->seqStore.sequencesStart + maxNbSeq;
zc->seqStore.llCode = (BYTE*) ptr;
zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
return 0;
}
2015-10-22 14:31:46 +00:00
}
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
* Note : only works with regular variant;
* do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
int i;
for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
}
2015-10-25 13:06:35 +00:00
/*! ZSTD_copyCCtx_internal() :
* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
* pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
* @return : 0, or an error code */
size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx,
ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize)
2016-01-26 14:58:49 +00:00
{
2016-07-27 19:05:12 +00:00
if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
2016-01-26 14:58:49 +00:00
memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
{ ZSTD_parameters params = srcCCtx->params;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
}
2016-01-26 14:58:49 +00:00
/* copy tables */
2016-07-27 19:05:12 +00:00
{ size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
size_t const hSize = (size_t)1 << srcCCtx->params.cParams.hashLog;
2016-07-27 19:05:12 +00:00
size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */
assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */
2016-03-19 16:18:00 +00:00
}
2016-01-26 14:58:49 +00:00
2016-05-29 03:01:04 +00:00
/* copy dictionary offsets */
2016-03-19 16:18:00 +00:00
dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
dstCCtx->nextSrc = srcCCtx->nextSrc;
dstCCtx->base = srcCCtx->base;
dstCCtx->dictBase = srcCCtx->dictBase;
dstCCtx->dictLimit = srcCCtx->dictLimit;
dstCCtx->lowLimit = srcCCtx->lowLimit;
dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
2016-05-29 03:01:04 +00:00
dstCCtx->dictID = srcCCtx->dictID;
2016-01-26 14:58:49 +00:00
2016-01-27 23:18:06 +00:00
/* copy entropy tables */
dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
if (srcCCtx->fseCTables_ready) {
memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
2016-01-27 23:18:06 +00:00
}
dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
if (srcCCtx->hufCTable_repeatMode) {
memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
}
2016-01-26 14:58:49 +00:00
return 0;
}
/*! ZSTD_copyCCtx() :
* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
* pledgedSrcSize==0 means "unknown".
* @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
fParams.contentSizeFlag = pledgedSrcSize>0;
return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize);
}
2016-01-26 14:58:49 +00:00
2016-03-20 15:20:06 +00:00
/*! ZSTD_reduceTable() :
* reduce table indexes by `reducerValue` */
2016-03-20 15:20:06 +00:00
static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
{
2016-03-20 15:20:06 +00:00
U32 u;
for (u=0 ; u < size ; u++) {
if (table[u] < reducerValue) table[u] = 0;
else table[u] -= reducerValue;
}
}
2016-03-20 15:20:06 +00:00
/*! ZSTD_reduceIndex() :
* rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
{
2016-07-27 19:05:12 +00:00
{ U32 const hSize = 1 << zc->params.cParams.hashLog;
2016-03-20 15:20:06 +00:00
ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
2016-07-27 19:05:12 +00:00
{ U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
2016-04-04 11:49:18 +00:00
ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
2016-03-20 15:20:06 +00:00
2016-07-27 19:05:12 +00:00
{ U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
2016-03-20 15:20:06 +00:00
ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
}
2016-01-28 16:56:33 +00:00
/*-*******************************************************
2015-11-11 20:38:21 +00:00
* Block entropic compression
*********************************************************/
/* See doc/zstd_compression_format.md for detailed format description */
2015-11-11 20:38:21 +00:00
2016-03-15 00:24:33 +00:00
size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-11-11 20:38:21 +00:00
{
2016-03-15 00:24:33 +00:00
if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
2015-11-11 20:38:21 +00:00
return ZSTD_blockHeaderSize+srcSize;
}
2016-03-15 00:24:33 +00:00
static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-11-11 20:38:21 +00:00
{
BYTE* const ostart = (BYTE* const)dst;
2016-07-27 19:05:12 +00:00
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
2015-11-11 20:38:21 +00:00
2016-03-15 00:24:33 +00:00
if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
2015-11-11 20:38:21 +00:00
2016-01-23 18:28:41 +00:00
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
2016-07-23 14:31:49 +00:00
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
2016-01-23 18:28:41 +00:00
break;
case 2: /* 2 - 2 - 12 */
2016-07-23 14:31:49 +00:00
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
2016-01-23 18:28:41 +00:00
break;
default: /*note : should not be necessary : flSize is within {1,2,3} */
case 3: /* 2 - 2 - 20 */
2016-07-23 14:31:49 +00:00
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
2016-01-23 18:28:41 +00:00
break;
}
memcpy(ostart + flSize, src, srcSize);
return srcSize + flSize;
2015-11-11 20:38:21 +00:00
}
2016-03-15 00:24:33 +00:00
static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-11-11 20:38:21 +00:00
{
BYTE* const ostart = (BYTE* const)dst;
2016-07-27 19:05:12 +00:00
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
2016-01-23 18:28:41 +00:00
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
2016-01-23 18:28:41 +00:00
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
2016-07-23 14:31:49 +00:00
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
2016-01-23 18:28:41 +00:00
break;
case 2: /* 2 - 2 - 12 */
2016-07-23 14:31:49 +00:00
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
2016-01-23 18:28:41 +00:00
break;
2016-03-18 11:37:45 +00:00
default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
2016-01-23 18:28:41 +00:00
case 3: /* 2 - 2 - 20 */
2016-07-23 14:31:49 +00:00
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
2016-01-23 18:28:41 +00:00
break;
}
2015-11-11 20:38:21 +00:00
2016-01-23 18:28:41 +00:00
ostart[flSize] = *(const BYTE*)src;
return flSize+1;
2015-11-11 20:38:21 +00:00
}
2016-01-23 18:28:41 +00:00
2016-03-20 00:09:18 +00:00
static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
2015-11-11 20:38:21 +00:00
2016-01-26 02:14:20 +00:00
static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
2016-03-15 00:24:33 +00:00
void* dst, size_t dstCapacity,
2015-11-11 20:38:21 +00:00
const void* src, size_t srcSize)
{
2016-03-18 11:37:45 +00:00
size_t const minGain = ZSTD_minGain(srcSize);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
2016-07-27 19:05:12 +00:00
BYTE* const ostart = (BYTE*)dst;
2016-01-25 03:10:46 +00:00
U32 singleStream = srcSize < 256;
2016-07-23 14:31:49 +00:00
symbolEncodingType_e hType = set_compressed;
2016-03-18 11:37:45 +00:00
size_t cLitSize;
2015-11-11 20:38:21 +00:00
2016-03-20 00:09:18 +00:00
/* small ? don't even attempt compression (speed opt) */
# define LITERAL_NOENTROPY 63
{ size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
2016-03-20 00:09:18 +00:00
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
{ HUF_repeat repeat = zc->hufCTable_repeatMode;
int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
2017-04-20 18:14:13 +00:00
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
2017-04-20 18:14:13 +00:00
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
else { zc->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */
2016-01-26 02:14:20 +00:00
}
2015-11-11 20:38:21 +00:00
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
zc->hufCTable_repeatMode = HUF_repeat_none;
2016-03-18 11:37:45 +00:00
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
}
if (cLitSize==1) {
zc->hufCTable_repeatMode = HUF_repeat_none;
2016-03-18 11:37:45 +00:00
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
}
2015-11-11 20:38:21 +00:00
/* Build header */
2016-01-23 18:28:41 +00:00
switch(lhSize)
2015-11-11 20:38:21 +00:00
{
2016-01-23 18:28:41 +00:00
case 3: /* 2 - 2 - 10 - 10 */
2016-07-22 15:30:52 +00:00
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
2016-01-23 18:28:41 +00:00
case 4: /* 2 - 2 - 14 - 14 */
2016-07-22 02:45:06 +00:00
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
2016-03-18 11:37:45 +00:00
default: /* should not be necessary, lhSize is only {3,4,5} */
2016-01-23 18:28:41 +00:00
case 5: /* 2 - 2 - 18 - 18 */
2016-07-22 02:45:06 +00:00
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
break;
}
2015-11-11 20:38:21 +00:00
}
2016-03-18 11:37:45 +00:00
return lhSize+cLitSize;
2015-11-11 20:38:21 +00:00
}
static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 16, 17, 17, 18, 18, 19, 19,
20, 20, 20, 20, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22,
23, 23, 23, 23, 23, 23, 23, 23,
24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24 };
static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
2016-07-29 19:22:17 +00:00
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
2016-07-29 19:22:17 +00:00
BYTE const LL_deltaCode = 19;
BYTE const ML_deltaCode = 36;
2016-07-29 22:55:13 +00:00
const seqDef* const sequences = seqStorePtr->sequencesStart;
2016-07-29 19:22:17 +00:00
BYTE* const llCodeTable = seqStorePtr->llCode;
BYTE* const ofCodeTable = seqStorePtr->ofCode;
BYTE* const mlCodeTable = seqStorePtr->mlCode;
2016-07-29 22:55:13 +00:00
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
2016-07-29 19:22:17 +00:00
U32 u;
for (u=0; u<nbSeq; u++) {
U32 const llv = sequences[u].litLength;
U32 const mlv = sequences[u].matchLength;
llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
2016-07-29 19:22:17 +00:00
ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
2016-04-07 15:19:00 +00:00
}
2016-07-29 19:22:17 +00:00
if (seqStorePtr->longLengthID==1)
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
if (seqStorePtr->longLengthID==2)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
2016-03-15 00:24:33 +00:00
void* dst, size_t dstCapacity,
size_t srcSize)
2015-11-11 20:38:21 +00:00
{
const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2016-01-26 02:14:20 +00:00
const seqStore_t* seqStorePtr = &(zc->seqStore);
2015-11-11 20:38:21 +00:00
U32 count[MaxSeq+1];
S16 norm[MaxSeq+1];
2016-01-27 23:18:06 +00:00
FSE_CTable* CTable_LitLength = zc->litlengthCTable;
FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
2015-11-11 20:38:21 +00:00
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
2016-07-29 22:55:13 +00:00
const seqDef* const sequences = seqStorePtr->sequencesStart;
2016-07-29 19:22:17 +00:00
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
2015-11-23 12:34:21 +00:00
BYTE* const ostart = (BYTE*)dst;
2016-03-15 00:24:33 +00:00
BYTE* const oend = ostart + dstCapacity;
2016-03-18 11:37:45 +00:00
BYTE* op = ostart;
2016-07-29 22:55:13 +00:00
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
2015-11-11 20:38:21 +00:00
BYTE* seqHead;
BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
2015-11-11 20:38:21 +00:00
/* Compress literals */
2016-03-20 00:09:18 +00:00
{ const BYTE* const literals = seqStorePtr->litStart;
2016-03-18 11:37:45 +00:00
size_t const litSize = seqStorePtr->lit - literals;
2016-03-20 00:09:18 +00:00
size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
2015-11-11 20:38:21 +00:00
if (ZSTD_isError(cSize)) return cSize;
op += cSize;
}
/* Sequences Header */
2016-03-23 21:31:57 +00:00
if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
2016-01-30 23:58:06 +00:00
if (nbSeq==0) goto _check_compressibility;
2015-11-11 20:38:21 +00:00
/* seqHead : flags for FSE encoding type */
seqHead = op++;
2015-11-11 20:38:21 +00:00
2016-01-27 23:18:06 +00:00
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
/* convert length/distances into codes */
2016-07-29 19:22:17 +00:00
ZSTD_seqToCodes(seqStorePtr);
2016-03-20 18:14:22 +00:00
2015-11-11 20:38:21 +00:00
/* CTable for Literal Lengths */
{ U32 max = MaxLL;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
2016-07-23 14:31:49 +00:00
LLtype = set_rle;
} else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
2016-07-23 14:31:49 +00:00
LLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
2016-07-23 14:31:49 +00:00
LLtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
2016-03-23 00:32:41 +00:00
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return NCountSize;
2016-03-23 00:32:41 +00:00
op += NCountSize; }
FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
2016-07-23 14:31:49 +00:00
LLtype = set_compressed;
} }
2015-11-11 20:38:21 +00:00
/* CTable for Offsets */
{ U32 max = MaxOff;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
2016-03-23 21:31:57 +00:00
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
2016-07-23 14:31:49 +00:00
Offtype = set_rle;
} else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
2016-07-23 14:31:49 +00:00
Offtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
2016-07-23 14:31:49 +00:00
Offtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
2016-03-23 21:31:57 +00:00
if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
2016-03-23 00:32:41 +00:00
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return NCountSize;
2016-03-23 00:32:41 +00:00
op += NCountSize; }
FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
2016-07-23 14:31:49 +00:00
Offtype = set_compressed;
} }
2015-11-11 20:38:21 +00:00
/* CTable for MatchLengths */
{ U32 max = MaxML;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
2016-07-23 14:31:49 +00:00
MLtype = set_rle;
} else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
2016-07-23 14:31:49 +00:00
MLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
2016-07-23 14:31:49 +00:00
MLtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return NCountSize;
op += NCountSize; }
FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
2016-07-23 14:31:49 +00:00
MLtype = set_compressed;
} }
2015-11-11 20:38:21 +00:00
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
zc->fseCTables_ready = 0;
2015-11-11 20:38:21 +00:00
/* Encoding Sequences */
2016-03-19 17:08:32 +00:00
{ BIT_CStream_t blockStream;
2016-03-18 11:37:45 +00:00
FSE_CState_t stateMatchLength;
FSE_CState_t stateOffsetBits;
FSE_CState_t stateLitLength;
2015-11-11 20:38:21 +00:00
2016-09-06 14:38:51 +00:00
CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
2016-03-20 18:14:22 +00:00
/* first symbols */
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
2016-03-23 21:31:57 +00:00
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
2016-03-20 18:14:22 +00:00
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
2016-07-29 19:22:17 +00:00
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
2016-03-26 16:18:11 +00:00
if (MEM_32bits()) BIT_flushBits(&blockStream);
2016-07-29 19:22:17 +00:00
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
2016-03-26 16:18:11 +00:00
if (MEM_32bits()) BIT_flushBits(&blockStream);
2017-03-01 22:36:25 +00:00
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
}
2016-03-20 18:14:22 +00:00
BIT_flushBits(&blockStream);
2015-11-11 20:38:21 +00:00
{ size_t n;
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
2016-07-30 01:20:47 +00:00
BYTE const llCode = llCodeTable[n];
2016-07-27 19:05:12 +00:00
BYTE const ofCode = ofCodeTable[n];
BYTE const mlCode = mlCodeTable[n];
U32 const llBits = LL_bits[llCode];
U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
2016-07-30 01:20:47 +00:00
U32 const mlBits = ML_bits[mlCode];
/* (7)*/ /* (7)*/
2016-03-26 16:18:11 +00:00
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
2016-03-26 16:18:11 +00:00
BIT_flushBits(&blockStream); /* (7)*/
2016-07-29 19:22:17 +00:00
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
2016-03-26 16:18:11 +00:00
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
2016-07-29 19:22:17 +00:00
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
2016-03-26 16:18:11 +00:00
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
2017-03-01 22:36:25 +00:00
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
}
2016-03-26 16:18:11 +00:00
BIT_flushBits(&blockStream); /* (7)*/
} }
2015-11-11 20:38:21 +00:00
FSE_flushCState(&blockStream, &stateMatchLength);
FSE_flushCState(&blockStream, &stateOffsetBits);
FSE_flushCState(&blockStream, &stateLitLength);
2016-03-26 16:18:11 +00:00
{ size_t const streamSize = BIT_closeCStream(&blockStream);
if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
op += streamSize;
} }
2015-11-11 20:38:21 +00:00
/* check compressibility */
2016-01-30 23:58:06 +00:00
_check_compressibility:
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
{ size_t const minGain = ZSTD_minGain(srcSize);
size_t const maxCSize = srcSize - minGain;
if ((size_t)(op-ostart) >= maxCSize) {
zc->hufCTable_repeatMode = HUF_repeat_none;
Allow compressor to repeat Huffman tables * Compressor saves most recently used Huffman table and reuses it if it produces better results. * I attempted to preserve CPU usage profile. I intentionally left all of the existing heuristics in place. There is only a speed difference on the second block and later. When compressing large enough blocks (say >= 4 KiB) there is no significant difference in compression speed. Dictionary compression of one block is the same speed for blocks with literals <= 1 KiB, and after that the difference is not very significant. * In the synthetic data, with blocks 10 KB or smaller, most blocks can't use repeated tables because the previous block did not contain a symbol that the current block contains. Once blocks are about 12 KB or more, most previous blocks have valid Huffman tables for the current block, and the compression ratio and decompression speed jumped. * In silesia blocks as small as 4KB can frequently reuse the previous Huffman table (85%), but it isn't as profitable, and the previous Huffman table only gets used about 3% of the time. * Microbenchmarks show that `HUF_validateCTable()` takes ~55 ns and `HUF_estimateCompressedSize()` takes ~35 ns. They are decently well optimized, the first versions took 90 ns and 120 ns respectively. `HUF_validateCTable()` could be twice as fast, if we cast the `HUF_CElt*` to a `U32*` and compare to 0. However, `U32` has an alignment of 4 instead of 2, so I think that might be undefined behavior. * I've ran `zstreamtest` compiled normally, with UASAN and with MSAN for 4 hours each. The worst case for the speed difference is a bunch of small blocks in the same frame. I modified `bench.c` to compress the input in a single frame but with blocks of the given block size, set by `-B`. Benchmarks on level 1: | Program | Block size | Corpus | Ratio | Compression MB/s | Decompression MB/s | |-----------|------------|-----------|-------|------------------|--------------------| | zstd.base | 256 | synthetic | 2.364 | 110.0 | 297.0 | | zstd | 256 | synthetic | 2.367 | 108.9 | 297.0 | | zstd.base | 256 | silesia | 2.204 | 93.8 | 415.7 | | zstd | 256 | silesia | 2.204 | 93.4 | 415.7 | | zstd.base | 512 | synthetic | 2.594 | 144.2 | 420.0 | | zstd | 512 | synthetic | 2.599 | 141.5 | 425.7 | | zstd.base | 512 | silesia | 2.358 | 118.4 | 432.6 | | zstd | 512 | silesia | 2.358 | 119.8 | 432.6 | | zstd.base | 1024 | synthetic | 2.790 | 192.3 | 594.1 | | zstd | 1024 | synthetic | 2.794 | 192.3 | 600.0 | | zstd.base | 1024 | silesia | 2.524 | 148.2 | 464.2 | | zstd | 1024 | silesia | 2.525 | 148.2 | 467.6 | | zstd.base | 4096 | synthetic | 3.023 | 300.0 | 1000.0 | | zstd | 4096 | synthetic | 3.024 | 300.0 | 1010.1 | | zstd.base | 4096 | silesia | 2.779 | 223.1 | 623.5 | | zstd | 4096 | silesia | 2.779 | 223.1 | 636.0 | | zstd.base | 16384 | synthetic | 3.131 | 350.0 | 1150.1 | | zstd | 16384 | synthetic | 3.152 | 350.0 | 1630.3 | | zstd.base | 16384 | silesia | 2.871 | 296.5 | 883.3 | | zstd | 16384 | silesia | 2.872 | 294.4 | 898.3 |
2017-03-02 01:51:56 +00:00
return 0;
} }
2015-11-11 20:38:21 +00:00
2016-06-13 23:49:25 +00:00
/* confirm repcodes */
2017-01-20 01:33:37 +00:00
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
2016-06-13 23:49:25 +00:00
2015-11-23 12:34:21 +00:00
return op - ostart;
2015-11-11 20:38:21 +00:00
}
#if 0 /* for debug */
# define STORESEQ_DEBUG
#include <stdio.h> /* fprintf */
U32 g_startDebug = 0;
const BYTE* g_start = NULL;
#endif
/*! ZSTD_storeSeq() :
Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
`offsetCode` : distance to match, or 0 == repCode.
`matchCode` : matchLength - MINMATCH
2015-11-11 20:38:21 +00:00
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
2015-11-11 20:38:21 +00:00
{
#ifdef STORESEQ_DEBUG
if (g_startDebug) {
const U32 pos = (U32)((const BYTE*)literals - g_start);
if (g_start==NULL) g_start = (const BYTE*)literals;
if ((pos > 1895000) && (pos < 1895300))
fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
}
2015-11-11 20:38:21 +00:00
#endif
/* copy Literals */
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
seqStorePtr->lit += litLength;
/* literal Length */
if (litLength>0xFFFF) {
seqStorePtr->longLengthID = 1;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
2016-07-29 22:55:13 +00:00
seqStorePtr->sequences[0].litLength = (U16)litLength;
2015-11-11 20:38:21 +00:00
/* match offset */
2016-07-29 22:55:13 +00:00
seqStorePtr->sequences[0].offset = offsetCode + 1;
2015-11-11 20:38:21 +00:00
/* match Length */
if (matchCode>0xFFFF) {
seqStorePtr->longLengthID = 2;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
2016-07-29 22:55:13 +00:00
seqStorePtr->sequences[0].matchLength = (U16)matchCode;
2016-07-29 19:22:17 +00:00
2016-07-29 22:55:13 +00:00
seqStorePtr->sequences++;
2015-11-11 20:38:21 +00:00
}
2016-02-11 23:07:30 +00:00
/*-*************************************
2015-11-11 20:38:21 +00:00
* Match length counter
***************************************/
2015-11-23 12:34:21 +00:00
static unsigned ZSTD_NbCommonBytes (register size_t val)
2015-11-11 20:38:21 +00:00
{
2016-01-28 16:56:33 +00:00
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
2015-11-11 20:38:21 +00:00
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanForward64( &r, (U64)val );
return (unsigned)(r>>3);
2015-11-11 20:38:21 +00:00
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctzll((U64)val) >> 3);
# else
2017-04-20 18:14:13 +00:00
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
0, 2, 3, 6, 1, 5, 3, 5,
1, 3, 4, 4, 2, 5, 6, 7,
7, 0, 1, 2, 3, 3, 4, 6,
2, 6, 5, 5, 3, 4, 5, 6,
7, 1, 2, 4, 6, 4, 4, 5,
7, 2, 6, 5, 7, 6, 7, 7 };
2015-11-11 20:38:21 +00:00
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
2016-01-28 16:56:33 +00:00
} else { /* 32 bits */
2015-11-11 20:38:21 +00:00
# if defined(_MSC_VER)
unsigned long r=0;
_BitScanForward( &r, (U32)val );
return (unsigned)(r>>3);
2015-11-11 20:38:21 +00:00
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
2017-04-20 18:14:13 +00:00
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
3, 3, 1, 2, 2, 2, 2, 0,
3, 1, 2, 0, 1, 0, 1, 1 };
2015-11-11 20:38:21 +00:00
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
2016-01-28 16:56:33 +00:00
} else { /* Big Endian CPU */
if (MEM_64bits()) {
2015-11-11 20:38:21 +00:00
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanReverse64( &r, val );
return (unsigned)(r>>3);
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clzll(val) >> 3);
# else
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
2016-01-28 16:56:33 +00:00
} else { /* 32 bits */
2015-11-11 20:38:21 +00:00
# if defined(_MSC_VER)
unsigned long r = 0;
_BitScanReverse( &r, (unsigned long)val );
return (unsigned)(r>>3);
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
2016-01-28 16:56:33 +00:00
} }
2015-11-11 20:38:21 +00:00
}
static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
2015-11-11 20:38:21 +00:00
{
const BYTE* const pStart = pIn;
const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
2015-11-11 20:38:21 +00:00
while (pIn < pInLoopLimit) {
2016-05-20 09:44:43 +00:00
size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
2015-11-11 20:38:21 +00:00
if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
pIn += ZSTD_NbCommonBytes(diff);
return (size_t)(pIn - pStart);
}
if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
return (size_t)(pIn - pStart);
}
/** ZSTD_count_2segments() :
2016-02-11 23:07:30 +00:00
* can count match length with `ip` & `match` in 2 different segments.
2015-11-23 12:34:21 +00:00
* convention : on reaching mEnd, match count continue starting from iStart
*/
static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
2016-05-20 09:44:43 +00:00
const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
2016-07-27 19:05:12 +00:00
size_t const matchLength = ZSTD_count(ip, match, vEnd);
if (match + matchLength != mEnd) return matchLength;
return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
2015-11-23 12:34:21 +00:00
}
2015-11-11 20:38:21 +00:00
2016-01-28 16:56:33 +00:00
/*-*************************************
2015-11-11 20:38:21 +00:00
* Hashes
2015-10-22 14:31:46 +00:00
***************************************/
2016-02-19 09:09:35 +00:00
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
2016-02-19 09:09:35 +00:00
2015-10-30 14:49:48 +00:00
static const U32 prime4bytes = 2654435761U;
2016-01-28 16:56:33 +00:00
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
2015-10-30 14:49:48 +00:00
static const U64 prime5bytes = 889523592379ULL;
2016-01-28 16:56:33 +00:00
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
2016-02-07 03:00:27 +00:00
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
2015-10-30 14:49:48 +00:00
static const U64 prime6bytes = 227718039650203ULL;
2016-01-28 16:56:33 +00:00
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
2016-02-07 03:00:27 +00:00
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
2015-10-22 14:31:46 +00:00
2015-11-11 20:38:21 +00:00
static const U64 prime7bytes = 58295818150454627ULL;
2016-01-28 16:56:33 +00:00
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
2016-02-07 03:00:27 +00:00
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
2015-11-05 16:32:18 +00:00
2016-07-12 07:47:31 +00:00
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
2015-10-30 14:49:48 +00:00
{
switch(mls)
{
//case 3: return ZSTD_hash3Ptr(p, hBits);
default:
case 4: return ZSTD_hash4Ptr(p, hBits);
case 5: return ZSTD_hash5Ptr(p, hBits);
case 6: return ZSTD_hash6Ptr(p, hBits);
case 7: return ZSTD_hash7Ptr(p, hBits);
2016-07-12 07:47:31 +00:00
case 8: return ZSTD_hash8Ptr(p, hBits);
2015-10-30 14:49:48 +00:00
}
}
2016-01-28 16:56:33 +00:00
2016-02-02 13:36:49 +00:00
/*-*************************************
2015-11-05 16:32:18 +00:00
* Fast Scan
***************************************/
static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
{
U32* const hashTable = zc->hashTable;
2016-07-27 19:05:12 +00:00
U32 const hBits = zc->params.cParams.hashLog;
const BYTE* const base = zc->base;
const BYTE* ip = base + zc->nextToUpdate;
2016-07-27 19:05:12 +00:00
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
2016-03-19 14:11:42 +00:00
const size_t fastHashFillStep = 3;
2016-01-27 23:18:06 +00:00
while(ip <= iend) {
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
2016-03-19 14:11:42 +00:00
ip += fastHashFillStep;
}
}
2015-11-05 16:32:18 +00:00
FORCE_INLINE
2016-06-13 23:49:25 +00:00
void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
2016-08-07 22:44:00 +00:00
const void* src, size_t srcSize,
const U32 mls)
2015-11-05 16:32:18 +00:00
{
2016-06-13 23:49:25 +00:00
U32* const hashTable = cctx->hashTable;
2016-07-27 19:05:12 +00:00
U32 const hBits = cctx->params.cParams.hashLog;
2016-06-13 23:49:25 +00:00
seqStore_t* seqStorePtr = &(cctx->seqStore);
const BYTE* const base = cctx->base;
2015-11-05 16:32:18 +00:00
const BYTE* const istart = (const BYTE*)src;
2015-11-06 09:52:17 +00:00
const BYTE* ip = istart;
2015-11-05 16:32:18 +00:00
const BYTE* anchor = istart;
2016-07-27 19:05:12 +00:00
const U32 lowestIndex = cctx->dictLimit;
2016-06-13 23:49:25 +00:00
const BYTE* const lowest = base + lowestIndex;
2015-11-05 16:32:18 +00:00
const BYTE* const iend = istart + srcSize;
2016-07-27 19:05:12 +00:00
const BYTE* const ilimit = iend - HASH_READ_SIZE;
2016-07-02 23:10:53 +00:00
U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
U32 offsetSaved = 0;
2015-11-05 16:32:18 +00:00
/* init */
2016-06-13 23:49:25 +00:00
ip += (ip==lowest);
{ U32 const maxRep = (U32)(ip-lowest);
2016-07-02 23:10:53 +00:00
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
2016-06-13 23:49:25 +00:00
}
2015-11-05 16:32:18 +00:00
/* Main Search Loop */
2016-06-13 23:49:25 +00:00
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
2016-06-13 19:43:06 +00:00
size_t const h = ZSTD_hashPtr(ip, hBits, mls);
U32 const current = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
2015-12-29 13:29:08 +00:00
const BYTE* match = base + matchIndex;
2016-01-02 00:16:28 +00:00
hashTable[h] = current; /* update hash table */
2015-11-05 16:32:18 +00:00
2016-08-07 22:44:00 +00:00
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
2016-07-12 07:47:31 +00:00
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
2015-11-20 11:46:08 +00:00
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
2016-07-02 23:10:53 +00:00
U32 offset;
if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
2015-11-20 11:46:08 +00:00
ip += ((ip-anchor) >> g_searchStrength) + 1;
continue;
}
2016-07-12 07:47:31 +00:00
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
2016-07-02 23:10:53 +00:00
offset = (U32)(ip-match);
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
2015-11-20 11:46:08 +00:00
offset_2 = offset_1;
offset_1 = offset;
2016-03-16 14:35:14 +00:00
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
2015-11-20 11:46:08 +00:00
}
/* match found */
ip += mLength;
2015-11-20 11:46:08 +00:00
anchor = ip;
2016-01-27 23:18:06 +00:00
if (ip <= ilimit) {
2015-11-20 11:46:08 +00:00
/* Fill Table */
2016-01-07 14:35:18 +00:00
hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
2015-11-20 11:46:08 +00:00
hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while ( (ip <= ilimit)
2016-06-13 23:49:25 +00:00
&& ( (offset_2>0)
2016-06-13 19:43:06 +00:00
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
2015-11-20 11:46:08 +00:00
/* store sequence */
2016-07-12 07:47:31 +00:00
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
2016-07-02 23:10:53 +00:00
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
2015-11-20 11:46:08 +00:00
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
2015-11-20 11:46:08 +00:00
anchor = ip;
continue; /* faster when present ... (?) */
2016-01-27 23:18:06 +00:00
} } }
2015-11-05 16:32:18 +00:00
2016-06-13 23:49:25 +00:00
/* save reps for next block */
2017-01-20 01:33:37 +00:00
cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
2016-06-13 23:49:25 +00:00
2016-03-19 17:08:32 +00:00
/* Last Literals */
{ size_t const lastLLSize = iend - anchor;
2015-11-05 16:32:18 +00:00
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
2016-01-23 18:28:41 +00:00
const void* src, size_t srcSize)
2015-11-05 16:32:18 +00:00
{
const U32 mls = ctx->params.cParams.searchLength;
2015-11-05 16:32:18 +00:00
switch(mls)
{
default: /* includes case 3 */
2015-11-05 16:32:18 +00:00
case 4 :
2016-01-23 18:28:41 +00:00
ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
2015-11-05 16:32:18 +00:00
case 5 :
2016-01-23 18:28:41 +00:00
ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
2015-11-05 16:32:18 +00:00
case 6 :
2016-01-23 18:28:41 +00:00
ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
2015-11-05 16:32:18 +00:00
case 7 :
2016-01-23 18:28:41 +00:00
ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
2015-11-05 16:32:18 +00:00
}
}
2015-10-22 14:31:46 +00:00
static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
2016-01-23 18:28:41 +00:00
const void* src, size_t srcSize,
const U32 mls)
{
U32* hashTable = ctx->hashTable;
const U32 hBits = ctx->params.cParams.hashLog;
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const base = ctx->base;
const BYTE* const dictBase = ctx->dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
2016-06-13 19:43:06 +00:00
const U32 lowestIndex = ctx->lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
const U32 dictLimit = ctx->dictLimit;
2015-11-20 11:03:53 +00:00
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
2016-06-13 23:49:25 +00:00
U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
/* Search Loop */
2016-01-27 23:18:06 +00:00
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hBits, mls);
2015-11-20 11:03:53 +00:00
const U32 matchIndex = hashTable[h];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
2015-11-26 10:43:00 +00:00
const BYTE* match = matchBase + matchIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
2015-11-20 11:46:08 +00:00
const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* repMatch = repBase + repIndex;
size_t mLength;
hashTable[h] = current; /* update hash table */
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
2016-06-13 23:49:25 +00:00
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
2015-11-20 11:46:08 +00:00
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
2015-11-20 11:03:53 +00:00
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
2016-01-27 23:18:06 +00:00
} else {
2016-06-13 19:43:06 +00:00
if ( (matchIndex < lowestIndex) ||
2016-03-20 15:00:00 +00:00
(MEM_read32(match) != MEM_read32(ip)) ) {
ip += ((ip-anchor) >> g_searchStrength) + 1;
continue;
}
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
2015-11-23 12:34:21 +00:00
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
U32 offset;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
2015-11-20 11:46:08 +00:00
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
2016-01-27 23:18:06 +00:00
} }
2015-11-20 11:46:08 +00:00
2015-11-23 12:34:21 +00:00
/* found a match : store it */
ip += mLength;
2015-11-20 11:46:08 +00:00
anchor = ip;
2016-01-27 23:18:06 +00:00
if (ip <= ilimit) {
2015-11-26 10:43:00 +00:00
/* Fill Table */
2016-09-06 13:36:19 +00:00
hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
2015-11-20 11:46:08 +00:00
hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
2016-01-27 23:18:06 +00:00
while (ip <= ilimit) {
2016-04-01 13:48:48 +00:00
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
2015-11-20 11:46:08 +00:00
const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
2016-06-13 23:49:25 +00:00
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
2015-11-23 12:34:21 +00:00
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
2015-11-23 12:34:21 +00:00
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
2016-04-06 07:46:01 +00:00
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
2015-11-23 12:34:21 +00:00
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
2016-04-06 07:46:01 +00:00
ip += repLength2;
2015-11-20 11:46:08 +00:00
anchor = ip;
continue;
}
2015-11-20 11:03:53 +00:00
break;
2016-01-27 23:18:06 +00:00
} } }
2016-06-13 23:49:25 +00:00
/* save reps for next block */
2017-01-20 01:33:37 +00:00
ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2016-06-13 23:49:25 +00:00
/* Last Literals */
2016-03-19 17:08:32 +00:00
{ size_t const lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
const void* src, size_t srcSize)
{
2016-07-27 19:05:12 +00:00
U32 const mls = ctx->params.cParams.searchLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
case 5 :
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
case 6 :
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
case 7 :
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
}
}
2016-07-12 07:47:31 +00:00
/*-*************************************
* Double Fast
***************************************/
static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
{
U32* const hashLarge = cctx->hashTable;
2016-07-27 19:05:12 +00:00
U32 const hBitsL = cctx->params.cParams.hashLog;
2016-07-12 07:47:31 +00:00
U32* const hashSmall = cctx->chainTable;
2016-07-27 19:05:12 +00:00
U32 const hBitsS = cctx->params.cParams.chainLog;
2016-07-12 07:47:31 +00:00
const BYTE* const base = cctx->base;
const BYTE* ip = base + cctx->nextToUpdate;
2016-07-27 19:05:12 +00:00
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
2016-07-12 07:47:31 +00:00
const size_t fastHashFillStep = 3;
while(ip <= iend) {
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
ip += fastHashFillStep;
}
}
FORCE_INLINE
void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
const void* src, size_t srcSize,
const U32 mls)
{
U32* const hashLong = cctx->hashTable;
const U32 hBitsL = cctx->params.cParams.hashLog;
U32* const hashSmall = cctx->chainTable;
const U32 hBitsS = cctx->params.cParams.chainLog;
seqStore_t* seqStorePtr = &(cctx->seqStore);
const BYTE* const base = cctx->base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 lowestIndex = cctx->dictLimit;
const BYTE* const lowest = base + lowestIndex;
const BYTE* const iend = istart + srcSize;
2016-07-27 19:05:12 +00:00
const BYTE* const ilimit = iend - HASH_READ_SIZE;
2016-07-12 07:47:31 +00:00
U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
U32 offsetSaved = 0;
/* init */
ip += (ip==lowest);
{ U32 const maxRep = (U32)(ip-lowest);
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
U32 const current = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2];
U32 const matchIndexS = hashSmall[h];
const BYTE* matchLong = base + matchIndexL;
const BYTE* match = base + matchIndexS;
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
assert(offset_1 <= current); /* supposed guaranteed by construction */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
/* favor repcode */
2016-07-12 07:47:31 +00:00
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
2016-07-12 13:11:40 +00:00
U32 offset;
2016-07-12 07:47:31 +00:00
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
2016-07-12 13:11:40 +00:00
offset = (U32)(ip-matchLong);
2016-07-12 07:47:31 +00:00
while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
} else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = current + 1;
if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
2016-08-23 23:10:42 +00:00
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
2016-08-23 23:10:42 +00:00
} else {
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
offset = (U32)(ip-match);
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
2016-07-12 07:47:31 +00:00
} else {
ip += ((ip-anchor) >> g_searchStrength) + 1;
continue;
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
2016-07-12 13:11:40 +00:00
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
2016-07-12 07:47:31 +00:00
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* save reps for next block */
2017-01-20 01:33:37 +00:00
cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
2016-07-12 07:47:31 +00:00
/* Last Literals */
{ size_t const lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
const U32 mls = ctx->params.cParams.searchLength;
switch(mls)
{
default: /* includes case 3 */
2016-07-12 07:47:31 +00:00
case 4 :
ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
case 5 :
ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
case 6 :
ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
case 7 :
ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
}
}
static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
const U32 mls)
{
U32* const hashLong = ctx->hashTable;
2016-07-27 19:05:12 +00:00
U32 const hBitsL = ctx->params.cParams.hashLog;
2016-07-12 07:47:31 +00:00
U32* const hashSmall = ctx->chainTable;
2016-07-27 19:05:12 +00:00
U32 const hBitsS = ctx->params.cParams.chainLog;
2016-07-12 07:47:31 +00:00
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const base = ctx->base;
const BYTE* const dictBase = ctx->dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 lowestIndex = ctx->lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
const U32 dictLimit = ctx->dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
const U32 matchIndex = hashSmall[hSmall];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
const U32 matchLongIndex = hashLong[hLong];
const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
const BYTE* matchLong = matchLongBase + matchLongIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* repMatch = repBase + repIndex;
size_t mLength;
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
ip++;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
U32 offset;
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
offset = current - matchLongIndex;
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
2016-08-23 23:10:42 +00:00
2016-07-12 11:03:48 +00:00
} else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
2016-08-23 23:10:42 +00:00
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
U32 const matchIndex3 = hashLong[h3];
const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
const BYTE* match3 = match3Base + matchIndex3;
2016-07-12 07:47:31 +00:00
U32 offset;
2016-08-23 23:10:42 +00:00
hashLong[h3] = current + 1;
if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
ip++;
offset = current+1 - matchIndex3;
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
} else {
const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
offset = current - matchIndex;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
2016-07-12 07:47:31 +00:00
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
2016-08-23 23:10:42 +00:00
2016-07-12 07:47:31 +00:00
} else {
ip += ((ip-anchor) >> g_searchStrength) + 1;
continue;
} }
/* found a match : store it */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
2017-03-30 01:51:58 +00:00
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
2016-07-12 07:47:31 +00:00
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
2016-07-12 07:47:31 +00:00
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
anchor = ip;
continue;
}
break;
} } }
/* save reps for next block */
2017-01-20 01:33:37 +00:00
ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2016-07-12 07:47:31 +00:00
/* Last Literals */
{ size_t const lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
const void* src, size_t srcSize)
{
2016-07-27 19:05:12 +00:00
U32 const mls = ctx->params.cParams.searchLength;
2016-07-12 07:47:31 +00:00
switch(mls)
{
default: /* includes case 3 */
2016-07-12 07:47:31 +00:00
case 4 :
ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
case 5 :
ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
case 6 :
ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
case 7 :
ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
}
}
/*-*************************************
2015-11-04 02:52:54 +00:00
* Binary Tree search
2015-10-22 14:31:46 +00:00
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
2015-11-23 13:23:47 +00:00
* @return : nb of positions added */
2016-01-01 06:29:39 +00:00
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
U32 extDict)
2015-11-04 02:52:54 +00:00
{
2016-07-27 19:05:12 +00:00
U32* const hashTable = zc->hashTable;
U32 const hashLog = zc->params.cParams.hashLog;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32* const bt = zc->chainTable;
U32 const btLog = zc->params.cParams.chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
U32 matchIndex = hashTable[h];
2015-11-04 02:52:54 +00:00
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const base = zc->base;
2016-01-01 06:29:39 +00:00
const BYTE* const dictBase = zc->dictBase;
const U32 dictLimit = zc->dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
2016-10-14 23:03:34 +00:00
const BYTE* match;
const U32 current = (U32)(ip-base);
2015-11-08 14:08:03 +00:00
const U32 btLow = btMask >= current ? 0 : current - btMask;
2015-11-04 02:52:54 +00:00
U32* smallerPtr = bt + 2*(current&btMask);
2016-01-16 23:12:55 +00:00
U32* largerPtr = smallerPtr + 1;
2015-11-04 11:05:27 +00:00
U32 dummy32; /* to be nullified at the end */
2016-07-27 19:05:12 +00:00
U32 const windowLow = zc->lowLimit;
2015-12-31 18:08:44 +00:00
U32 matchEndIdx = current+8;
size_t bestLength = 8;
2016-06-30 12:07:30 +00:00
#ifdef ZSTD_C_PREDICT
2016-01-21 10:57:45 +00:00
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
predictedSmall += (predictedSmall>0);
predictedLarge += (predictedLarge>0);
2016-06-30 12:07:30 +00:00
#endif /* ZSTD_C_PREDICT */
2015-11-07 00:13:31 +00:00
hashTable[h] = current; /* Update Hash Table */
2015-11-04 02:52:54 +00:00
2016-01-27 23:18:06 +00:00
while (nbCompares-- && (matchIndex > windowLow)) {
2016-11-30 00:59:27 +00:00
U32* const nextPtr = bt + 2*(matchIndex & btMask);
2015-11-04 02:52:54 +00:00
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
2016-11-30 00:59:27 +00:00
2016-06-30 12:07:30 +00:00
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
2016-02-10 12:37:52 +00:00
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
2016-01-27 23:18:06 +00:00
if (matchIndex == predictedSmall) {
/* no need to check length, result known */
2016-01-16 23:12:55 +00:00
*smallerPtr = matchIndex;
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
2016-01-21 10:57:45 +00:00
predictedSmall = predictPtr[1] + (predictPtr[1]>0);
2016-01-16 23:12:55 +00:00
continue;
}
2016-01-27 23:18:06 +00:00
if (matchIndex == predictedLarge) {
2016-01-16 23:12:55 +00:00
*largerPtr = matchIndex;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
2016-01-21 10:57:45 +00:00
predictedLarge = predictPtr[0] + (predictPtr[0]>0);
2016-01-16 23:12:55 +00:00
continue;
}
#endif
2016-01-27 23:18:06 +00:00
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
2016-01-01 06:29:39 +00:00
match = base + matchIndex;
if (match[matchLength] == ip[matchLength])
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
2016-01-27 23:18:06 +00:00
} else {
2016-01-01 06:29:39 +00:00
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
2017-03-30 01:51:58 +00:00
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
2016-01-01 06:29:39 +00:00
}
2015-11-04 02:52:54 +00:00
if (matchLength > bestLength) {
bestLength = matchLength;
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
}
2015-11-04 11:05:27 +00:00
if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
2016-01-01 06:29:39 +00:00
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
2015-11-04 02:52:54 +00:00
2016-01-27 23:18:06 +00:00
if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
2015-11-04 02:52:54 +00:00
/* match is smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
2015-11-07 00:13:31 +00:00
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2015-11-04 02:52:54 +00:00
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
2015-11-07 00:13:31 +00:00
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
2016-01-27 23:18:06 +00:00
} else {
2015-11-04 02:52:54 +00:00
/* match is larger than current */
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
2015-11-07 00:13:31 +00:00
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2015-11-04 02:52:54 +00:00
largerPtr = nextPtr;
2015-11-07 00:13:31 +00:00
matchIndex = nextPtr[0];
2016-01-27 23:18:06 +00:00
} }
2015-11-04 02:52:54 +00:00
2015-11-04 11:05:27 +00:00
*smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
return 1;
2015-11-04 02:52:54 +00:00
}
static size_t ZSTD_insertBtAndFindBestMatch (
ZSTD_CCtx* zc,
2015-11-04 02:52:54 +00:00
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
2016-01-01 06:47:58 +00:00
U32 nbCompares, const U32 mls,
U32 extDict)
2015-11-04 02:52:54 +00:00
{
2016-07-27 19:05:12 +00:00
U32* const hashTable = zc->hashTable;
U32 const hashLog = zc->params.cParams.hashLog;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32* const bt = zc->chainTable;
U32 const btLog = zc->params.cParams.chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
2015-11-04 02:52:54 +00:00
U32 matchIndex = hashTable[h];
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const base = zc->base;
2016-01-01 06:47:58 +00:00
const BYTE* const dictBase = zc->dictBase;
const U32 dictLimit = zc->dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
2015-11-04 02:52:54 +00:00
const U32 current = (U32)(ip-base);
const U32 btLow = btMask >= current ? 0 : current - btMask;
const U32 windowLow = zc->lowLimit;
2015-11-04 02:52:54 +00:00
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
2015-12-31 18:08:44 +00:00
U32 matchEndIdx = current+8;
2015-11-04 11:05:27 +00:00
U32 dummy32; /* to be nullified at the end */
size_t bestLength = 0;
2015-11-04 02:52:54 +00:00
hashTable[h] = current; /* Update Hash Table */
2015-11-04 02:52:54 +00:00
2016-01-27 23:18:06 +00:00
while (nbCompares-- && (matchIndex > windowLow)) {
2016-11-30 00:59:27 +00:00
U32* const nextPtr = bt + 2*(matchIndex & btMask);
2015-11-04 02:52:54 +00:00
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
2016-01-01 06:47:58 +00:00
const BYTE* match;
2015-11-04 02:52:54 +00:00
2016-01-27 23:18:06 +00:00
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
2016-01-01 06:47:58 +00:00
match = base + matchIndex;
if (match[matchLength] == ip[matchLength])
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
2016-01-27 23:18:06 +00:00
} else {
2016-01-01 06:47:58 +00:00
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
2017-03-30 01:51:58 +00:00
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
2016-01-01 06:47:58 +00:00
}
2015-11-04 02:52:54 +00:00
2016-01-27 23:18:06 +00:00
if (matchLength > bestLength) {
if (matchLength > matchEndIdx - matchIndex)
2015-12-29 22:40:02 +00:00
matchEndIdx = matchIndex + (U32)matchLength;
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
2016-04-06 10:34:42 +00:00
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
2015-11-04 11:05:27 +00:00
if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
2015-11-23 12:34:21 +00:00
break; /* drop, to guarantee consistency (miss a little bit of compression) */
2015-11-04 02:52:54 +00:00
}
2016-01-27 23:18:06 +00:00
if (match[matchLength] < ip[matchLength]) {
2015-11-04 02:52:54 +00:00
/* match is smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
2015-11-23 12:34:21 +00:00
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2015-11-04 11:05:27 +00:00
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
2015-11-23 12:34:21 +00:00
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
2016-01-27 23:18:06 +00:00
} else {
2015-11-04 02:52:54 +00:00
/* match is larger than current */
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
2015-11-23 12:34:21 +00:00
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2015-11-04 02:52:54 +00:00
largerPtr = nextPtr;
2015-11-23 12:34:21 +00:00
matchIndex = nextPtr[0];
} }
2015-11-04 02:52:54 +00:00
2015-11-04 11:05:27 +00:00
*smallerPtr = *largerPtr = 0;
2015-11-04 02:52:54 +00:00
2015-12-31 18:08:44 +00:00
zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
return bestLength;
2015-11-04 02:52:54 +00:00
}
static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
while(idx < target)
idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
}
2016-03-20 15:00:00 +00:00
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch (
ZSTD_CCtx* zc,
2015-11-04 02:52:54 +00:00
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 mls)
{
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
2016-01-01 06:47:58 +00:00
return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
2015-11-04 02:52:54 +00:00
}
static size_t ZSTD_BtFindBestMatch_selectMLS (
ZSTD_CCtx* zc, /* Index table will be updated */
2015-11-04 02:52:54 +00:00
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
case 7 :
case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
2015-11-04 02:52:54 +00:00
}
}
static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
}
/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 mls)
{
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
2016-01-01 06:47:58 +00:00
return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
}
static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
case 7 :
case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
}
}
2015-11-05 16:32:18 +00:00
2016-07-27 19:05:12 +00:00
/* *********************************
* Hash Chain
2016-07-27 19:05:12 +00:00
***********************************/
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
2015-11-05 14:00:24 +00:00
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
FORCE_INLINE
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
{
U32* const hashTable = zc->hashTable;
const U32 hashLog = zc->params.cParams.hashLog;
U32* const chainTable = zc->chainTable;
const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
2015-11-05 14:00:24 +00:00
2016-06-21 06:01:51 +00:00
while(idx < target) { /* catch up */
size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++;
2015-11-23 13:37:59 +00:00
}
zc->nextToUpdate = target;
return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
2015-11-23 13:37:59 +00:00
}
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HcFindBestMatch_generic (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 mls, const U32 extDict)
2015-11-05 14:00:24 +00:00
{
U32* const chainTable = zc->chainTable;
const U32 chainSize = (1 << zc->params.cParams.chainLog);
const U32 chainMask = chainSize-1;
const BYTE* const base = zc->base;
const BYTE* const dictBase = zc->dictBase;
const U32 dictLimit = zc->dictLimit;
2016-04-05 17:01:10 +00:00
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = zc->lowLimit;
const U32 current = (U32)(ip-base);
const U32 minChain = current > chainSize ? current - chainSize : 0;
int nbAttempts=maxNbAttempts;
size_t ml=4-1;
2016-04-05 17:01:10 +00:00
/* HC4 match finder */
U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
2016-04-05 17:01:10 +00:00
2016-06-21 06:01:51 +00:00
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
const BYTE* match;
size_t currentMl=0;
if ((!extDict) || matchIndex >= dictLimit) {
match = base + matchIndex;
if (match[ml] == ip[ml]) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else {
match = dictBase + matchIndex;
if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
}
2016-04-05 17:01:10 +00:00
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
2016-04-05 17:01:10 +00:00
if (matchIndex <= minChain) break;
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
2016-04-05 17:01:10 +00:00
return ml;
}
2016-04-05 17:01:10 +00:00
FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
ZSTD_CCtx* zc,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
}
}
2016-04-05 17:01:10 +00:00
FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
ZSTD_CCtx* zc,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
2015-11-05 14:00:24 +00:00
}
}
2016-04-05 17:01:10 +00:00
2015-11-22 12:24:05 +00:00
/* *******************************
2015-11-22 01:53:43 +00:00
* Common parser - lazy strategy
2015-11-22 12:24:05 +00:00
*********************************/
2015-11-05 14:00:24 +00:00
FORCE_INLINE
2016-01-23 18:28:41 +00:00
void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
2015-11-22 01:42:28 +00:00
const U32 searchMethod, const U32 depth)
2015-11-04 02:52:54 +00:00
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ctx->base + ctx->dictLimit;
2015-11-04 02:52:54 +00:00
2016-04-09 18:32:00 +00:00
U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
U32 const mls = ctx->params.cParams.searchLength;
2015-11-04 02:52:54 +00:00
typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2015-11-05 14:00:24 +00:00
size_t* offsetPtr,
U32 maxNbAttempts, U32 matchLengthSearch);
2016-06-13 19:43:06 +00:00
searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
2016-07-02 23:23:58 +00:00
U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
2016-06-13 23:49:25 +00:00
/* init */
ip += (ip==base);
2016-04-06 10:34:42 +00:00
ctx->nextToUpdate3 = ctx->nextToUpdate;
2016-07-02 23:23:58 +00:00
{ U32 const maxRep = (U32)(ip-base);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
}
2015-11-04 02:52:54 +00:00
/* Match Loop */
2016-01-27 23:18:06 +00:00
while (ip < ilimit) {
2015-11-21 14:27:35 +00:00
size_t matchLength=0;
size_t offset=0;
const BYTE* start=ip+1;
2015-11-04 02:52:54 +00:00
2015-11-20 11:03:53 +00:00
/* check repCode */
2016-07-02 23:23:58 +00:00
if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
2015-11-20 11:03:53 +00:00
/* repcode : we take it */
matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
if (depth==0) goto _storeSequence;
2015-11-04 02:52:54 +00:00
}
2016-03-19 17:08:32 +00:00
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
2015-11-23 15:17:21 +00:00
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
if (matchLength < 4) {
2015-11-23 15:17:21 +00:00
ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
2015-11-04 02:52:54 +00:00
/* let's try to find a better solution */
2015-11-22 01:42:28 +00:00
if (depth>=1)
2016-01-27 23:18:06 +00:00
while (ip<ilimit) {
2015-11-04 02:52:54 +00:00
ip ++;
2016-07-02 23:23:58 +00:00
if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
2016-03-19 17:08:32 +00:00
int const gain2 = (int)(mlRep * 3);
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
2015-11-04 02:52:54 +00:00
}
2016-03-19 17:08:32 +00:00
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
2015-11-06 17:44:54 +00:00
matchLength = ml2, offset = offset2, start = ip;
2015-11-04 02:52:54 +00:00
continue; /* search a better one */
2016-01-27 23:18:06 +00:00
} }
2015-11-04 02:52:54 +00:00
/* let's find an even better one */
2016-01-27 23:18:06 +00:00
if ((depth==2) && (ip<ilimit)) {
2015-11-04 02:52:54 +00:00
ip ++;
2016-07-02 23:23:58 +00:00
if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
2016-03-19 17:08:32 +00:00
int const gain2 = (int)(ml2 * 4);
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
if ((ml2 >= 4) && (gain2 > gain1))
matchLength = ml2, offset = 0, start = ip;
2015-11-04 02:52:54 +00:00
}
2016-03-19 17:08:32 +00:00
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
2015-11-06 17:44:54 +00:00
matchLength = ml2, offset = offset2, start = ip;
continue;
2016-01-27 23:18:06 +00:00
} } }
2015-11-04 02:52:54 +00:00
break; /* nothing found : store previous solution */
}
2016-02-16 16:41:03 +00:00
/* catch up */
if (offset) {
while ( (start > anchor)
&& (start > base+offset-ZSTD_REP_MOVE)
&& (start[-1] == start[-1-offset+ZSTD_REP_MOVE]) ) /* only search for offset within prefix */
2015-11-09 02:19:33 +00:00
{ start--; matchLength++; }
2016-07-02 23:23:58 +00:00
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2015-11-09 02:19:33 +00:00
}
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
2015-11-09 02:19:33 +00:00
anchor = ip = start + matchLength;
2015-11-04 02:52:54 +00:00
}
/* check immediate repcode */
while ( (ip <= ilimit)
2016-07-02 23:23:58 +00:00
&& ((offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
2016-07-02 23:23:58 +00:00
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
} }
2015-11-04 02:52:54 +00:00
2016-06-13 23:49:25 +00:00
/* Save reps for next block */
2017-01-20 01:33:37 +00:00
ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
2016-06-13 23:49:25 +00:00
2015-11-04 02:52:54 +00:00
/* Last Literals */
2016-03-19 17:08:32 +00:00
{ size_t const lastLLSize = iend - anchor;
2015-11-04 02:52:54 +00:00
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
2016-01-31 10:25:48 +00:00
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-10-22 14:31:46 +00:00
{
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
2015-11-05 14:00:24 +00:00
}
2015-10-22 14:31:46 +00:00
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-11-05 14:00:24 +00:00
{
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
2015-11-05 14:00:24 +00:00
}
2015-10-22 14:31:46 +00:00
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-11-05 14:00:24 +00:00
{
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
2015-10-22 14:31:46 +00:00
}
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-10-31 11:57:14 +00:00
{
ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
2015-10-31 11:57:14 +00:00
}
2015-11-22 01:53:43 +00:00
FORCE_INLINE
2016-01-23 18:28:41 +00:00
void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
2015-11-22 01:53:43 +00:00
const U32 searchMethod, const U32 depth)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ctx->base;
const U32 dictLimit = ctx->dictLimit;
2016-06-13 19:43:06 +00:00
const U32 lowestIndex = ctx->lowLimit;
2015-11-22 01:53:43 +00:00
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictBase = ctx->dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
2015-11-24 13:06:07 +00:00
const BYTE* const dictStart = dictBase + ctx->lowLimit;
2015-11-22 01:53:43 +00:00
const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
const U32 mls = ctx->params.cParams.searchLength;
2015-11-22 01:53:43 +00:00
typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
size_t* offsetPtr,
U32 maxNbAttempts, U32 matchLengthSearch);
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
2015-11-22 01:53:43 +00:00
2016-07-02 23:28:16 +00:00
U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
2016-07-02 23:28:16 +00:00
/* init */
2016-04-06 10:34:42 +00:00
ctx->nextToUpdate3 = ctx->nextToUpdate;
2016-06-13 23:49:25 +00:00
ip += (ip == prefixStart);
2015-11-22 01:53:43 +00:00
/* Match Loop */
2016-01-27 23:18:06 +00:00
while (ip < ilimit) {
2015-11-22 01:53:43 +00:00
size_t matchLength=0;
size_t offset=0;
const BYTE* start=ip+1;
2015-11-22 01:53:43 +00:00
U32 current = (U32)(ip-base);
/* check repCode */
2016-07-02 23:28:16 +00:00
{ const U32 repIndex = (U32)(current+1 - offset_1);
2015-11-22 01:53:43 +00:00
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
2016-06-13 19:43:06 +00:00
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
2015-11-22 01:53:43 +00:00
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
if (depth==0) goto _storeSequence;
2016-01-27 23:18:06 +00:00
} }
2015-11-22 01:53:43 +00:00
2016-03-19 17:08:32 +00:00
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
2015-11-23 15:17:21 +00:00
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
if (matchLength < 4) {
2015-11-23 15:17:21 +00:00
ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
2015-11-22 01:53:43 +00:00
/* let's try to find a better solution */
if (depth>=1)
2016-01-27 23:18:06 +00:00
while (ip<ilimit) {
2015-11-22 01:53:43 +00:00
ip ++;
current++;
/* check repCode */
if (offset) {
2016-07-02 23:28:16 +00:00
const U32 repIndex = (U32)(current - offset_1);
2015-11-22 01:53:43 +00:00
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
2016-06-13 19:43:06 +00:00
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
2015-11-22 01:53:43 +00:00
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2016-03-19 17:08:32 +00:00
int const gain2 = (int)(repLength * 3);
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
if ((repLength >= 4) && (gain2 > gain1))
matchLength = repLength, offset = 0, start = ip;
2016-01-27 23:18:06 +00:00
} }
2015-11-22 01:53:43 +00:00
/* search match, depth 1 */
2016-03-19 17:08:32 +00:00
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
2015-11-22 01:53:43 +00:00
matchLength = ml2, offset = offset2, start = ip;
continue; /* search a better one */
2016-01-27 23:18:06 +00:00
} }
2015-11-22 01:53:43 +00:00
/* let's find an even better one */
2016-01-27 23:18:06 +00:00
if ((depth==2) && (ip<ilimit)) {
2015-11-22 01:53:43 +00:00
ip ++;
current++;
/* check repCode */
if (offset) {
2016-07-02 23:28:16 +00:00
const U32 repIndex = (U32)(current - offset_1);
2015-11-22 01:53:43 +00:00
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
2016-06-13 19:43:06 +00:00
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
2015-11-22 01:53:43 +00:00
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
int const gain2 = (int)(repLength * 4);
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
if ((repLength >= 4) && (gain2 > gain1))
matchLength = repLength, offset = 0, start = ip;
2016-01-27 23:18:06 +00:00
} }
2015-11-22 01:53:43 +00:00
/* search match, depth 2 */
2016-03-19 17:08:32 +00:00
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
2015-11-22 01:53:43 +00:00
matchLength = ml2, offset = offset2, start = ip;
continue;
2016-01-27 23:18:06 +00:00
} } }
2015-11-22 01:53:43 +00:00
break; /* nothing found : store previous solution */
}
/* catch up */
if (offset) {
2016-05-31 16:13:56 +00:00
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
2015-11-24 13:06:07 +00:00
const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
2016-07-02 23:28:16 +00:00
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2015-11-22 01:53:43 +00:00
}
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
/* check immediate repcode */
while (ip <= ilimit) {
2016-07-02 23:28:16 +00:00
const U32 repIndex = (U32)((ip-base) - offset_2);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
2016-06-13 19:43:06 +00:00
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2016-07-02 23:28:16 +00:00
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
}
break;
2016-01-27 23:18:06 +00:00
} }
2015-11-22 01:53:43 +00:00
2016-06-13 23:49:25 +00:00
/* Save reps for next block */
2017-01-20 01:33:37 +00:00
ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2016-06-13 23:49:25 +00:00
2015-11-22 01:53:43 +00:00
/* Last Literals */
2016-03-20 15:00:00 +00:00
{ size_t const lastLLSize = iend - anchor;
2015-11-22 01:53:43 +00:00
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
2016-01-23 18:28:41 +00:00
void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-11-22 01:53:43 +00:00
{
ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
2015-11-22 01:53:43 +00:00
}
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-11-22 02:12:28 +00:00
{
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
2015-11-22 02:12:28 +00:00
}
2015-11-22 01:53:43 +00:00
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-11-22 11:22:04 +00:00
{
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
2015-11-22 11:22:04 +00:00
}
2016-01-23 18:28:41 +00:00
static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2015-11-23 12:34:21 +00:00
{
2016-01-25 03:22:03 +00:00
ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
2015-11-23 12:34:21 +00:00
}
/* The optimal parser */
#include "zstd_opt.h"
static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
#ifdef ZSTD_OPT_H_91842398743
2016-10-25 10:25:07 +00:00
ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
#else
(void)ctx; (void)src; (void)srcSize;
return;
#endif
}
static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
#ifdef ZSTD_OPT_H_91842398743
ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
#else
(void)ctx; (void)src; (void)srcSize;
return;
#endif
}
2016-02-22 09:06:17 +00:00
static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2016-01-31 10:25:48 +00:00
{
#ifdef ZSTD_OPT_H_91842398743
2016-10-25 10:25:07 +00:00
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
#else
(void)ctx; (void)src; (void)srcSize;
return;
#endif
}
static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
#ifdef ZSTD_OPT_H_91842398743
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
#else
(void)ctx; (void)src; (void)srcSize;
return;
#endif
2016-01-31 10:25:48 +00:00
}
2015-11-21 14:27:35 +00:00
2016-01-23 18:28:41 +00:00
typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
2015-11-04 11:05:27 +00:00
2016-01-26 02:14:20 +00:00
static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2015-10-31 11:57:14 +00:00
{
2016-10-25 10:25:07 +00:00
static const ZSTD_blockCompressor blockCompressor[2][8] = {
{ ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
{ ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
};
return blockCompressor[extDict][(U32)strat];
2015-10-31 11:57:14 +00:00
}
2016-03-15 00:24:33 +00:00
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-11-04 11:05:27 +00:00
{
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
const BYTE* const base = zc->base;
const BYTE* const istart = (const BYTE*)src;
const U32 current = (U32)(istart-base);
2016-02-02 13:36:49 +00:00
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
ZSTD_resetSeqStore(&(zc->seqStore));
if (current > zc->nextToUpdate + 384)
zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */
2016-01-23 18:28:41 +00:00
blockCompressor(zc, src, srcSize);
2016-03-15 00:24:33 +00:00
return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
2015-11-04 11:05:27 +00:00
}
2016-07-27 22:55:43 +00:00
/*! ZSTD_compress_generic() :
* Compress a chunk of data into one or multiple blocks.
* All blocks will be terminated, all input will be consumed.
* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
* Frame is supposed already started (header already produced)
* @return : compressed size, or an error code
*/
2016-05-31 16:13:56 +00:00
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
2016-07-27 22:55:43 +00:00
const void* src, size_t srcSize,
U32 lastFrameChunk)
2015-10-22 14:31:46 +00:00
{
2016-05-31 16:13:56 +00:00
size_t blockSize = cctx->blockSize;
2015-10-22 14:31:46 +00:00
size_t remaining = srcSize;
const BYTE* ip = (const BYTE*)src;
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
2016-07-27 19:21:36 +00:00
U32 const maxDist = 1 << cctx->params.cParams.windowLog;
2015-11-01 11:40:22 +00:00
if (cctx->params.fParams.checksumFlag && srcSize)
2016-05-31 16:13:56 +00:00
XXH64_update(&cctx->xxhState, src, srcSize);
2016-02-02 13:36:49 +00:00
while (remaining) {
2016-07-27 22:55:43 +00:00
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
2015-11-04 17:19:39 +00:00
size_t cSize;
2015-10-22 14:31:46 +00:00
if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
2015-11-04 17:19:39 +00:00
if (remaining < blockSize) blockSize = remaining;
2016-08-02 12:26:00 +00:00
/* preemptive overflow correction */
if (cctx->lowLimit > (3U<<29)) {
2016-12-11 23:47:30 +00:00
U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
2016-12-11 23:25:07 +00:00
U32 const current = (U32)(ip - cctx->base);
2016-12-11 23:47:30 +00:00
U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
2016-12-11 23:25:07 +00:00
U32 const correction = current - newCurrent;
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
2016-08-02 12:26:00 +00:00
ZSTD_reduceIndex(cctx, correction);
cctx->base += correction;
cctx->dictBase += correction;
2016-12-11 23:25:07 +00:00
cctx->lowLimit -= correction;
2016-08-02 12:26:00 +00:00
cctx->dictLimit -= correction;
if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
else cctx->nextToUpdate -= correction;
}
if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
2016-03-19 17:08:32 +00:00
/* enforce maxDist */
2016-05-31 16:13:56 +00:00
U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
2015-11-24 13:06:07 +00:00
}
2016-05-31 16:13:56 +00:00
cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
if (ZSTD_isError(cSize)) return cSize;
2015-10-22 14:31:46 +00:00
2016-02-02 13:36:49 +00:00
if (cSize == 0) { /* block is not compressible */
2016-07-27 22:55:43 +00:00
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
cSize = ZSTD_blockHeaderSize+blockSize;
2016-02-02 13:36:49 +00:00
} else {
2016-07-27 22:55:43 +00:00
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(op, cBlockHeader24);
2016-07-27 22:55:43 +00:00
cSize += ZSTD_blockHeaderSize;
2015-10-22 14:31:46 +00:00
}
2015-11-04 17:19:39 +00:00
remaining -= blockSize;
2016-03-15 00:24:33 +00:00
dstCapacity -= cSize;
2015-11-04 17:19:39 +00:00
ip += blockSize;
2015-10-22 14:31:46 +00:00
op += cSize;
}
2016-07-28 13:29:08 +00:00
if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
2015-10-22 14:31:46 +00:00
return op-ostart;
}
2016-04-12 13:52:33 +00:00
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2016-05-29 03:01:04 +00:00
ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
2016-04-12 13:52:33 +00:00
{ BYTE* const op = (BYTE*)dst;
2016-07-27 19:05:12 +00:00
U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
U32 const checksumFlag = params.fParams.checksumFlag>0;
U32 const windowSize = 1U << params.cParams.windowLog;
U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2016-07-27 19:05:12 +00:00
BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
U32 const fcsCode = params.fParams.contentSizeFlag ?
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */
0;
2016-07-27 19:05:12 +00:00
BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2016-05-29 03:01:04 +00:00
size_t pos;
if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
2016-04-12 13:52:33 +00:00
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
op[4] = frameHeaderDecriptionByte; pos=5;
if (!singleSegment) op[pos++] = windowLogByte;
2016-05-29 03:01:04 +00:00
switch(dictIDSizeCode)
{
default: /* impossible */
case 0 : break;
case 1 : op[pos] = (BYTE)(dictID); pos++; break;
2016-07-27 19:21:36 +00:00
case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
2016-05-29 03:01:04 +00:00
case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
}
switch(fcsCode)
2016-04-12 13:52:33 +00:00
{
default: /* impossible */
case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
2016-05-29 03:01:04 +00:00
case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
2016-04-12 13:52:33 +00:00
}
2016-05-29 03:01:04 +00:00
return pos;
2016-04-12 13:52:33 +00:00
}
2016-08-02 12:26:00 +00:00
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
2016-03-23 21:31:57 +00:00
void* dst, size_t dstCapacity,
2016-01-09 00:08:23 +00:00
const void* src, size_t srcSize,
2016-07-27 22:55:43 +00:00
U32 frame, U32 lastFrameChunk)
2015-10-22 14:31:46 +00:00
{
const BYTE* const ip = (const BYTE*) src;
2016-04-12 13:52:33 +00:00
size_t fhSize = 0;
2016-01-07 14:35:18 +00:00
2016-08-02 12:26:00 +00:00
if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
2016-07-27 19:21:36 +00:00
2016-08-02 12:26:00 +00:00
if (frame && (cctx->stage==ZSTDcs_init)) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
2016-04-12 13:52:33 +00:00
if (ZSTD_isError(fhSize)) return fhSize;
dstCapacity -= fhSize;
dst = (char*)dst + fhSize;
2016-08-02 12:26:00 +00:00
cctx->stage = ZSTDcs_ongoing;
2016-01-07 14:35:18 +00:00
}
2015-10-22 14:31:46 +00:00
/* Check if blocks follow each other */
2016-08-02 12:26:00 +00:00
if (src != cctx->nextSrc) {
/* not contiguous */
2016-08-02 12:26:00 +00:00
ptrdiff_t const delta = cctx->nextSrc - ip;
cctx->lowLimit = cctx->dictLimit;
cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
cctx->dictBase = cctx->base;
cctx->base -= delta;
cctx->nextToUpdate = cctx->dictLimit;
if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */
}
2016-08-02 12:26:00 +00:00
/* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
cctx->lowLimit = lowLimitMax;
}
2016-08-02 12:26:00 +00:00
cctx->nextSrc = ip + srcSize;
2015-10-22 14:31:46 +00:00
if (srcSize) {
size_t const cSize = frame ?
2016-08-02 12:26:00 +00:00
ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2016-01-07 14:35:18 +00:00
if (ZSTD_isError(cSize)) return cSize;
cctx->consumedSrcSize += srcSize;
2016-04-12 13:52:33 +00:00
return cSize + fhSize;
} else
return fhSize;
2015-10-22 14:31:46 +00:00
}
2016-01-09 00:08:23 +00:00
2016-07-27 23:17:22 +00:00
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
2016-03-23 21:31:57 +00:00
void* dst, size_t dstCapacity,
2016-01-09 00:08:23 +00:00
const void* src, size_t srcSize)
{
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
2016-07-27 23:17:22 +00:00
}
2016-07-18 14:52:10 +00:00
size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
2016-01-09 00:08:23 +00:00
{
2016-07-18 14:52:10 +00:00
return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
}
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
2016-01-09 00:08:23 +00:00
}
/*! ZSTD_loadDictionaryContent() :
* @return : 0, or an error code
*/
2016-01-26 02:14:20 +00:00
static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
/* input becomes current prefix */
zc->lowLimit = zc->dictLimit;
zc->dictLimit = (U32)(zc->nextSrc - zc->base);
zc->dictBase = zc->base;
zc->base += ip - zc->nextSrc;
zc->nextToUpdate = zc->dictLimit;
zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
zc->nextSrc = iend;
2016-07-27 19:05:12 +00:00
if (srcSize <= HASH_READ_SIZE) return 0;
switch(zc->params.cParams.strategy)
{
case ZSTD_fast:
ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
break;
2016-07-12 07:47:31 +00:00
case ZSTD_dfast:
ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
break;
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
if (srcSize >= HASH_READ_SIZE)
ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
break;
case ZSTD_btlazy2:
2016-02-15 06:21:54 +00:00
case ZSTD_btopt:
2016-10-25 10:25:07 +00:00
case ZSTD_btopt2:
if (srcSize >= HASH_READ_SIZE)
ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
break;
default:
return ERROR(GENERIC); /* strategy doesn't exist; impossible */
}
[zstdmt] Fix MSAN failure with ZSTD_p_forceWindow Reproduction steps: ``` make zstreamtest CC=clang CFLAGS="-O3 -g -fsanitize=memory -fsanitize-memory-track-origins" ./zstreamtest -vv -t4178 -i4178 -s4531 ``` How to get to the error in gdb (may be a more efficient way): * 2 breaks at zstd_compress.c:2418 -- in ZSTD_compressContinue_internal() * 2 breaks at zstd_compress.c:2276 -- in ZSTD_compressBlock_internal() * 1 break at zstd_compress.c:1547 Why the error occurred: When `zc->forceWindow == 1`, after calling `ZSTD_loadDictionaryContent()` we have `zc->loadedDictEnd == zc->nextToUpdate == 0`. But, we've really loaded up to `iend` into the dictionary. Then in `ZSTD_compressBlock_internal()` we see that `current > zc->nextToUpdate + 384`, so we load the last 192 bytes a second time. In this case the bytes we are loading are a block of all 0s, starting in the previous block. So when we are loading the last 192 bytes, we find a `match` in the future, 183 bytes beyond `ip`. Since the block is all 0s, the match extends to the end of the block. But in `ZSTD_count()` we only check that `pIn < pInLoopLimit`, but since `pMatch > pIn`, `pMatch` eventually points past the end of the buffer, causing the MSAN failure. The fix: The line changed sets sets `zc->nextToUpdate` to the end of the dictionary. This is the behavior that existed before `ZSTD_p_forceWindow` was introduced. This fixes the exposing test case. Since the code doesn't fail without `zc->forceWindow`, it makes sense that this works. I've run the command `./zstreamtest -T2mn` 64 times without failures. CI should also verify nothing obvious broke.
2017-02-14 02:27:34 +00:00
zc->nextToUpdate = (U32)(iend - zc->base);
return 0;
}
2015-10-22 14:31:46 +00:00
/* Dictionaries that assign zero probability to symbols that show up causes problems
when FSE encoding. Refuse dictionaries that assign zero probability to symbols
that we may encounter during compression.
NOTE: This behavior is not standard and could be improved in the future. */
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
U32 s;
if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
for (s = 0; s <= maxSymbolValue; ++s) {
if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
}
return 0;
}
2016-01-26 02:14:20 +00:00
/* Dictionary format :
* See :
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
*/
/*! ZSTD_loadZstdDictionary() :
* @return : 0, or an error code
* assumptions : magic number supposed already checked
* dictSize supposed > 8
*/
static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
2016-01-26 02:14:20 +00:00
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff;
BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
2016-05-31 16:13:56 +00:00
dictPtr += 4; /* skip magic number */
cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
dictPtr += 4;
{ size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
2016-05-31 16:13:56 +00:00
}
{ unsigned offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
dictionary_corrupted);
dictPtr += offcodeHeaderSize;
2016-05-31 16:13:56 +00:00
}
{ short matchlengthNCount[MaxML+1];
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
2016-05-31 16:13:56 +00:00
}
{ short litlengthNCount[MaxLL+1];
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
2016-05-31 16:13:56 +00:00
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
Fix buffer overrun in ZSTD_loadDictEntropyStats() The table log set by `FSE_readNCount()` was not checked in `ZSTD_loadDictEntropyStats()`. This caused `FSE_buildCTable()` to stack/heap overflow in a few places. The benchmarks look good, there is no obvious compression performance regression: > ./zstds/zstd.opt.0 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 271.6 MB/s , 716.8 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 204.8 MB/s , 671.1 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 156.8 MB/s , 658.6 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 136.4 MB/s , 665.3 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 98.9 MB/s , 647.0 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 65.2 MB/s , 670.4 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.9 MB/s , 688.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 32.4 MB/s , 711.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.1 MB/s , 718.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 22.2 MB/s , 721.8 MB/s > ./compress_zstds/zstd.opt.1 -i10 -b1 -e10 ~/bench/silesia.tar 1#silesia.tar : 211988480 -> 73656930 (2.878), 273.8 MB/s , 722.0 MB/s 2#silesia.tar : 211988480 -> 70162842 (3.021), 203.2 MB/s , 666.6 MB/s 3#silesia.tar : 211988480 -> 66997986 (3.164), 157.4 MB/s , 666.5 MB/s 4#silesia.tar : 211988480 -> 66002591 (3.212), 132.1 MB/s , 661.9 MB/s 5#silesia.tar : 211988480 -> 65008480 (3.261), 96.8 MB/s , 641.6 MB/s 6#silesia.tar : 211988480 -> 62979643 (3.366), 63.1 MB/s , 677.0 MB/s 7#silesia.tar : 211988480 -> 61974560 (3.421), 44.3 MB/s , 678.2 MB/s 8#silesia.tar : 211988480 -> 61028308 (3.474), 33.1 MB/s , 708.9 MB/s 9#silesia.tar : 211988480 -> 60416751 (3.509), 21.5 MB/s , 710.1 MB/s 10#silesia.tar : 211988480 -> 60174239 (3.523), 21.9 MB/s , 723.9 MB/s
2016-10-17 23:55:52 +00:00
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
dictionary_corrupted);
dictPtr += litlengthHeaderSize;
2016-05-31 16:13:56 +00:00
}
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
cctx->rep[0] = MEM_readLE32(dictPtr+0);
cctx->rep[1] = MEM_readLE32(dictPtr+4);
cctx->rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
U32 offcodeMax = MaxOff;
if (dictContentSize <= ((U32)-1) - 128 KB) {
U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
2016-10-24 21:11:27 +00:00
}
/* All offset values <= dictContentSize + 128 KB must be representable */
CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
/* All repCodes must be <= dictContentSize and != 0*/
{ U32 u;
for (u=0; u<3; u++) {
if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
} }
cctx->fseCTables_ready = 1;
cctx->hufCTable_repeatMode = HUF_repeat_valid;
return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
}
2016-01-26 02:14:20 +00:00
}
2016-03-15 00:24:33 +00:00
/** ZSTD_compress_insertDictionary() :
* @return : 0, or an error code */
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
2016-01-26 02:14:20 +00:00
{
2016-05-29 03:01:04 +00:00
if ((dict==NULL) || (dictSize<=8)) return 0;
2016-01-26 14:58:49 +00:00
/* dict as pure content */
if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
2016-03-15 00:24:33 +00:00
/* dict as zstd dictionary */
return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
2016-01-07 14:35:18 +00:00
}
2016-04-01 13:48:48 +00:00
/*! ZSTD_compressBegin_internal() :
2016-01-07 14:35:18 +00:00
* @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2016-01-26 15:31:22 +00:00
const void* dict, size_t dictSize,
ZSTD_parameters params, U64 pledgedSrcSize)
2015-10-22 14:31:46 +00:00
{
ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
CHECK_F(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, crp));
return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
2015-10-22 14:31:46 +00:00
}
2015-10-25 13:06:35 +00:00
2016-04-01 13:48:48 +00:00
/*! ZSTD_compressBegin_advanced() :
* @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
2016-04-01 13:48:48 +00:00
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
2016-04-01 13:48:48 +00:00
{
/* compression parameters verification and optimization */
CHECK_F(ZSTD_checkCParams(params.cParams));
return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
2016-04-01 13:48:48 +00:00
}
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
2016-01-26 02:14:20 +00:00
{
2016-06-27 13:28:45 +00:00
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
2016-01-26 15:31:22 +00:00
}
2015-11-25 13:42:45 +00:00
2016-04-04 10:10:00 +00:00
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
2015-11-25 13:42:45 +00:00
{
return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
2015-10-25 13:06:35 +00:00
}
2016-07-28 13:29:08 +00:00
/*! ZSTD_writeEpilogue() :
* Ends a frame.
2015-11-25 13:42:45 +00:00
* @return : nb of bytes written into dst (or an error code) */
2016-07-28 13:29:08 +00:00
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
2016-07-27 22:55:43 +00:00
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
2016-04-12 13:52:33 +00:00
size_t fhSize = 0;
if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
/* special case : empty frame */
2016-07-27 22:55:43 +00:00
if (cctx->stage == ZSTDcs_init) {
2016-05-31 16:13:56 +00:00
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
2016-04-12 13:52:33 +00:00
if (ZSTD_isError(fhSize)) return fhSize;
dstCapacity -= fhSize;
op += fhSize;
2016-07-27 19:05:12 +00:00
cctx->stage = ZSTDcs_ongoing;
2016-01-07 14:35:18 +00:00
}
2016-07-27 22:55:43 +00:00
if (cctx->stage != ZSTDcs_ending) {
/* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, cBlockHeader24);
op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize;
}
if (cctx->params.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, checksum);
op += 4;
2016-05-31 16:13:56 +00:00
}
2016-07-27 19:05:12 +00:00
cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
2016-07-27 22:55:43 +00:00
return op-ostart;
}
2016-07-28 13:29:08 +00:00
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
size_t endResult;
size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 1 /* last chunk */);
2016-07-28 13:29:08 +00:00
if (ZSTD_isError(cSize)) return cSize;
endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
if (ZSTD_isError(endResult)) return endResult;
if (cctx->params.fParams.contentSizeFlag) { /* control src size */
if (cctx->frameContentSize != cctx->consumedSrcSize) return ERROR(srcSize_wrong);
}
2016-07-28 13:29:08 +00:00
return cSize + endResult;
}
2016-07-27 23:25:46 +00:00
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2016-03-15 00:24:33 +00:00
void* dst, size_t dstCapacity,
2015-11-25 13:42:45 +00:00
const void* src, size_t srcSize,
2015-12-18 00:26:48 +00:00
const void* dict,size_t dictSize,
2015-11-25 13:42:45 +00:00
ZSTD_parameters params)
2015-10-22 14:31:46 +00:00
{
2016-09-06 13:36:19 +00:00
CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
2016-07-28 13:29:08 +00:00
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2015-10-22 14:31:46 +00:00
}
size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params)
{
CHECK_F(ZSTD_checkCParams(params.cParams));
return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
}
size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
const void* dict, size_t dictSize, int compressionLevel)
2015-12-18 00:26:48 +00:00
{
2016-11-03 22:52:01 +00:00
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
params.fParams.contentSizeFlag = 1;
return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2015-12-18 00:26:48 +00:00
}
2016-03-15 00:24:33 +00:00
size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2015-10-25 13:06:35 +00:00
{
return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
2015-10-25 13:06:35 +00:00
}
2016-03-15 00:24:33 +00:00
size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2015-10-22 14:31:46 +00:00
{
2015-10-29 21:02:40 +00:00
size_t result;
ZSTD_CCtx ctxBody;
2015-10-29 17:41:45 +00:00
memset(&ctxBody, 0, sizeof(ctxBody));
memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
2016-03-15 00:24:33 +00:00
result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
ZSTD_free(ctxBody.workSpace, defaultCustomMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
2015-10-29 21:02:40 +00:00
return result;
2015-10-22 14:31:46 +00:00
}
2015-12-17 22:50:15 +00:00
/* ===== Dictionary API ===== */
struct ZSTD_CDict_s {
2016-12-21 15:20:11 +00:00
void* dictBuffer;
const void* dictContent;
size_t dictContentSize;
ZSTD_CCtx* refContext;
}; /* typedef'd tp ZSTD_CDict within "zstd.h" */
size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support sizeof on NULL */
2016-12-23 21:25:03 +00:00
return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
}
2016-12-21 15:20:11 +00:00
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
ZSTD_parameters params, ZSTD_customMem customMem)
{
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree) return NULL;
{ ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2016-12-21 15:20:11 +00:00
if (!cdict || !cctx) {
ZSTD_free(cdict, customMem);
2017-02-21 17:59:56 +00:00
ZSTD_freeCCtx(cctx);
return NULL;
}
2016-12-21 15:20:11 +00:00
if ((byReference) || (!dictBuffer) || (!dictSize)) {
cdict->dictBuffer = NULL;
cdict->dictContent = dictBuffer;
} else {
void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
2016-12-21 15:44:35 +00:00
if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
2016-12-21 15:20:11 +00:00
memcpy(internalBuffer, dictBuffer, dictSize);
cdict->dictBuffer = internalBuffer;
cdict->dictContent = internalBuffer;
}
2016-12-21 15:20:11 +00:00
{ size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
if (ZSTD_isError(errorCode)) {
2016-12-21 15:20:11 +00:00
ZSTD_free(cdict->dictBuffer, customMem);
ZSTD_free(cdict, customMem);
2017-02-21 17:59:56 +00:00
ZSTD_freeCCtx(cctx);
return NULL;
} }
cdict->refContext = cctx;
2016-12-21 15:20:11 +00:00
cdict->dictContentSize = dictSize;
return cdict;
}
}
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
params.fParams.contentSizeFlag = 1;
2016-12-21 15:20:11 +00:00
return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
}
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
params.fParams.contentSizeFlag = 1;
return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support free on NULL */
2016-09-21 14:46:08 +00:00
{ ZSTD_customMem const cMem = cdict->refContext->customMem;
ZSTD_freeCCtx(cdict->refContext);
2016-12-21 15:44:35 +00:00
ZSTD_free(cdict->dictBuffer, cMem);
ZSTD_free(cdict, cMem);
return 0;
}
}
static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
return ZSTD_getParamsFromCCtx(cdict->refContext);
}
/* ZSTD_compressBegin_usingCDict_advanced() :
* cdict must be != NULL */
size_t ZSTD_compressBegin_usingCDict_advanced(
ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
2016-09-15 12:54:07 +00:00
{
if (cdict==NULL) return ERROR(GENERIC); /* does not support NULL cdict */
if (cdict->dictContentSize)
CHECK_F( ZSTD_copyCCtx_internal(cctx, cdict->refContext, fParams, pledgedSrcSize) )
else {
ZSTD_parameters params = cdict->refContext->params;
params.fParams = fParams;
CHECK_F(ZSTD_compressBegin_internal(cctx, NULL, 0, params, pledgedSrcSize));
}
2016-09-15 12:54:07 +00:00
return 0;
}
/* ZSTD_compressBegin_usingCDict() :
* pledgedSrcSize=0 means "unknown"
* if pledgedSrcSize>0, it will enable contentSizeFlag */
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
{
ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
fParams.contentSizeFlag = (pledgedSrcSize > 0);
return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, pledgedSrcSize);
}
/*! ZSTD_compress_usingCDict() :
* Compression using a digested Dictionary.
* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
* Note that compression parameters are decided at CDict creation time
* while frame parameters are hardcoded */
2016-09-15 12:54:07 +00:00
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict)
{
ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
2016-08-12 11:04:27 +00:00
/* ******************************************************************
* Streaming
********************************************************************/
2016-08-11 23:20:36 +00:00
typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
struct ZSTD_CStream_s {
2016-09-15 12:11:01 +00:00
ZSTD_CCtx* cctx;
ZSTD_CDict* cdictLocal;
const ZSTD_CDict* cdict;
2016-08-11 23:20:36 +00:00
char* inBuff;
size_t inBuffSize;
size_t inToCompress;
size_t inBuffPos;
size_t inBuffTarget;
size_t blockSize;
char* outBuff;
size_t outBuffSize;
size_t outBuffContentSize;
size_t outBuffFlushedSize;
ZSTD_cStreamStage stage;
U32 checksum;
U32 frameEnded;
U64 pledgedSrcSize;
ZSTD_parameters params;
2016-08-11 23:20:36 +00:00
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CStream within "zstd.h" */
ZSTD_CStream* ZSTD_createCStream(void)
{
return ZSTD_createCStream_advanced(defaultCustomMem);
}
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
{
ZSTD_CStream* zcs;
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree) return NULL;
2016-08-11 23:20:36 +00:00
zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
2016-08-11 23:20:36 +00:00
if (zcs==NULL) return NULL;
memset(zcs, 0, sizeof(ZSTD_CStream));
memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
2016-09-15 12:11:01 +00:00
zcs->cctx = ZSTD_createCCtx_advanced(customMem);
if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; }
2016-08-11 23:20:36 +00:00
return zcs;
}
size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
if (zcs==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = zcs->customMem;
2016-09-15 12:11:01 +00:00
ZSTD_freeCCtx(zcs->cctx);
2017-03-21 20:20:59 +00:00
zcs->cctx = NULL;
ZSTD_freeCDict(zcs->cdictLocal);
2017-03-21 20:20:59 +00:00
zcs->cdictLocal = NULL;
ZSTD_free(zcs->inBuff, cMem);
2017-03-21 20:20:59 +00:00
zcs->inBuff = NULL;
ZSTD_free(zcs->outBuff, cMem);
2017-03-21 20:20:59 +00:00
zcs->outBuff = NULL;
ZSTD_free(zcs, cMem);
return 0;
}
2016-08-11 23:20:36 +00:00
}
2016-08-12 11:04:27 +00:00
/*====== Initialization ======*/
size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
size_t ZSTD_CStreamOutSize(void)
{
return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
}
2016-08-11 23:20:36 +00:00
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2016-09-15 12:54:07 +00:00
{
if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict_advanced(zcs->cctx, zcs->cdict, zcs->params.fParams, pledgedSrcSize))
else CHECK_F(ZSTD_compressBegin_internal(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
2016-09-15 12:54:07 +00:00
zcs->inToCompress = 0;
zcs->inBuffPos = 0;
zcs->inBuffTarget = zcs->blockSize;
zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
zcs->stage = zcss_load;
zcs->frameEnded = 0;
zcs->pledgedSrcSize = pledgedSrcSize;
2016-09-15 12:54:07 +00:00
return 0; /* ready to go */
}
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
{
zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
}
/* ZSTD_initCStream_internal() :
* params are supposed validated at this stage
* and zcs->cdict is supposed to be correct */
static size_t ZSTD_initCStream_stage2(ZSTD_CStream* zcs,
const ZSTD_parameters params,
unsigned long long pledgedSrcSize)
2016-08-11 23:20:36 +00:00
{
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
2016-08-11 23:20:36 +00:00
/* allocate buffers */
{ size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
if (zcs->inBuffSize < neededInBuffSize) {
zcs->inBuffSize = 0;
ZSTD_free(zcs->inBuff, zcs->customMem);
zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
2016-08-11 23:20:36 +00:00
if (zcs->inBuff == NULL) return ERROR(memory_allocation);
zcs->inBuffSize = neededInBuffSize;
2016-08-11 23:20:36 +00:00
}
zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
}
if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
size_t const outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
zcs->outBuffSize = 0;
ZSTD_free(zcs->outBuff, zcs->customMem);
zcs->outBuff = (char*) ZSTD_malloc(outBuffSize, zcs->customMem);
2016-08-11 23:20:36 +00:00
if (zcs->outBuff == NULL) return ERROR(memory_allocation);
zcs->outBuffSize = outBuffSize;
2016-08-11 23:20:36 +00:00
}
zcs->checksum = params.fParams.checksumFlag > 0;
zcs->params = params;
return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
}
/* note : cdict must outlive compression session */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
if (!cdict) return ERROR(GENERIC); /* cannot handle NULL cdict (does not know what to do) */
{ ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
params.fParams.contentSizeFlag = 0;
zcs->cdict = cdict;
return ZSTD_initCStream_stage2(zcs, params, 0);
}
}
static size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
zcs->cdict = NULL;
if (dict && dictSize >= 8) {
ZSTD_freeCDict(zcs->cdictLocal);
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* copy */, params, zcs->customMem);
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
zcs->cdict = zcs->cdictLocal;
}
2016-09-15 12:54:07 +00:00
return ZSTD_initCStream_stage2(zcs, params, pledgedSrcSize);
2016-08-11 23:20:36 +00:00
}
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
CHECK_F( ZSTD_checkCParams(params.cParams) );
return ZSTD_initCStream_internal(zcs, dict, dictSize, params, pledgedSrcSize);
}
2016-08-11 23:20:36 +00:00
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
return ZSTD_initCStream_internal(zcs, dict, dictSize, params, 0);
2016-08-11 23:20:36 +00:00
}
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
{
ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
params.fParams.contentSizeFlag = (pledgedSrcSize>0);
return ZSTD_initCStream_internal(zcs, NULL, 0, params, pledgedSrcSize);
}
2016-08-11 23:20:36 +00:00
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
return ZSTD_initCStream_internal(zcs, NULL, 0, params, 0);
2016-08-11 23:20:36 +00:00
}
2016-08-22 23:18:06 +00:00
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
2016-08-22 22:30:31 +00:00
{
if (zcs==NULL) return 0; /* support sizeof on NULL */
2017-03-08 21:45:10 +00:00
return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
2016-08-22 22:30:31 +00:00
}
2016-08-11 23:20:36 +00:00
2016-08-12 11:04:27 +00:00
/*====== Compression ======*/
2016-08-11 23:20:36 +00:00
typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
size_t const length = MIN(dstCapacity, srcSize);
memcpy(dst, src, length);
return length;
}
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
void* dst, size_t* dstCapacityPtr,
const void* src, size_t* srcSizePtr,
ZSTD_flush_e const flush)
{
U32 someMoreWork = 1;
const char* const istart = (const char*)src;
const char* const iend = istart + *srcSizePtr;
const char* ip = istart;
char* const ostart = (char*)dst;
char* const oend = ostart + *dstCapacityPtr;
char* op = ostart;
while (someMoreWork) {
switch(zcs->stage)
{
case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
case zcss_load:
/* complete inBuffer */
{ size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
zcs->inBuffPos += loaded;
ip += loaded;
if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */
} }
/* compress current block (note : this stage cannot be stopped in the middle) */
{ void* cDst;
size_t cSize;
size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
size_t oSize = oend-op;
if (oSize >= ZSTD_compressBound(iSize))
cDst = op; /* compress directly into output buffer (avoid flush stage) */
else
cDst = zcs->outBuff, oSize = zcs->outBuffSize;
cSize = (flush == zsf_end) ?
2016-09-15 12:11:01 +00:00
ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
2016-08-11 23:20:36 +00:00
if (ZSTD_isError(cSize)) return cSize;
if (flush == zsf_end) zcs->frameEnded = 1;
/* prepare next block */
zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
if (zcs->inBuffTarget > zcs->inBuffSize)
zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
zcs->inToCompress = zcs->inBuffPos;
if (cDst == op) { op += cSize; break; } /* no need to flush */
zcs->outBuffContentSize = cSize;
zcs->outBuffFlushedSize = 0;
zcs->stage = zcss_flush; /* pass-through to flush stage */
}
case zcss_flush:
{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
op += flushed;
zcs->outBuffFlushedSize += flushed;
if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
zcs->stage = zcss_load;
break;
}
case zcss_final:
someMoreWork = 0; /* do nothing */
break;
default:
return ERROR(GENERIC); /* impossible */
}
}
*srcSizePtr = ip - istart;
*dstCapacityPtr = op - ostart;
if (zcs->frameEnded) return 0;
{ size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
if (hintInSize==0) hintInSize = zcs->blockSize;
return hintInSize;
}
}
2016-08-16 23:39:22 +00:00
size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
size_t sizeRead = input->size - input->pos;
size_t sizeWritten = output->size - output->pos;
size_t const result = ZSTD_compressStream_generic(zcs,
(char*)(output->dst) + output->pos, &sizeWritten,
(const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
input->pos += sizeRead;
output->pos += sizeWritten;
2016-08-11 23:20:36 +00:00
return result;
}
2016-08-12 11:04:27 +00:00
/*====== Finalize ======*/
2016-08-11 23:20:36 +00:00
/*! ZSTD_flushStream() :
* @return : amount of data remaining to flush */
2016-08-16 23:39:22 +00:00
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
2016-08-11 23:20:36 +00:00
{
size_t srcSize = 0;
2016-08-16 23:39:22 +00:00
size_t sizeWritten = output->size - output->pos;
2016-08-16 23:50:54 +00:00
size_t const result = ZSTD_compressStream_generic(zcs,
2016-09-06 14:38:51 +00:00
(char*)(output->dst) + output->pos, &sizeWritten,
&srcSize, &srcSize, /* use a valid src address instead of NULL */
2016-08-16 23:50:54 +00:00
zsf_flush);
2016-08-16 23:39:22 +00:00
output->pos += sizeWritten;
2016-08-11 23:20:36 +00:00
if (ZSTD_isError(result)) return result;
return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
}
2016-08-16 23:39:22 +00:00
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
2016-08-11 23:20:36 +00:00
{
2016-08-16 23:39:22 +00:00
BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
BYTE* const oend = (BYTE*)(output->dst) + output->size;
2016-08-11 23:20:36 +00:00
BYTE* op = ostart;
if (zcs->stage != zcss_final) {
/* flush whatever remains */
size_t srcSize = 0;
2016-08-16 23:39:22 +00:00
size_t sizeWritten = output->size - output->pos;
size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten,
&srcSize /* use a valid src address instead of NULL */, &srcSize, zsf_end);
2016-08-11 23:20:36 +00:00
size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
op += sizeWritten;
if (remainingToFlush) {
2016-08-16 23:39:22 +00:00
output->pos += sizeWritten;
2016-08-11 23:20:36 +00:00
return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
}
/* create epilogue */
zcs->stage = zcss_final;
zcs->outBuffContentSize = !notEnded ? 0 :
/* write epilogue, including final empty block, into outBuff */
ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0);
if (ZSTD_isError(zcs->outBuffContentSize)) return zcs->outBuffContentSize;
2016-08-11 23:20:36 +00:00
}
/* flush epilogue */
{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
op += flushed;
zcs->outBuffFlushedSize += flushed;
2016-08-16 23:39:22 +00:00
output->pos += op-ostart;
2016-08-11 23:20:36 +00:00
if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */
return toFlush - flushed;
}
}
2016-02-10 12:37:52 +00:00
/*-===== Pre-defined compression levels =====-*/
#define ZSTD_DEFAULT_CLEVEL 1
#define ZSTD_MAX_CLEVEL 22
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" */
2016-04-09 18:32:00 +00:00
/* W, C, H, S, L, TL, strat */
2016-08-26 18:02:49 +00:00
{ 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
2016-07-13 12:56:24 +00:00
{ 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
{ 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
2016-08-26 18:02:49 +00:00
{ 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/
{ 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/
2016-07-13 12:56:24 +00:00
{ 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */
{ 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
{ 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */
2016-08-26 18:02:49 +00:00
{ 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
2016-07-13 12:56:24 +00:00
{ 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
{ 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
{ 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
{ 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
2016-08-26 18:02:49 +00:00
{ 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
{ 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */
{ 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
2016-10-25 10:25:07 +00:00
{ 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */
{ 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */
{ 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
2016-08-26 18:02:49 +00:00
{ 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
2016-08-24 17:42:15 +00:00
{ 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
2016-08-24 12:22:26 +00:00
{ 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
{ 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
{ 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
{ 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
{ 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
{ 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
{ 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
{ 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
{ 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
2016-04-08 10:36:19 +00:00
{ 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
2016-08-24 12:22:26 +00:00
{ 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
{ 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
2016-04-08 10:36:19 +00:00
{ 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
{ 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
2016-10-25 10:25:07 +00:00
{ 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/
{ 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/
{ 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
2016-07-22 12:36:46 +00:00
{ 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
{ 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
{ 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
{ 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
{ 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
{ 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
{ 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
{ 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
{ 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
{ 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
{ 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
{ 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
{ 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
{ 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
{ 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
2016-10-25 10:25:07 +00:00
{ 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/
{ 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/
{ 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
{ 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
2016-07-17 14:21:37 +00:00
{ 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
{ 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
{ 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
{ 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
{ 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
{ 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
{ 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
{ 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
{ 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
{ 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
{ 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
{ 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
{ 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
{ 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
{ 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
{ 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
{ 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
2016-10-25 10:25:07 +00:00
{ 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/
{ 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/
{ 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/
},
};
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
* Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
{
ZSTD_compressionParameters cp;
size_t const addedSize = srcSize ? 0 : 500;
U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
cp = ZSTD_defaultCParameters[tableID][compressionLevel];
if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
2016-04-04 11:49:18 +00:00
if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
}
cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
return cp;
}
/*! ZSTD_getParams() :
2016-07-12 11:42:10 +00:00
* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
* All fields of `ZSTD_frameParameters` are set to default (0) */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
ZSTD_parameters params;
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
memset(&params, 0, sizeof(params));
params.cParams = cParams;
return params;
}