Always check Huffman tables for ZSTD_lazy+
The compressor always reuses the existing Huffman table if the literals size is at most 1 KiB. If the compression strategy is `ZSTD_lazy` or stronger always check to see if reusing the previous table or creating a new table is better. This doesn't yet weigh in decompression speed. I don't want to add any heuristics there until I have real data to work with to ensure that the heuristic works for at least one use case, preferably more.
This commit is contained in:
parent
f44b55c18d
commit
54c4babd8f
@ -180,8 +180,9 @@ typedef enum {
|
||||
/** HUF_compress4X_repeat() :
|
||||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. */
|
||||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
|
||||
/** HUF_buildCTable_wksp() :
|
||||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||
@ -233,8 +234,9 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
|
||||
/** HUF_compress1X_repeat() :
|
||||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. */
|
||||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
||||
|
@ -552,7 +552,8 @@ static size_t HUF_compress_internal (
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
unsigned singleStream,
|
||||
void* workSpace, size_t wkspSize, HUF_CElt* oldHufTable, HUF_repeat* repeat)
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
|
||||
{
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
@ -580,7 +581,7 @@ static size_t HUF_compress_internal (
|
||||
wkspSize -= CTableSize;
|
||||
|
||||
/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
|
||||
if (srcSize <= 1024 && repeat && *repeat == HUF_repeat_valid) {
|
||||
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
|
||||
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
|
||||
}
|
||||
|
||||
@ -595,7 +596,7 @@ static size_t HUF_compress_internal (
|
||||
*repeat = HUF_repeat_none;
|
||||
}
|
||||
/* Heuristic : use existing table for small inputs */
|
||||
if (srcSize <= 1024 && repeat && *repeat != HUF_repeat_none) {
|
||||
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
|
||||
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
|
||||
}
|
||||
|
||||
@ -632,16 +633,16 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL);
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat)
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat);
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
|
||||
}
|
||||
|
||||
size_t HUF_compress1X (void* dst, size_t dstSize,
|
||||
@ -657,16 +658,16 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL);
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat)
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat);
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
|
||||
}
|
||||
|
||||
size_t HUF_compress2 (void* dst, size_t dstSize,
|
||||
|
@ -505,9 +505,10 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
|
||||
|
||||
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
|
||||
{ HUF_repeat repeat = zc->flagStaticHufTable;
|
||||
int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
||||
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
||||
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat)
|
||||
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat);
|
||||
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
|
||||
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
|
||||
if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
|
||||
else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user