Always check Huffman tables for ZSTD_lazy+

The compressor always reuses the existing Huffman table if the literals
size is at most 1 KiB. If the compression strategy is `ZSTD_lazy` or
stronger always check to see if reusing the previous table or creating
a new table is better.

This doesn't yet weigh in decompression speed. I don't want to add any
heuristics there until I have real data to work with to ensure that the
heuristic works for at least one use case, preferably more.
This commit is contained in:
Nick Terrell 2017-03-03 12:30:24 -08:00
parent f44b55c18d
commit 54c4babd8f
3 changed files with 19 additions and 15 deletions

View File

@ -180,8 +180,9 @@ typedef enum {
/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. */
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid. */
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@ -233,8 +234,9 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
/** HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. */
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid. */
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */

View File

@ -552,7 +552,8 @@ static size_t HUF_compress_internal (
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream,
void* workSpace, size_t wkspSize, HUF_CElt* oldHufTable, HUF_repeat* repeat)
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
{
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
@ -580,7 +581,7 @@ static size_t HUF_compress_internal (
wkspSize -= CTableSize;
/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
if (srcSize <= 1024 && repeat && *repeat == HUF_repeat_valid) {
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
}
@ -595,7 +596,7 @@ static size_t HUF_compress_internal (
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
if (srcSize <= 1024 && repeat && *repeat != HUF_repeat_none) {
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
}
@ -632,16 +633,16 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL);
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat)
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat);
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
}
size_t HUF_compress1X (void* dst, size_t dstSize,
@ -657,16 +658,16 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL);
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
}
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat)
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat);
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
}
size_t HUF_compress2 (void* dst, size_t dstSize,

View File

@ -505,9 +505,10 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
{ HUF_repeat repeat = zc->flagStaticHufTable;
int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat);
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
}