Merge pull request #1047 from facebook/hufCompress

removed huf_compress_impl.h
This commit is contained in:
Yann Collet 2018-03-15 14:14:03 -07:00 committed by GitHub
commit 192542b63c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 273 additions and 259 deletions

View File

@ -345,7 +345,7 @@ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* s
*/ */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace); size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
/*! FSE_count_simple /*! FSE_count_simple() :
* Same as FSE_countFast(), but does not use any additional memory (not even on stack). * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
*/ */

View File

@ -58,31 +58,32 @@ extern "C" {
#endif #endif
/* *** simple functions *** */ /* ========================== */
/** /* *** simple functions *** */
HUF_compress() : /* ========================== */
Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
'dst' buffer must be already allocated. /** HUF_compress() :
Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
`srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. * 'dst' buffer must be already allocated.
@return : size of compressed data (<= `dstCapacity`). * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
if HUF_isError(return), compression failed (more details using HUF_getErrorName()) * @return : size of compressed data (<= `dstCapacity`).
*/ * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
* if HUF_isError(return), compression failed (more details using HUF_getErrorName())
*/
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize); const void* src, size_t srcSize);
/** /** HUF_decompress() :
HUF_decompress() : * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', * into already allocated buffer 'dst', of minimum size 'dstSize'.
into already allocated buffer 'dst', of minimum size 'dstSize'. * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
`originalSize` : **must** be the ***exact*** size of original (uncompressed) data. * Note : in contrast with FSE, HUF_decompress can regenerate
Note : in contrast with FSE, HUF_decompress can regenerate * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, * because it knows size to regenerate (originalSize).
because it knows size to regenerate (originalSize). * @return : size of regenerated data (== originalSize),
@return : size of regenerated data (== originalSize), * or an error code, which can be tested using HUF_isError()
or an error code, which can be tested using HUF_isError() */
*/
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize); const void* cSrc, size_t cSrcSize);
@ -99,30 +100,22 @@ HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error c
/* *** Advanced function *** */ /* *** Advanced function *** */
/** HUF_compress2() : /** HUF_compress2() :
* Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`. * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() : /** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`. * Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must have minimum alignment of 4, and be at least as large as following macro */ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE (6 << 10) #define HUF_WORKSPACE_SIZE (6 << 10)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
/** unsigned maxSymbolValue, unsigned tableLog,
* The minimum workspace size for the `workSpace` used in void* workSpace, size_t wkspSize);
* HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
*
* The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
* HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
* Buffer overflow errors may potentially occur if code modifications result in
* a required workspace size greater than that specified in the following
* macro.
*/
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#endif /* HUF_H_298734234 */ #endif /* HUF_H_298734234 */
@ -132,7 +125,7 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const
* which shall never be used in the context of a dynamic library, * which shall never be used in the context of a dynamic library,
* because they are not guaranteed to remain stable in the future. * because they are not guaranteed to remain stable in the future.
* Only consider them in association with static linking. * Only consider them in association with static linking.
*******************************************************************/ * *****************************************************************/
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) #if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
#define HUF_H_HUF_STATIC_LINKING_ONLY #define HUF_H_HUF_STATIC_LINKING_ONLY
@ -192,24 +185,23 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
/* **************************************** /* ****************************************
* HUF detailed API * HUF detailed API
******************************************/ * ****************************************/
/*!
HUF_compress() does the following:
1. count symbol occurrence from source[] into table count[] using FSE_count()
2. (optional) refine tableLog using HUF_optimalTableLog()
3. build Huffman table from count using HUF_buildCTable()
4. save Huffman table to memory buffer using HUF_writeCTable()
5. encode the data stream using HUF_compress4X_usingCTable()
The following API allows targeting specific sub-functions for advanced tasks. /*! HUF_compress() does the following:
For example, it's possible to compress several blocks using the same 'CTable', * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
or to save and regenerate 'CTable' using external methods. * 2. (optional) refine tableLog using HUF_optimalTableLog()
*/ * 3. build Huffman table from count using HUF_buildCTable()
/* FSE_count() : exposed within "fse.h" */ * 4. save Huffman table to memory buffer using HUF_writeCTable()
* 5. encode the data stream using HUF_compress4X_usingCTable()
*
* The following API allows targeting specific sub-functions for advanced tasks.
* For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap, in which case, CTable will overwrite count content */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@ -219,47 +211,65 @@ typedef enum {
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat; } HUF_repeat;
/** HUF_compress4X_repeat() : /** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat. * If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid. */ * If preferRepeat then the old table will always be used if valid. */
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
/** HUF_buildCTable_wksp() : /** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/ */
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
/*! HUF_readStats() : /*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable(). * Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer. * `huffWeight` is destination buffer.
@return : size read from `src` , or an error Code . * @return : size read from `src` , or an error Code .
Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize); const void* src, size_t srcSize);
/** HUF_readCTable() : /** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */ * Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/* /*
HUF_decompress() does the following: * HUF_decompress() does the following:
1. select the decompression algorithm (X2, X4) based on pre-computed heuristics * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
2. build Huffman table from save, using HUF_readDTableXn() * 2. build Huffman table from save, using HUF_readDTableX?()
3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
*/ */
/** HUF_selectDecoder() : /** HUF_selectDecoder() :
* Tells which decoder is likely to decode faster, * Tells which decoder is likely to decode faster,
* based on a set of pre-determined metrics. * based on a set of pre-computed metrics.
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
/**
* The minimum workspace size for the `workSpace` used in
* HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
*
* The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
* HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
* Buffer overflow errors may potentially occur if code modifications result in
* a required workspace size greater than that specified in the following
* macro.
*/
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)   /* 2048, expressed in bytes */
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))   /* same budget, expressed as a number of U32 elements */
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
@ -270,17 +280,23 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
/* ====================== */
/* single stream variants */ /* single stream variants */
/* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/** HUF_compress1X_repeat() : /** HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat. * If it uses hufTable it does not modify hufTable or repeat.
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
* If preferRepeat then the old table will always be used if valid. */ * If preferRepeat then the old table will always be used if valid. */
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
@ -297,7 +313,7 @@ size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* c
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
/* BMI2 variants. /* BMI2 variants.
* If the CPU has BMI2 support pass bmi2=1, otherwise pass bmi2=0. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/ */
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);

View File

@ -292,7 +292,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
It doesn't use any additional memory. It doesn't use any additional memory.
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
For this reason, prefer using a table `count` with 256 elements. For this reason, prefer using a table `count` with 256 elements.
@return : count of most numerous element @return : count of most numerous element.
*/ */
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize) const void* src, size_t srcSize)
@ -305,7 +305,10 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) count[*ip++]++; while (ip<end) {
assert(*ip <= maxSymbolValue);
count[*ip++]++;
}
while (!count[maxSymbolValue]) maxSymbolValue--; while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue; *maxSymbolValuePtr = maxSymbolValue;
@ -318,7 +321,8 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
/* FSE_count_parallel_wksp() : /* FSE_count_parallel_wksp() :
* Same as FSE_count_parallel(), but using an externally provided scratch buffer. * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
* @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t FSE_count_parallel_wksp( static size_t FSE_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr, unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, const void* source, size_t sourceSize,
@ -333,7 +337,7 @@ static size_t FSE_count_parallel_wksp(
U32* const Counting3 = Counting2 + 256; U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256; U32* const Counting4 = Counting3 + 256;
memset(Counting1, 0, 4*256*sizeof(unsigned)); memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */ /* safety checks */
if (!sourceSize) { if (!sourceSize) {
@ -379,7 +383,9 @@ static size_t FSE_count_parallel_wksp(
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} } } }
{ U32 s; for (s=0; s<=maxSymbolValue; s++) { { U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s]; if (count[s] > max) max = count[s];
} } } }
@ -393,9 +399,11 @@ static size_t FSE_count_parallel_wksp(
* Same as FSE_countFast(), but using an externally provided scratch buffer. * Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */ * `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace) const void* source, size_t sourceSize,
unsigned* workSpace)
{ {
if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); if (sourceSize < 1500) /* heuristic threshold */
return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
} }

View File

@ -440,59 +440,169 @@ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, uns
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
FORCE_INLINE_TEMPLATE void
#define FUNCTION(fn) fn##_default HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
#define TARGET
#include "huf_compress_impl.h"
#undef TARGET
#undef FUNCTION
#if DYNAMIC_BMI2
#define FUNCTION(fn) fn##_bmi2
#define TARGET TARGET_ATTRIBUTE("bmi2")
#include "huf_compress_impl.h"
#undef TARGET
#undef FUNCTION
#endif
static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
{ {
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
/* HUF_FLUSHBITS() :
 * Unconditionally flush the bit container of stream `s` through BIT_flushBits(). */
#define HUF_FLUSHBITS(s) BIT_flushBits(s)

/* HUF_FLUSHBITS_1() / HUF_FLUSHBITS_2() :
 * Flush only when the bit container is narrower (in bits) than
 * HUF_TABLELOG_MAX*2+7 (resp. HUF_TABLELOG_MAX*4+7).
 * The test only involves compile-time constants, so each macro resolves to
 * either an unconditional flush or nothing at all.
 * The body is wrapped in do { } while (0) so that each macro behaves as a
 * single statement : a bare `if` would capture a following `else` at the
 * call site. */
#define HUF_FLUSHBITS_1(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream); } while (0)

#define HUF_FLUSHBITS_2(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream); } while (0)
/* HUF_compress1X_usingCTable_internal_body() :
 * Encode the `srcSize` bytes of `src` into `dst` as a single bit stream,
 * looking each byte up in `CTable`.
 * Bytes are consumed from the END of `src` towards its beginning :
 * first the 0-3 trailing bytes (srcSize & 3), then groups of 4.
 * @return : 0 if `dstSize` < 8 or if BIT_initCStream() reports an error,
 *           otherwise whatever BIT_closeCStream() returns
 *           (presumably the compressed size in bytes - confirm in bitstream.h).
 * The encode / flush sequence is order-sensitive : do not reorder. */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
size_t n;
BIT_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* round srcSize down to a multiple of 4 */
/* encode the (srcSize & 3) trailing bytes first, last byte first */
switch (srcSize & 3)
{
case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
HUF_FLUSHBITS_2(&bitC);
/* fall-through */
case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
HUF_FLUSHBITS_1(&bitC);
/* fall-through */
case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
HUF_FLUSHBITS(&bitC);
/* fall-through */
case 0 : /* fall-through */
default: break;
}
/* main loop : 4 symbols per iteration, walking backwards from ip[n-1] to ip[0] */
for (; n>0; n-=4) { /* note : n&3==0 at this stage */
HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
HUF_FLUSHBITS_1(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
HUF_FLUSHBITS_2(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
HUF_FLUSHBITS_1(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
HUF_FLUSHBITS(&bitC); /* unconditional flush once per group of 4 */
}
return BIT_closeCStream(&bitC);
}
#if DYNAMIC_BMI2 #if DYNAMIC_BMI2
/* HUF_compress1X_usingCTable_internal_bmi2() :
 * Instance of HUF_compress1X_usingCTable_internal_body() carrying
 * TARGET_ATTRIBUTE("bmi2"). Only present when DYNAMIC_BMI2 is set.
 * Arguments and return value are forwarded unchanged.
 * NOTE(review) : presumably lets the compiler use BMI2 instructions in the
 * inlined body - confirm against the definition of TARGET_ATTRIBUTE. */
static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}
/* HUF_compress1X_usingCTable_internal_default() :
 * Instance of HUF_compress1X_usingCTable_internal_body() without any target
 * attribute. Only present when DYNAMIC_BMI2 is set, as the counterpart of
 * the _bmi2 instance.
 * Arguments and return value are forwarded unchanged. */
static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
{
if (bmi2) { if (bmi2) {
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
} }
#endif
(void)bmi2;
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
} }
static size_t HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, #else
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2) static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
{ {
#if DYNAMIC_BMI2
if (bmi2) {
return HUF_compress4X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
}
#endif
(void)bmi2; (void)bmi2;
return HUF_compress4X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
} }
#endif
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{ {
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
} }
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, int bmi2)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
if (cSize==0) return 0;
op += cSize;
}
return op-ostart;
}
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{ {
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
} }
static size_t HUF_compressCTable_internal( static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend, BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize, const void* src, size_t srcSize,

View File

@ -1,120 +0,0 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef FUNCTION
# error "FUNCTION(name) must be defined"
#endif
#ifndef TARGET
# error "TARGET must be defined"
#endif
/* HUF_encodeSymbol() (name decorated through FUNCTION()) :
 * Append the code of `symbol` to the bit stream `bitCPtr` :
 * CTable[symbol].val, on CTable[symbol].nbBits bits, via BIT_addBitsFast().
 * FUNCTION(name) must be defined by the including file before inclusion. */
static void FUNCTION(HUF_encodeSymbol)(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
/* HUF_FLUSHBITS() :
 * Unconditionally flush the bit container of stream `s` through BIT_flushBits(). */
#define HUF_FLUSHBITS(s) BIT_flushBits(s)

/* HUF_FLUSHBITS_1() / HUF_FLUSHBITS_2() :
 * Flush only when the bit container is narrower (in bits) than
 * HUF_TABLELOG_MAX*2+7 (resp. HUF_TABLELOG_MAX*4+7).
 * The test only involves compile-time constants, so each macro resolves to
 * either an unconditional flush or nothing at all.
 * The body is wrapped in do { } while (0) so that each macro behaves as a
 * single statement : a bare `if` would capture a following `else` at the
 * call site. */
#define HUF_FLUSHBITS_1(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream); } while (0)

#define HUF_FLUSHBITS_2(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream); } while (0)
/* HUF_compress1X_usingCTable_internal() (name decorated through FUNCTION()) :
 * Encode the `srcSize` bytes of `src` into `dst` as a single bit stream,
 * looking each byte up in `CTable`.
 * TARGET and FUNCTION(name) must be defined by the including file.
 * Bytes are consumed from the END of `src` towards its beginning :
 * first the 0-3 trailing bytes (srcSize & 3), then groups of 4.
 * @return : 0 if `dstSize` < 8 or if BIT_initCStream() reports an error,
 *           otherwise whatever BIT_closeCStream() returns
 *           (presumably the compressed size in bytes - confirm in bitstream.h).
 * The encode / flush sequence is order-sensitive : do not reorder. */
static TARGET
size_t FUNCTION(HUF_compress1X_usingCTable_internal)(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
size_t n;
BIT_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* round srcSize down to a multiple of 4 */
/* encode the (srcSize & 3) trailing bytes first, last byte first */
switch (srcSize & 3)
{
case 3 : FUNCTION(HUF_encodeSymbol)(&bitC, ip[n+ 2], CTable);
HUF_FLUSHBITS_2(&bitC);
/* fall-through */
case 2 : FUNCTION(HUF_encodeSymbol)(&bitC, ip[n+ 1], CTable);
HUF_FLUSHBITS_1(&bitC);
/* fall-through */
case 1 : FUNCTION(HUF_encodeSymbol)(&bitC, ip[n+ 0], CTable);
HUF_FLUSHBITS(&bitC);
/* fall-through */
case 0 : /* fall-through */
default: break;
}
/* main loop : 4 symbols per iteration, walking backwards from ip[n-1] to ip[0] */
for (; n>0; n-=4) { /* note : n&3==0 at this stage */
FUNCTION(HUF_encodeSymbol)(&bitC, ip[n- 1], CTable);
HUF_FLUSHBITS_1(&bitC);
FUNCTION(HUF_encodeSymbol)(&bitC, ip[n- 2], CTable);
HUF_FLUSHBITS_2(&bitC);
FUNCTION(HUF_encodeSymbol)(&bitC, ip[n- 3], CTable);
HUF_FLUSHBITS_1(&bitC);
FUNCTION(HUF_encodeSymbol)(&bitC, ip[n- 4], CTable);
HUF_FLUSHBITS(&bitC); /* unconditional flush once per group of 4 */
}
return BIT_closeCStream(&bitC);
}
/** HUF_compress4X_usingCTable_internal() :
 *  Encodes `src` as 4 independent bit streams, each produced by
 *  HUF_compress1X_usingCTable_internal() with the same `CTable`.
 *  Output layout :
 *  - 6 bytes of jump table : the compressed sizes of streams 1, 2 and 3,
 *    each written as a 16-bit value with MEM_writeLE16() ;
 *  - the 4 streams, back to back.
 *  Input layout : the first 3 segments are (srcSize+3)/4 bytes each,
 *  the 4th segment receives whatever remains.
 * @return : total nb of bytes written into `dst` (jump table included),
 *           or 0 if `dstSize` < 17, if `srcSize` < 12, or if any stream reports 0 ;
 *           CHECK_V_F() is defined elsewhere - presumably it returns the
 *           error code early when a stream fails. */
static TARGET
size_t FUNCTION(HUF_compress4X_usingCTable_internal)(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    size_t const quarter = (srcSize+3)/4;   /* size of each of the first 3 segments */
    const BYTE* in = (const BYTE*) src;
    const BYTE* const inEnd = in + srcSize;
    BYTE* const outStart = (BYTE*) dst;
    BYTE* const outEnd = outStart + dstSize;
    BYTE* out = outStart;
    unsigned stream;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;                  /* no saving possible : too small input */

    out += 6;   /* reserve room for the jump table */

    /* streams 1-3 : fixed-size segments, compressed size recorded in the jump table */
    for (stream = 0; stream < 3; stream++) {
        CHECK_V_F(cSize, FUNCTION(HUF_compress1X_usingCTable_internal)(out, outEnd-out, in, quarter, CTable) );
        if (cSize==0) return 0;
        MEM_writeLE16(outStart + 2*stream, (U16)cSize);
        out += cSize;
        in += quarter;
    }

    /* stream 4 : remaining input, size not recorded */
    {   CHECK_V_F(cSize, FUNCTION(HUF_compress1X_usingCTable_internal)(out, outEnd-out, in, inEnd-in, CTable) );
        if (cSize==0) return 0;
        out += cSize;
    }

    return out-outStart;
}

View File

@ -1415,10 +1415,9 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */ /* small ? don't even attempt compression (speed opt) */
# define LITERAL_NOENTROPY 63 # define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : LITERAL_NOENTROPY; { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */

View File

@ -958,21 +958,22 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
}; };
/** HUF_selectDecoder() : /** HUF_selectDecoder() :
* Tells which decoder is likely to decode faster, * Tells which decoder is likely to decode faster,
* based on a set of pre-determined metrics. * based on a set of pre-computed metrics.
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
* Assumption : 0 < cSrcSize, dstSize <= 128 KB */ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{ {
assert(dstSize > 0);
assert(dstSize <= 128 KB);
/* decoder timing evaluation */ /* decoder timing evaluation */
U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
U32 const D256 = (U32)(dstSize >> 8); U32 const D256 = (U32)(dstSize >> 8);
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
return DTime1 < DTime0;
return DTime1 < DTime0; } }
}
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);