zstd/lib/common/huf.h

287 lines
13 KiB
C
Raw Normal View History

2015-10-18 21:18:32 +00:00
/* ******************************************************************
Huffman coder, part of New Generation Entropy library
2015-10-18 21:18:32 +00:00
header file
2016-01-23 18:28:41 +00:00
Copyright (C) 2013-2016, Yann Collet.
2015-10-18 21:18:32 +00:00
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
****************************************************************** */
#ifndef HUF_H
#define HUF_H
2015-10-18 21:18:32 +00:00
#if defined (__cplusplus)
extern "C" {
#endif
2016-01-06 00:58:37 +00:00
/* ****************************************
2015-10-18 21:18:32 +00:00
* Dependency
******************************************/
#include <stddef.h> /* size_t */
2016-01-06 00:58:37 +00:00
/* ****************************************
* HUF simple functions
2015-10-18 21:18:32 +00:00
******************************************/
2016-03-20 04:40:39 +00:00
size_t HUF_compress(void* dst, size_t dstCapacity,
2015-10-18 21:18:32 +00:00
const void* src, size_t srcSize);
size_t HUF_decompress(void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize);
2016-03-20 04:40:39 +00:00
/*
HUF_compress() :
2015-10-18 21:18:32 +00:00
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
2016-03-20 04:40:39 +00:00
'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= HUF_compressBound(srcSize).
2015-10-18 21:18:32 +00:00
Note : srcSize must be <= 128 KB
2016-03-20 04:40:39 +00:00
@return : size of compressed data (<= dstCapacity)
2015-10-18 21:18:32 +00:00
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
2016-03-20 04:40:39 +00:00
if return == 1, srcData is a single repeated byte symbol (RLE compression).
2015-10-18 21:18:32 +00:00
if HUF_isError(return), compression failed (more details using HUF_getErrorName())
2016-03-20 04:40:39 +00:00
HUF_decompress() :
Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
2015-10-18 21:18:32 +00:00
into already allocated destination buffer 'dst', of size 'dstSize'.
2016-03-20 04:40:39 +00:00
`dstSize` : must be the **exact** size of original (uncompressed) data.
2016-01-23 18:28:41 +00:00
Note : in contrast with FSE, HUF_decompress can regenerate
RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
because it knows size to regenerate.
2016-01-06 00:58:37 +00:00
@return : size of regenerated data (== dstSize)
or an error code, which can be tested using HUF_isError()
2015-10-18 21:18:32 +00:00
*/
2016-01-06 00:58:37 +00:00
/* ****************************************
2015-10-18 21:18:32 +00:00
* Tool functions
******************************************/
2016-05-20 12:36:36 +00:00
#define HUF_BLOCKSIZE_MAX (128 * 1024)
2016-03-20 04:40:39 +00:00
size_t HUF_compressBound(size_t size); /**< maximum compressed size */
2015-10-18 21:18:32 +00:00
/* Error Management */
2016-03-20 04:40:39 +00:00
unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
2015-10-18 21:18:32 +00:00
2016-01-06 00:58:37 +00:00
/* ****************************************
2015-10-18 21:18:32 +00:00
* Advanced functions
******************************************/
size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
#ifdef HUF_STATIC_LINKING_ONLY
/* *** Dependencies *** */
#include "fse.h"
#include "bitstream.h" /* includes mem.h */
/* ****************************************
* Static allocation
******************************************/
/* HUF buffer bounds */
#define HUF_CTABLEBOUND 129
#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* static allocation of HUF's Compression Table */
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
U32 name##hb[maxSymbolValue+1]; \
void* name##hv = &(name##hb); \
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
/* static allocation of HUF's DTable */
#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog))
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
/* ****************************************
* Advanced decompression functions
******************************************/
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
/* ****************************************
* HUF detailed API
******************************************/
/*!
HUF_compress() does the following:
1. count symbol occurrence from source[] into table count[] using FSE_count()
2. (optional) refine tableLog using HUF_optimalTableLog()
3. build Huffman table from count using HUF_buildCTable()
4. save Huffman table to memory buffer using HUF_writeCTable()
5. encode the data stream using HUF_compress4X_usingCTable()
The following API allows targeting specific sub-functions for advanced tasks.
For example, it's possible to compress several blocks using the same 'CTable',
or to save and regenerate 'CTable' using external methods.
*/
/* FSE_count() : find it within "fse.h" */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/*!
HUF_decompress() does the following:
1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
2. build Huffman table from save, using HUF_readDTableXn()
3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
*/
size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX6 (unsigned* DTable, const void* src, size_t srcSize);
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
size_t HUF_decompress4X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
/* single stream variants */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
size_t HUF_decompress1X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
/* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
/* **************************************************************
* Constants
****************************************************************/
#define HUF_TABLELOG_ABSOLUTEMAX 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
#define HUF_TABLELOG_DEFAULT HUF_TABLELOG_MAX /* tableLog by default, when not specified */
#define HUF_SYMBOLVALUE_MAX 255
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
# error "HUF_TABLELOG_MAX is too large !"
#endif
/* **************************************************************
* Needed by zstd in both compression and decompression
****************************************************************/
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer.
@return : size read from `src`
*/
MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize)
{
U32 weightTotal;
const BYTE* ip = (const BYTE*) src;
size_t iSize = ip[0];
size_t oSize;
//memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
if (iSize >= 128) { /* special header */
if (iSize >= (242)) { /* RLE */
static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
oSize = l[iSize-242];
memset(huffWeight, 1, hwSize);
iSize = 0;
}
else { /* Incompressible */
oSize = iSize - 127;
iSize = ((oSize+1)/2);
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
if (oSize >= hwSize) return ERROR(corruption_detected);
ip += 1;
{ U32 n;
for (n=0; n<oSize; n+=2) {
huffWeight[n] = ip[n/2] >> 4;
huffWeight[n+1] = ip[n/2] & 15;
} } } }
else { /* header compressed with FSE (normal case) */
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
if (FSE_isError(oSize)) return oSize;
}
/* collect weight stats */
memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }
/* get last non-null symbol weight (implied, total must be 2^n) */
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
*tableLogPtr = tableLog;
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;
} }
/* check tree construction validity */
if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
/* results */
*nbSymbolsPtr = (U32)(oSize+1);
return iSize+1;
}
#endif /* HUF_STATIC_LINKING_ONLY */
2015-10-18 21:18:32 +00:00
#if defined (__cplusplus)
}
#endif
#endif /* HUF_H */