zstd/contrib/long_distance_matching/ldm.h

158 lines
4.7 KiB
C
Raw Normal View History

2017-07-05 20:57:07 +00:00
#ifndef LDM_H
#define LDM_H
#include "mem.h" // from /lib/common/mem.h
// The number of bytes used to store the compressed size and the
// decompressed size, respectively, in the header.
#define LDM_COMPRESSED_SIZE 8
#define LDM_DECOMPRESSED_SIZE 8
// Total header size in bytes: the sum of the two size fields above.
#define LDM_HEADER_SIZE ((LDM_COMPRESSED_SIZE)+(LDM_DECOMPRESSED_SIZE))
// The number of bytes used to store a match offset.
2017-07-13 22:29:41 +00:00
#define LDM_OFFSET_SIZE 4
// Defines the size of the hash table.
2017-07-19 23:56:28 +00:00
// Note that this is not the number of buckets.
// Open question from the original author: currently this should be less
// than WINDOW_SIZE_LOG + 4?
// NOTE(review): presumably a base-2 logarithm of a byte count, and
// WINDOW_SIZE_LOG presumably means LDM_WINDOW_SIZE_LOG -- confirm against
// the implementation.
#define LDM_MEMORY_USAGE 23
// Controls the number of entries in a hash bucket.
// NOTE(review): going by the _LOG suffix this is presumably the base-2
// logarithm of the entry count (so 0 would mean one entry per bucket) --
// confirm against the implementation.
#define HASH_BUCKET_SIZE_LOG 0 // The maximum is 4 for now.
2017-07-13 22:29:41 +00:00
2017-07-19 23:56:28 +00:00
// Defines the lag in inserting elements into the hash table.
// NOTE(review): the unit of the lag is not stated in this header -- confirm
// against the implementation.
#define LDM_LAG 0
2017-07-24 17:18:58 +00:00
// Base-2 logarithm of the window size.
// The maximum of 30 presumably follows from LDM_WINDOW_SIZE below being
// computed with a plain int shift: 1 << 31 does not fit in a 32-bit int.
#define LDM_WINDOW_SIZE_LOG 28 // Max value is 30
2017-07-14 19:31:01 +00:00
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
2017-07-13 22:29:41 +00:00
// These should be multiples of four (and perhaps set to the same value?).
// NOTE(review): by their names, presumably the shortest match that is
// emitted and the number of bytes hashed per position -- confirm.
#define LDM_MIN_MATCH_LENGTH 16
#define LDM_HASH_LENGTH 16
2017-07-24 17:18:58 +00:00
// Experimental compile-time switches. A switch is enabled when its #define
// is active and disabled when the line is commented out; only
// TMP_TAG_INSERT is currently enabled.
//#define TMP_EVICTION
2017-07-21 17:44:39 +00:00
#define TMP_TAG_INSERT
2017-07-24 17:18:58 +00:00
//#define TMP_SIMPLE_LOWER
//#define TMP_FORCE_HASH_ONLY
// Forward declarations only: the struct definitions are not part of this
// header as shown, and the functions declared below take these types by
// pointer.
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
2017-07-13 21:39:35 +00:00
typedef struct LDM_DCtx LDM_DCtx;
2017-07-12 22:11:06 +00:00
/**
 *  Compresses src into dst.
 *
 *  NB: This currently ignores maxDstSize and assumes enough space is
 *  available in dst.
 *
 *  Block format (see lz4 documentation for more information):
 *  github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
 *
 *  A block is composed of sequences. Each sequence begins with a token, which
 *  is a one-byte value separated into two 4-bit fields.
 *
 *  The first field uses the four high bits of the token and encodes the
 *  literal length. If the field value is 0, there is no literal. If it is 15,
 *  additional bytes are added (each ranging from 0 to 255) to the previous
 *  value to produce a total length.
 *
 *  Following the token and optional length bytes are the literals.
 *
 *  Next are the 4 bytes (2 in lz4) holding the offset of the match, i.e. the
 *  position the match is copied from (see the lz4 block format document).
 *
 *  The lower four bits of the token encode the match length. Additional
 *  match-length bytes, if any, follow the offset and are encoded in the same
 *  way as the additional literal-length bytes.
 *
 *  The last sequence is incomplete and stops right after the literals.
 *
 *  NOTE(review): the meaning of the return value is not stated here;
 *  presumably the compressed size in bytes -- confirm against the
 *  implementation.
 */
2017-07-10 13:32:29 +00:00
size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
2017-07-05 20:57:07 +00:00
2017-07-13 21:39:35 +00:00
/**
 *  Initializes the compression context cctx for compressing the srcSize
 *  bytes at src into dst (capacity maxDstSize).
 *
 *  Allocates memory for the hash table; LDM_destroyCCtx frees it.
 */
void LDM_initializeCCtx(LDM_CCtx *cctx,
const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
2017-07-13 22:29:41 +00:00
/**
 *  Frees the memory allocated in LDM_initializeCCtx.
 */
void LDM_destroyCCtx(LDM_CCtx *cctx);
2017-07-14 19:31:01 +00:00
/**
 *  Prints the distribution of offsets in the hash table.
 *
 *  An offset is the distance of a hash table entry from the current input
 *  position of cctx. The context is taken as const, i.e. it is only read.
 */
void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
2017-07-13 21:39:35 +00:00
/**
 *  Outputs the compression statistics in stats to stdout.
 */
2017-07-13 22:29:41 +00:00
void LDM_printCompressStats(const LDM_compressStats *stats);
2017-07-13 21:39:35 +00:00
/**
 *  Encodes the literal length followed by the literals.
 *
 *  The literal length is written to the upper four bits of the token byte
 *  that pToken points to, with additional bytes written to the output as
 *  needed (see lz4).
 *
 *  This is followed by literalLength bytes corresponding to the literals.
 *
 *  cctx:          compression context.
 *  pToken:        token byte that receives the literal-length field.
 *  literalLength: number of literal bytes to encode.
 */
void LDM_encodeLiteralLengthAndLiterals(
LDM_CCtx *cctx, BYTE *pToken, const U64 literalLength);
2017-07-13 21:39:35 +00:00
/**
 *  Writes the current block (literals, literal length, match offset,
 *  match length).
 *
 *  cctx:          compression context.
 *  literalLength: number of literal bytes in the block.
 *  offset:        offset of the match.
 *  matchLength:   length of the match.
 *
 *  NOTE(review): the bare timestamp line inside the parameter list below is
 *  not valid C; it looks like residue from a blame/history view and should
 *  be removed from the file.
 */
void LDM_outputBlock(LDM_CCtx *cctx,
const U64 literalLength,
2017-07-13 21:39:35 +00:00
const U32 offset,
const U64 matchLength);
2017-07-13 21:39:35 +00:00
/**
 *  Decompresses src into dst.
 *
 *  Note: assumes src does not have a header.
 *
 *  NOTE(review): the meaning of the return value is not stated here;
 *  presumably the decompressed size in bytes -- confirm against the
 *  implementation.
 */
2017-07-10 13:32:29 +00:00
size_t LDM_decompress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
2017-07-05 20:57:07 +00:00
2017-07-13 21:39:35 +00:00
/**
 *  Initializes the decompression context dctx for decompressing the
 *  compressedSize bytes at src into dst (capacity maxDecompressedSize).
 */
void LDM_initializeDCtx(LDM_DCtx *dctx,
const void *src, size_t compressedSize,
void *dst, size_t maxDecompressedSize);
2017-07-12 22:11:06 +00:00
/**
 *  Reads the header from src and writes the compressed size and
 *  decompressed size into *compressedSize and *decompressedSize
 *  respectively.
 *
 *  NB: LDM_compress and LDM_decompress currently do not add/read headers.
 */
2017-07-13 21:39:35 +00:00
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize);
/**
 *  Writes the compressed size and the decompressed size to memPtr.
 *
 *  NOTE(review): presumably writes LDM_HEADER_SIZE bytes in the layout that
 *  LDM_readHeader expects -- confirm against the implementation.
 */
void LDM_writeHeader(void *memPtr, U64 compressedSize,
U64 decompressedSize);
/**
 *  Outputs the configuration used.
 *
 *  NOTE(review): presumably prints the compile-time settings defined in
 *  this header; the destination is not stated here -- confirm.
 */
void LDM_outputConfiguration(void);
// NOTE(review): undocumented. The purpose of this function and what it
// expects src to point to are not stated in this header -- confirm against
// the implementation.
void LDM_test(const BYTE *src);
2017-07-12 01:13:26 +00:00
2017-07-05 20:57:07 +00:00
#endif /* LDM_H */