first implementation of the new dictionary API (untested)

This commit is contained in:
Yann Collet 2016-06-07 00:51:51 +02:00
parent 9d504ae85b
commit 81e13ef7cf
3 changed files with 242 additions and 42 deletions

View File

@ -129,9 +129,47 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapa
/*-*********************** /*-***********************
* Dictionary API * Dictionary API
*************************/ *************************/
/*! ZSTD_createCDict() :
* Create a digested dictionary, ready to start compression operation without startup delay.
* `dict` can be released after creation */
typedef struct ZSTD_CDict_s ZSTD_CDict;
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
/*! ZSTD_compress_usingCDict() :
* Compression using a pre-digested Dictionary
* In contrast with older ZSTD_compress_usingDict(), use dictionary without significant overhead.
* Note that compression level is decided during dictionary creation */
ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict);
/*! ZSTD_createDDict() :
* Create a digested dictionary, ready to start decompression operation without startup delay.
* `dict` can be released after creation */
typedef struct ZSTD_DDict_s ZSTD_DDict;
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
/*! ZSTD_decompress_usingDDict() :
* Decompression using a pre-digested Dictionary
* In contrast with older ZSTD_decompress_usingDict(), use dictionary without significant overhead. */
ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_DDict* ddict);
/*-***********************
* Deprecated API
*************************/
/*! ZSTD_compress_usingDict() : /*! ZSTD_compress_usingDict() :
* *** Warning : this function will soon be declared deprecated ***
* Compression using a pre-defined Dictionary content (see dictBuilder). * Compression using a pre-defined Dictionary content (see dictBuilder).
* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */ * Note 1 : This function load the dictionary, resulting in a significant startup time.
* Note 2 : `dict` must remain valid and unmodified during compression operation.
* Note 3 : `dict` can be `NULL`, in which case, it's equivalent to ZSTD_compressCCtx() */
ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
@ -139,9 +177,12 @@ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
int compressionLevel); int compressionLevel);
/*! ZSTD_decompress_usingDict() : /*! ZSTD_decompress_usingDict() :
* *** Warning : this function will soon be declared deprecated ***
* Decompression using a pre-defined Dictionary content (see dictBuilder). * Decompression using a pre-defined Dictionary content (see dictBuilder).
* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. * Dictionary must be identical to the one used during compression.
* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */ * Note 1 : This function load the dictionary, resulting in a significant startup time
* Note 2 : `dict` must remain valid and unmodified during compression operation.
* Note 3 : `dict` can be `NULL`, in which case, it's equivalent to ZSTD_decompressDCtx() */
ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
@ -158,7 +199,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
* ==================================================================================== */ * ==================================================================================== */
/*--- Dependency ---*/ /*--- Dependency ---*/
#include "mem.h" #include "mem.h" /* U32 */
/*--- Constants ---*/ /*--- Constants ---*/
@ -187,22 +228,22 @@ static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable f
/*--- Types ---*/ /*--- Types ---*/
typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /* from faster to stronger */ typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /*< from faster to stronger */
typedef struct { typedef struct {
U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ U32 windowLog; /*< largest match distance : larger == more compression, more memory needed during decompression */
U32 chainLog; /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */ U32 chainLog; /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
U32 hashLog; /* dispatch table : larger == faster, more memory */ U32 hashLog; /*< dispatch table : larger == faster, more memory */
U32 searchLog; /* nb of searches : larger == more compression, slower */ U32 searchLog; /*< nb of searches : larger == more compression, slower */
U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */ U32 searchLength; /*< match length searched : larger == faster decompression, sometimes less compression */
U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */ U32 targetLength; /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
ZSTD_strategy strategy; ZSTD_strategy strategy;
} ZSTD_compressionParameters; } ZSTD_compressionParameters;
typedef struct { typedef struct {
U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */ U32 contentSizeFlag; /*< 1: content size will be in frame header (if known). */
U32 checksumFlag; /* 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */ U32 checksumFlag; /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */ U32 noDictIDFlag; /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
} ZSTD_frameParameters; } ZSTD_frameParameters;
typedef struct { typedef struct {
@ -217,15 +258,16 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
/*-************************************* /*-*************************************
* Advanced functions * Advanced compression functions
***************************************/ ***************************************/
/*! ZSTD_createCCtx_advanced() : /*! ZSTD_createCCtx_advanced() :
* Create a ZSTD compression context using external alloc and free functions */ * Create a ZSTD compression context using external alloc and free functions */
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
/*! ZSTD_createDCtx_advanced() : /*! ZSTD_createCDict_advanced() :
* Create a ZSTD decompression context using external alloc and free functions */ * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
ZSTD_parameters params, ZSTD_customMem customMem);
ZSTDLIB_API unsigned ZSTD_maxCLevel (void); ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
@ -251,27 +293,11 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
const void* dict,size_t dictSize, const void* dict,size_t dictSize,
ZSTD_parameters params); ZSTD_parameters params);
/*! ZSTD_compress_usingPreparedDCtx() :
* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded.
* It avoids reloading the dictionary each time.
* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced().
* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */
ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx(
ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/*- Advanced Decompression functions -*/ /*- Advanced Decompression functions -*/
/*! ZSTD_decompress_usingPreparedDCtx() : /*! ZSTD_createDCtx_advanced() :
* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded. * Create a ZSTD decompression context using external alloc and free functions */
* It avoids reloading the dictionary each time. ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict().
* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx(
ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/* ************************************** /* **************************************
@ -405,6 +431,31 @@ ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
/*-***********************
* Deprecated API
*************************/
/*! ZSTD_compress_usingPreparedCCtx() :
* *** Warning : this function will soon be deprecated ! ***
* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded.
* It avoids reloading the dictionary each time.
* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced().
* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */
ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx(
ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/*! ZSTD_decompress_usingPreparedDCtx() :
* *** Warning : this function will soon be deprecated ! ***
* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
* It avoids reloading the dictionary each time.
* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict().
* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx(
ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
#endif /* ZSTD_STATIC_LINKING_ONLY */ #endif /* ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus) #if defined (__cplusplus)

View File

@ -2385,7 +2385,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
/*! ZSTD_compressBegin_advanced() : /*! ZSTD_compressBegin_advanced() :
* @return : 0, or an error code */ * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize, const void* dict, size_t dictSize,
ZSTD_parameters params, U64 pledgedSrcSize) ZSTD_parameters params, U64 pledgedSrcSize)
{ {
@ -2393,17 +2393,17 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
{ size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize); { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
if (ZSTD_isError(errorCode)) return errorCode; } if (ZSTD_isError(errorCode)) return errorCode; }
return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
} }
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{ {
ZSTD_parameters params; ZSTD_parameters params;
memset(&params, 0, sizeof(params)); memset(&params, 0, sizeof(params));
params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel); ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", cctx->base, compressionLevel);
return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0); return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
} }
@ -2532,6 +2532,81 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS
} }
/* ===== Dictionary API ===== */
struct ZSTD_CDict_s {
void* dictContent;
size_t dictContentSize;
ZSTD_CCtx* refContext;
}; /* typedef'd tp ZSTD_CDict within zstd.h */
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
{
if (!customMem.customAlloc && !customMem.customFree)
customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree)
return NULL;
{ ZSTD_CDict* const cdict = (ZSTD_CDict*) customMem.customAlloc(customMem.opaque, sizeof(*cdict));
void* const dictContent = customMem.customAlloc(customMem.opaque, dictSize);
ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
if (!dictContent || !cdict || !cctx) {
customMem.customFree(customMem.opaque, dictContent);
customMem.customFree(customMem.opaque, cdict);
customMem.customFree(customMem.opaque, cctx);
return NULL;
}
memcpy(dictContent, dict, dictSize);
{ size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
if (ZSTD_isError(errorCode)) {
customMem.customFree(customMem.opaque, dictContent);
customMem.customFree(customMem.opaque, cdict);
customMem.customFree(customMem.opaque, cctx);
return NULL;
} }
cdict->dictContent = dictContent;
cdict->dictContentSize = dictSize;
cdict->refContext = cctx;
return cdict;
}
}
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
ZSTD_parameters params;
memset(&params, 0, sizeof(params));
params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
params.fParams.contentSizeFlag = 1;
return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
ZSTD_freeFunction const cFree = cdict->refContext->customMem.customFree;
void* const opaque = cdict->refContext->customMem.opaque;
ZSTD_freeCCtx(cdict->refContext);
cFree(opaque, cdict->dictContent);
cFree(opaque, cdict);
return 0;
}
ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict)
{
return ZSTD_compress_usingPreparedCCtx(cctx, cdict->refContext,
dst, dstCapacity,
src, srcSize);
}
/*-===== Pre-defined compression levels =====-*/ /*-===== Pre-defined compression levels =====-*/
#define ZSTD_DEFAULT_CLEVEL 1 #define ZSTD_DEFAULT_CLEVEL 1

View File

@ -1267,3 +1267,77 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t
return 0; return 0;
} }
struct ZSTD_DDict_s {
void* dictContent;
size_t dictContentSize;
ZSTD_DCtx* refContext;
}; /* typedef'd tp ZSTD_CDict within zstd.h */
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
{
if (!customMem.customAlloc && !customMem.customFree)
customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree)
return NULL;
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) customMem.customAlloc(customMem.opaque, sizeof(*ddict));
void* const dictContent = customMem.customAlloc(customMem.opaque, dictSize);
ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
if (!dictContent || !ddict || !dctx) {
customMem.customFree(customMem.opaque, dictContent);
customMem.customFree(customMem.opaque, ddict);
customMem.customFree(customMem.opaque, dctx);
return NULL;
}
memcpy(dictContent, dict, dictSize);
{ size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
if (ZSTD_isError(errorCode)) {
customMem.customFree(customMem.opaque, dictContent);
customMem.customFree(customMem.opaque, ddict);
customMem.customFree(customMem.opaque, dctx);
return NULL;
} }
ddict->dictContent = dictContent;
ddict->dictContentSize = dictSize;
ddict->refContext = dctx;
return ddict;
}
}
/*! ZSTD_createDDict() :
* Create a digested dictionary, ready to start decompression operation without startup delay.
* `dict` can be released after creation */
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
return ZSTD_createDDict_advanced(dict, dictSize, allocator);
}
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
ZSTD_freeFunction const cFree = ddict->refContext->customMem.customFree;
void* const opaque = ddict->refContext->customMem.opaque;
ZSTD_freeDCtx(ddict->refContext);
cFree(opaque, ddict->dictContent);
cFree(opaque, ddict);
return 0;
}
/*! ZSTD_decompress_usingDDict() :
* Decompression using a pre-digested Dictionary
* In contrast with older ZSTD_decompress_usingDict(), use dictionary without significant overhead. */
ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_DDict* ddict)
{
return ZSTD_decompress_usingPreparedDCtx(dctx, ddict->refContext,
dst, dstCapacity,
src, srcSize);
}