RepsCodes are saved into Dict
(incomplete: the decompression side still needs to regenerate them)
This commit is contained in:
parent
efd0b4993a
commit
52a0622beb
@ -64,7 +64,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ZSTD_OPT_NUM (1<<12)
|
#define ZSTD_OPT_NUM (1<<12)
|
||||||
#define ZSTD_DICT_MAGIC 0xEC30A437
|
#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7 */
|
||||||
|
|
||||||
#define ZSTD_REP_NUM 3
|
#define ZSTD_REP_NUM 3
|
||||||
#define ZSTD_REP_INIT ZSTD_REP_NUM
|
#define ZSTD_REP_INIT ZSTD_REP_NUM
|
||||||
|
@ -256,7 +256,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|||||||
bitStream += count << bitCount;
|
bitStream += count << bitCount;
|
||||||
bitCount += nbBits;
|
bitCount += nbBits;
|
||||||
bitCount -= (count<max);
|
bitCount -= (count<max);
|
||||||
previous0 = (count==1);
|
previous0 = (count==1);
|
||||||
while (remaining<threshold) nbBits--, threshold>>=1;
|
while (remaining<threshold) nbBits--, threshold>>=1;
|
||||||
}
|
}
|
||||||
if (bitCount>16) {
|
if (bitCount>16) {
|
||||||
|
@ -2342,45 +2342,49 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
|
|||||||
static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
|
static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
|
||||||
{
|
{
|
||||||
/* note : magic number already checked */
|
/* note : magic number already checked */
|
||||||
size_t const dictSizeStart = dictSize;
|
const BYTE* dictPtr = (const BYTE*)dict;
|
||||||
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||||
|
|
||||||
{ size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
|
{ size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
|
||||||
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
|
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
|
||||||
dict = (const char*)dict + hufHeaderSize;
|
dictPtr += hufHeaderSize;
|
||||||
dictSize -= hufHeaderSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{ short offcodeNCount[MaxOff+1];
|
{ short offcodeNCount[MaxOff+1];
|
||||||
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
|
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
|
||||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
|
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||||
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
||||||
{ size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
|
{ size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
|
||||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
||||||
dict = (const char*)dict + offcodeHeaderSize;
|
dictPtr += offcodeHeaderSize;
|
||||||
dictSize -= offcodeHeaderSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{ short matchlengthNCount[MaxML+1];
|
{ short matchlengthNCount[MaxML+1];
|
||||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
|
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
|
||||||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
|
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
||||||
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||||
{ size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
|
{ size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
|
||||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
||||||
dict = (const char*)dict + matchlengthHeaderSize;
|
dictPtr += matchlengthHeaderSize;
|
||||||
dictSize -= matchlengthHeaderSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{ short litlengthNCount[MaxLL+1];
|
{ short litlengthNCount[MaxLL+1];
|
||||||
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
|
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
|
||||||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
|
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
||||||
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||||
{ size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
|
{ size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
|
||||||
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
|
||||||
dictSize -= litlengthHeaderSize;
|
dictPtr += litlengthHeaderSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
||||||
|
zc->rep[0] = MEM_readLE32(dictPtr+0); if (zc->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
|
||||||
|
zc->rep[1] = MEM_readLE32(dictPtr+4); if (zc->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
|
||||||
|
zc->rep[2] = MEM_readLE32(dictPtr+8); if (zc->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
|
||||||
|
dictPtr += 12;
|
||||||
|
|
||||||
zc->flagStaticTables = 1;
|
zc->flagStaticTables = 1;
|
||||||
return (dictSizeStart-dictSize);
|
return dictPtr - (const BYTE*)dict;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_compress_insertDictionary() :
|
/** ZSTD_compress_insertDictionary() :
|
||||||
|
@ -578,9 +578,10 @@ typedef struct
|
|||||||
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
||||||
} EStats_ress_t;
|
} EStats_ress_t;
|
||||||
|
|
||||||
|
#define MAXREPOFFSET 1024
|
||||||
|
|
||||||
static void ZDICT_countEStats(EStats_ress_t esr,
|
static void ZDICT_countEStats(EStats_ress_t esr,
|
||||||
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
|
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
|
||||||
const void* src, size_t srcSize)
|
const void* src, size_t srcSize)
|
||||||
{
|
{
|
||||||
const seqStore_t* seqStorePtr;
|
const seqStore_t* seqStorePtr;
|
||||||
@ -614,6 +615,17 @@ static void ZDICT_countEStats(EStats_ress_t esr,
|
|||||||
size_t u;
|
size_t u;
|
||||||
for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
|
for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
|
/* rep offsets */
|
||||||
|
{ const U32* const offsetPtr = seqStorePtr->offsetStart;
|
||||||
|
U32 offset1 = offsetPtr[0] - 3;
|
||||||
|
U32 offset2 = offsetPtr[1] - 3;
|
||||||
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
||||||
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
||||||
|
repOffsets[offset1] += 3;
|
||||||
|
repOffsets[offset2] += 1;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -629,12 +641,29 @@ static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
|||||||
|
|
||||||
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
||||||
{
|
{
|
||||||
size_t total;
|
size_t total=0;
|
||||||
unsigned u;
|
unsigned u;
|
||||||
for (u=0, total=0; u<nbFiles; u++) total += fileSizes[u];
|
for (u=0; u<nbFiles; u++) total += fileSizes[u];
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct { U32 offset; U32 count; } offsetCount_t;
|
||||||
|
|
||||||
|
static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, U32 count)
|
||||||
|
{
|
||||||
|
U32 u;
|
||||||
|
table[ZSTD_REP_NUM].offset = val;
|
||||||
|
table[ZSTD_REP_NUM].count = count;
|
||||||
|
for (u=ZSTD_REP_NUM; u>0; u--) {
|
||||||
|
offsetCount_t tmp;
|
||||||
|
if (table[u-1].count >= table[u].count) break;
|
||||||
|
tmp = table[u-1];
|
||||||
|
table[u-1] = table[u];
|
||||||
|
table[u] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#define OFFCODE_MAX 18 /* only applicable to first block */
|
#define OFFCODE_MAX 18 /* only applicable to first block */
|
||||||
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||||
unsigned compressionLevel,
|
unsigned compressionLevel,
|
||||||
@ -649,6 +678,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
short matchLengthNCount[MaxML+1];
|
short matchLengthNCount[MaxML+1];
|
||||||
U32 litLengthCount[MaxLL+1];
|
U32 litLengthCount[MaxLL+1];
|
||||||
short litLengthNCount[MaxLL+1];
|
short litLengthNCount[MaxLL+1];
|
||||||
|
U32 repOffset[MAXREPOFFSET] = { 0 };
|
||||||
|
offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
|
||||||
EStats_ress_t esr;
|
EStats_ress_t esr;
|
||||||
ZSTD_parameters params;
|
ZSTD_parameters params;
|
||||||
U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
|
U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
|
||||||
@ -656,12 +687,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
size_t eSize = 0;
|
size_t eSize = 0;
|
||||||
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
||||||
size_t const averageSampleSize = totalSrcSize / nbFiles;
|
size_t const averageSampleSize = totalSrcSize / nbFiles;
|
||||||
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
||||||
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
|
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
|
||||||
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
||||||
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
||||||
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
||||||
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
||||||
esr.ref = ZSTD_createCCtx();
|
esr.ref = ZSTD_createCCtx();
|
||||||
esr.zc = ZSTD_createCCtx();
|
esr.zc = ZSTD_createCCtx();
|
||||||
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
||||||
@ -679,7 +713,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
/* collect stats on all files */
|
/* collect stats on all files */
|
||||||
for (u=0; u<nbFiles; u++) {
|
for (u=0; u<nbFiles; u++) {
|
||||||
ZDICT_countEStats(esr,
|
ZDICT_countEStats(esr,
|
||||||
countLit, offcodeCount, matchLengthCount, litLengthCount,
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
||||||
(const char*)srcBuffer + pos, fileSizes[u]);
|
(const char*)srcBuffer + pos, fileSizes[u]);
|
||||||
pos += fileSizes[u];
|
pos += fileSizes[u];
|
||||||
}
|
}
|
||||||
@ -720,46 +754,70 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
}
|
}
|
||||||
llLog = (U32)errorCode;
|
llLog = (U32)errorCode;
|
||||||
|
|
||||||
|
{ U32 offset;
|
||||||
|
for (offset=1; offset<MAXREPOFFSET; offset++)
|
||||||
|
ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* write result to buffer */
|
/* write result to buffer */
|
||||||
errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
|
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
||||||
if (HUF_isError(errorCode)) {
|
if (HUF_isError(hhSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "HUF_writeCTable error");
|
DISPLAYLEVEL(1, "HUF_writeCTable error");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
|
}
|
||||||
|
dstPtr += hhSize;
|
||||||
|
maxDstSize -= hhSize;
|
||||||
|
eSize += hhSize;
|
||||||
}
|
}
|
||||||
dstBuffer = (char*)dstBuffer + errorCode;
|
|
||||||
maxDstSize -= errorCode;
|
|
||||||
eSize += errorCode;
|
|
||||||
|
|
||||||
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(ohSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
|
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
|
}
|
||||||
|
dstPtr += ohSize;
|
||||||
|
maxDstSize -= ohSize;
|
||||||
|
eSize += ohSize;
|
||||||
}
|
}
|
||||||
dstBuffer = (char*)dstBuffer + errorCode;
|
|
||||||
maxDstSize -= errorCode;
|
|
||||||
eSize += errorCode;
|
|
||||||
|
|
||||||
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(mhSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
|
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
|
}
|
||||||
|
dstPtr += mhSize;
|
||||||
|
maxDstSize -= mhSize;
|
||||||
|
eSize += mhSize;
|
||||||
}
|
}
|
||||||
dstBuffer = (char*)dstBuffer + errorCode;
|
|
||||||
maxDstSize -= errorCode;
|
|
||||||
eSize += errorCode;
|
|
||||||
|
|
||||||
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
|
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(lhSize)) {
|
||||||
|
eSize = ERROR(GENERIC);
|
||||||
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
|
||||||
|
goto _cleanup;
|
||||||
|
}
|
||||||
|
dstPtr += lhSize;
|
||||||
|
maxDstSize -= lhSize;
|
||||||
|
eSize += lhSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxDstSize<12) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
|
DISPLAYLEVEL(1, "not enough space to write RepOffsets");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
dstBuffer = (char*)dstBuffer + errorCode;
|
MEM_writeLE32(dstPtr+0, bestRepOffset[0].offset);
|
||||||
maxDstSize -= errorCode;
|
MEM_writeLE32(dstPtr+4, bestRepOffset[1].offset);
|
||||||
eSize += errorCode;
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
||||||
|
//MEM_writeLE32(dstPtr+0, 1);
|
||||||
|
//MEM_writeLE32(dstPtr+4, 4);
|
||||||
|
//MEM_writeLE32(dstPtr+8, 8);
|
||||||
|
dstPtr += 12;
|
||||||
|
eSize += 12;
|
||||||
|
|
||||||
_cleanup:
|
_cleanup:
|
||||||
ZSTD_freeCCtx(esr.ref);
|
ZSTD_freeCCtx(esr.ref);
|
||||||
|
@ -79,7 +79,7 @@ const char* ZDICT_getErrorName(size_t errorCode);
|
|||||||
|
|
||||||
/* ====================================================================================
|
/* ====================================================================================
|
||||||
* The definitions in this section are considered experimental.
|
* The definitions in this section are considered experimental.
|
||||||
* They should never be used in association with a dynamic library, as they may change in the future.
|
* They should never be used with a dynamic library, as they may change in the future.
|
||||||
* They are provided for advanced usages.
|
* They are provided for advanced usages.
|
||||||
* Use them only in association with static linking.
|
* Use them only in association with static linking.
|
||||||
* ==================================================================================== */
|
* ==================================================================================== */
|
||||||
|
Loading…
Reference in New Issue
Block a user