minor speed improvements 2
bench.c: block size has to be at least 32 bytes (smaller values fall back to the source size); zstdcli.c: support for case-insensitive size suffixes, e.g. -B16k -B16m
This commit is contained in:
parent
ef51941822
commit
38654988f3
@ -260,7 +260,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
|
|||||||
ZSTD_CCtx* zc,
|
ZSTD_CCtx* zc,
|
||||||
const BYTE* const ip, const BYTE* const iLimit,
|
const BYTE* const ip, const BYTE* const iLimit,
|
||||||
U32 nbCompares, const U32 mls,
|
U32 nbCompares, const U32 mls,
|
||||||
U32 extDict, ZSTD_match_t* matches)
|
U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
|
||||||
{
|
{
|
||||||
const BYTE* const base = zc->base;
|
const BYTE* const base = zc->base;
|
||||||
const U32 current = (U32)(ip-base);
|
const U32 current = (U32)(ip-base);
|
||||||
@ -285,7 +285,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
|
|||||||
U32 mnum = 0;
|
U32 mnum = 0;
|
||||||
|
|
||||||
const U32 minMatch = (mls == 3) ? 3 : 4;
|
const U32 minMatch = (mls == 3) ? 3 : 4;
|
||||||
size_t bestLength = minMatch-1;
|
size_t bestLength = minMatchLen-1;
|
||||||
|
|
||||||
if (minMatch == 3) { /* HC3 match finder */
|
if (minMatch == 3) { /* HC3 match finder */
|
||||||
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
|
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
|
||||||
@ -385,26 +385,26 @@ update:
|
|||||||
static U32 ZSTD_BtGetAllMatches (
|
static U32 ZSTD_BtGetAllMatches (
|
||||||
ZSTD_CCtx* zc,
|
ZSTD_CCtx* zc,
|
||||||
const BYTE* const ip, const BYTE* const iLimit,
|
const BYTE* const ip, const BYTE* const iLimit,
|
||||||
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches)
|
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
|
||||||
{
|
{
|
||||||
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
||||||
ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
|
ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
|
||||||
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches);
|
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static U32 ZSTD_BtGetAllMatches_selectMLS (
|
static U32 ZSTD_BtGetAllMatches_selectMLS (
|
||||||
ZSTD_CCtx* zc, /* Index table will be updated */
|
ZSTD_CCtx* zc, /* Index table will be updated */
|
||||||
const BYTE* ip, const BYTE* const iHighLimit,
|
const BYTE* ip, const BYTE* const iHighLimit,
|
||||||
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches)
|
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
|
||||||
{
|
{
|
||||||
switch(matchLengthSearch)
|
switch(matchLengthSearch)
|
||||||
{
|
{
|
||||||
case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches);
|
case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
|
||||||
default :
|
default :
|
||||||
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches);
|
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
|
||||||
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches);
|
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
|
||||||
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches);
|
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -412,26 +412,26 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
|
|||||||
static U32 ZSTD_BtGetAllMatches_extDict (
|
static U32 ZSTD_BtGetAllMatches_extDict (
|
||||||
ZSTD_CCtx* zc,
|
ZSTD_CCtx* zc,
|
||||||
const BYTE* const ip, const BYTE* const iLimit,
|
const BYTE* const ip, const BYTE* const iLimit,
|
||||||
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches)
|
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
|
||||||
{
|
{
|
||||||
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
||||||
ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
|
ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
|
||||||
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches);
|
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
|
static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
|
||||||
ZSTD_CCtx* zc, /* Index table will be updated */
|
ZSTD_CCtx* zc, /* Index table will be updated */
|
||||||
const BYTE* ip, const BYTE* const iHighLimit,
|
const BYTE* ip, const BYTE* const iHighLimit,
|
||||||
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches)
|
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
|
||||||
{
|
{
|
||||||
switch(matchLengthSearch)
|
switch(matchLengthSearch)
|
||||||
{
|
{
|
||||||
case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches);
|
case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
|
||||||
default :
|
default :
|
||||||
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches);
|
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
|
||||||
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches);
|
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
|
||||||
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches);
|
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -499,7 +499,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|||||||
} while (mlen >= minMatch);
|
} while (mlen >= minMatch);
|
||||||
} }
|
} }
|
||||||
|
|
||||||
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */
|
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
|
||||||
|
|
||||||
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
|
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
|
||||||
if (!last_pos && !match_num) { ip++; continue; }
|
if (!last_pos && !match_num) { ip++; continue; }
|
||||||
@ -604,8 +604,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|||||||
} while (mlen >= minMatch);
|
} while (mlen >= minMatch);
|
||||||
} }
|
} }
|
||||||
|
|
||||||
|
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
|
||||||
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches);
|
|
||||||
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
|
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
|
||||||
|
|
||||||
if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
|
if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
|
||||||
@ -799,7 +798,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|||||||
} while (mlen >= minMatch);
|
} while (mlen >= minMatch);
|
||||||
} } }
|
} } }
|
||||||
|
|
||||||
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */
|
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
|
||||||
|
|
||||||
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
|
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
|
||||||
if (!last_pos && !match_num) { ip++; continue; }
|
if (!last_pos && !match_num) { ip++; continue; }
|
||||||
@ -913,7 +912,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|||||||
} while (mlen >= minMatch);
|
} while (mlen >= minMatch);
|
||||||
} } }
|
} } }
|
||||||
|
|
||||||
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches);
|
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
|
||||||
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
|
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
|
||||||
|
|
||||||
if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
|
if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
|
||||||
|
@ -248,7 +248,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
|||||||
const size_t* fileSizes, U32 nbFiles,
|
const size_t* fileSizes, U32 nbFiles,
|
||||||
const void* dictBuffer, size_t dictBufferSize, benchResult_t *result)
|
const void* dictBuffer, size_t dictBufferSize, benchResult_t *result)
|
||||||
{
|
{
|
||||||
size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
|
size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
|
||||||
U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
|
U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
|
||||||
blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
|
blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
|
||||||
size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
|
size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
|
||||||
@ -488,9 +488,9 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
|
|||||||
dictBuffer, dictBufferSize, &result);
|
dictBuffer, dictBufferSize, &result);
|
||||||
if (g_displayLevel == 1) {
|
if (g_displayLevel == 1) {
|
||||||
if (g_additionalParam)
|
if (g_additionalParam)
|
||||||
DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
|
DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
|
||||||
else
|
else
|
||||||
DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
|
DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
|
||||||
total.cSize += result.cSize;
|
total.cSize += result.cSize;
|
||||||
total.cSpeed += result.cSpeed;
|
total.cSpeed += result.cSpeed;
|
||||||
total.dSpeed += result.dSpeed;
|
total.dSpeed += result.dSpeed;
|
||||||
@ -501,7 +501,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
|
|||||||
total.cSpeed /= 1+cLevelLast-cLevel;
|
total.cSpeed /= 1+cLevelLast-cLevel;
|
||||||
total.dSpeed /= 1+cLevelLast-cLevel;
|
total.dSpeed /= 1+cLevelLast-cLevel;
|
||||||
total.ratio /= 1+cLevelLast-cLevel;
|
total.ratio /= 1+cLevelLast-cLevel;
|
||||||
DISPLAY("avg%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
|
DISPLAY("avg%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,6 +41,7 @@
|
|||||||
#include <stdio.h> /* fprintf, getchar */
|
#include <stdio.h> /* fprintf, getchar */
|
||||||
#include <stdlib.h> /* exit, calloc, free */
|
#include <stdlib.h> /* exit, calloc, free */
|
||||||
#include <string.h> /* strcmp, strlen */
|
#include <string.h> /* strcmp, strlen */
|
||||||
|
#include <ctype.h> /* toupper */
|
||||||
#include "fileio.h"
|
#include "fileio.h"
|
||||||
#ifndef ZSTD_NOBENCH
|
#ifndef ZSTD_NOBENCH
|
||||||
# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */
|
# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */
|
||||||
@ -304,9 +305,9 @@ int main(int argCount, const char** argv)
|
|||||||
argument++;
|
argument++;
|
||||||
while ((*argument >='0') && (*argument <='9'))
|
while ((*argument >='0') && (*argument <='9'))
|
||||||
bSize *= 10, bSize += *argument++ - '0';
|
bSize *= 10, bSize += *argument++ - '0';
|
||||||
if (*argument=='K') bSize<<=10, argument++; /* allows using KB notation */
|
if (toupper(*argument)=='K') bSize<<=10, argument++; /* allows using KB notation */
|
||||||
if (*argument=='M') bSize<<=20, argument++;
|
if (toupper(*argument)=='M') bSize<<=20, argument++;
|
||||||
if (*argument=='B') argument++;
|
if (toupper(*argument)=='B') argument++;
|
||||||
BMK_setNotificationLevel(displayLevel);
|
BMK_setNotificationLevel(displayLevel);
|
||||||
BMK_SetBlockSize(bSize);
|
BMK_SetBlockSize(bSize);
|
||||||
}
|
}
|
||||||
@ -368,8 +369,7 @@ int main(int argCount, const char** argv)
|
|||||||
maxDictSize = 0;
|
maxDictSize = 0;
|
||||||
while ((*argument>='0') && (*argument<='9'))
|
while ((*argument>='0') && (*argument<='9'))
|
||||||
maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
|
maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
|
||||||
if (*argument=='k' || *argument=='K')
|
if (toupper(*argument)=='K') maxDictSize <<= 10;
|
||||||
maxDictSize <<= 10;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user