minor speed improvements 2

bench.c: block size has to be at least 32 bytes (smaller values fall back to using the whole source as a single block)
zstdcli.c: -B now accepts size suffixes in either case, e.g. -B16k, -B16m
This commit is contained in:
inikep 2016-04-21 12:18:47 +02:00
parent ef51941822
commit 38654988f3
3 changed files with 29 additions and 30 deletions

View File

@ -260,7 +260,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_CCtx* zc, ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit, const BYTE* const ip, const BYTE* const iLimit,
U32 nbCompares, const U32 mls, U32 nbCompares, const U32 mls,
U32 extDict, ZSTD_match_t* matches) U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
{ {
const BYTE* const base = zc->base; const BYTE* const base = zc->base;
const U32 current = (U32)(ip-base); const U32 current = (U32)(ip-base);
@ -285,7 +285,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
U32 mnum = 0; U32 mnum = 0;
const U32 minMatch = (mls == 3) ? 3 : 4; const U32 minMatch = (mls == 3) ? 3 : 4;
size_t bestLength = minMatch-1; size_t bestLength = minMatchLen-1;
if (minMatch == 3) { /* HC3 match finder */ if (minMatch == 3) { /* HC3 match finder */
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
@ -385,26 +385,26 @@ update:
static U32 ZSTD_BtGetAllMatches ( static U32 ZSTD_BtGetAllMatches (
ZSTD_CCtx* zc, ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit, const BYTE* const ip, const BYTE* const iLimit,
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
{ {
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
} }
static U32 ZSTD_BtGetAllMatches_selectMLS ( static U32 ZSTD_BtGetAllMatches_selectMLS (
ZSTD_CCtx* zc, /* Index table will be updated */ ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit, const BYTE* ip, const BYTE* const iHighLimit,
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
{ {
switch(matchLengthSearch) switch(matchLengthSearch)
{ {
case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches); case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
default : default :
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
} }
} }
@ -412,26 +412,26 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
static U32 ZSTD_BtGetAllMatches_extDict ( static U32 ZSTD_BtGetAllMatches_extDict (
ZSTD_CCtx* zc, ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit, const BYTE* const ip, const BYTE* const iLimit,
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
{ {
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
} }
static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
ZSTD_CCtx* zc, /* Index table will be updated */ ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit, const BYTE* ip, const BYTE* const iHighLimit,
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
{ {
switch(matchLengthSearch) switch(matchLengthSearch)
{ {
case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches); case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
default : default :
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
} }
} }
@ -499,7 +499,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch); } while (mlen >= minMatch);
} } } }
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
if (!last_pos && !match_num) { ip++; continue; } if (!last_pos && !match_num) { ip++; continue; }
@ -604,8 +604,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch); } while (mlen >= minMatch);
} } } }
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches);
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
@ -799,7 +798,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch); } while (mlen >= minMatch);
} } } } } }
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
if (!last_pos && !match_num) { ip++; continue; } if (!last_pos && !match_num) { ip++; continue; }
@ -913,7 +912,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch); } while (mlen >= minMatch);
} } } } } }
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches); match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
if (match_num > 0 && matches[match_num-1].len > sufficient_len) { if (match_num > 0 && matches[match_num-1].len > sufficient_len) {

View File

@ -248,7 +248,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
const size_t* fileSizes, U32 nbFiles, const size_t* fileSizes, U32 nbFiles,
const void* dictBuffer, size_t dictBufferSize, benchResult_t *result) const void* dictBuffer, size_t dictBufferSize, benchResult_t *result)
{ {
size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
@ -488,9 +488,9 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
dictBuffer, dictBufferSize, &result); dictBuffer, dictBufferSize, &result);
if (g_displayLevel == 1) { if (g_displayLevel == 1) {
if (g_additionalParam) if (g_additionalParam)
DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam); DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
else else
DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
total.cSize += result.cSize; total.cSize += result.cSize;
total.cSpeed += result.cSpeed; total.cSpeed += result.cSpeed;
total.dSpeed += result.dSpeed; total.dSpeed += result.dSpeed;
@ -501,7 +501,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
total.cSpeed /= 1+cLevelLast-cLevel; total.cSpeed /= 1+cLevelLast-cLevel;
total.dSpeed /= 1+cLevelLast-cLevel; total.dSpeed /= 1+cLevelLast-cLevel;
total.ratio /= 1+cLevelLast-cLevel; total.ratio /= 1+cLevelLast-cLevel;
DISPLAY("avg%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName); DISPLAY("avg%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
} }
} }

View File

@ -41,6 +41,7 @@
#include <stdio.h> /* fprintf, getchar */ #include <stdio.h> /* fprintf, getchar */
#include <stdlib.h> /* exit, calloc, free */ #include <stdlib.h> /* exit, calloc, free */
#include <string.h> /* strcmp, strlen */ #include <string.h> /* strcmp, strlen */
#include <ctype.h> /* toupper */
#include "fileio.h" #include "fileio.h"
#ifndef ZSTD_NOBENCH #ifndef ZSTD_NOBENCH
# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */ # include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */
@ -304,9 +305,9 @@ int main(int argCount, const char** argv)
argument++; argument++;
while ((*argument >='0') && (*argument <='9')) while ((*argument >='0') && (*argument <='9'))
bSize *= 10, bSize += *argument++ - '0'; bSize *= 10, bSize += *argument++ - '0';
if (*argument=='K') bSize<<=10, argument++; /* allows using KB notation */ if (toupper(*argument)=='K') bSize<<=10, argument++; /* allows using KB notation */
if (*argument=='M') bSize<<=20, argument++; if (toupper(*argument)=='M') bSize<<=20, argument++;
if (*argument=='B') argument++; if (toupper(*argument)=='B') argument++;
BMK_setNotificationLevel(displayLevel); BMK_setNotificationLevel(displayLevel);
BMK_SetBlockSize(bSize); BMK_SetBlockSize(bSize);
} }
@ -368,8 +369,7 @@ int main(int argCount, const char** argv)
maxDictSize = 0; maxDictSize = 0;
while ((*argument>='0') && (*argument<='9')) while ((*argument>='0') && (*argument<='9'))
maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++; maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
if (*argument=='k' || *argument=='K') if (toupper(*argument)=='K') maxDictSize <<= 10;
maxDictSize <<= 10;
continue; continue;
} }