From 96aa3019b25bfa6c016e935e8f4e7a4944b766d8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 24 Mar 2017 16:04:29 -0700 Subject: [PATCH 1/2] changed advanced commands --maxdict= and --dictID= now work with the `=` variant, which is the recommended one. Old variant `--dictID #` still works, for compatibility with existing scripts. The long-term objective is to remove the old variant. --- lib/dictBuilder/zdict.c | 17 ++++++++--------- programs/zstd.1 | 4 ++-- programs/zstd.1.md | 4 ++-- programs/zstdcli.c | 14 ++++++++------ 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index ed53197a..0824b94d 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -361,11 +361,11 @@ static dictItem ZDICT_analyzePos( } -/*! ZDICT_checkMerge - check if dictItem can be merged, do it if possible - @return : id of destination elt, 0 if not merged -*/ -static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip) +/*! 
ZDICT_tryMerge() : + * check if dictItem can be merged, do it if possible + * @return : id of destination elt, 0 if not merged + */ +static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip) { const U32 tableSize = table->pos; const U32 eltEnd = elt.pos + elt.length; @@ -426,11 +426,11 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id) static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt) { /* merge if possible */ - U32 mergeId = ZDICT_checkMerge(table, elt, 0); + U32 mergeId = ZDICT_tryMerge(table, elt, 0); if (mergeId) { U32 newMerge = 1; while (newMerge) { - newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId); + newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId); /* merge existing elt */ if (newMerge) ZDICT_removeDictItem(table, mergeId); mergeId = newMerge; } @@ -810,7 +810,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, MEM_writeLE32(dstPtr+4, repStartValue[1]); MEM_writeLE32(dstPtr+8, repStartValue[2]); #endif - //dstPtr += 12; eSize += 12; _cleanup: @@ -829,7 +828,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, ZDICT_params_t params) { size_t hSize; -#define HBUFFSIZE 256 +#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */ BYTE header[HBUFFSIZE]; int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel; U32 const notificationLevel = params.notificationLevel; diff --git a/programs/zstd.1 b/programs/zstd.1 index 02423358..8b418a0e 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -168,11 +168,11 @@ use FILEs as training set to create a dictionary\. The training set should conta dictionary saved into \fBfile\fR (default: dictionary) . .TP -\fB\-\-maxdict #\fR +\fB\-\-maxdict=#\fR limit dictionary to specified size (default : (112640) . 
.TP -\fB\-\-dictID #\fR +\fB\-\-dictID=#\fR A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. . .TP diff --git a/programs/zstd.1.md b/programs/zstd.1.md index c9ff3327..d1161a52 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -157,9 +157,9 @@ Typical gains range from 10% (at 64KB) to x5 better (at <1KB). (for example, 10 MB for a 100 KB dictionary). * `-o file`: dictionary saved into `file` (default: dictionary) -* `--maxdict #`: +* `--maxdict=#`: limit dictionary to specified size (default : (112640) -* `--dictID #`: +* `--dictID=#`: A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary. By default, zstd will create a 4-bytes random number ID. 
diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 281301bd..a3548508 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -148,9 +148,9 @@ static int usage_advanced(const char* programName) DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n"); DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n"); DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName); - DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize); + DISPLAY( "--maxdict=# : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize); DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel); - DISPLAY( "--dictID ## : force dictionary ID to specified value (default: random)\n"); + DISPLAY( "--dictID=# : force dictionary ID to specified value (default: random)\n"); #endif #ifndef ZSTD_NOBENCH DISPLAY( "\n"); @@ -371,8 +371,8 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; } if (!strcmp(argument, "--test")) { operation=zom_test; continue; } if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; } - if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } - if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } + if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ + if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; } if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; } if (!strcmp(argument, 
"--rm")) { FIO_setRemoveSrcFile(1); continue; } @@ -404,6 +404,8 @@ int main(int argCount, const char* argv[]) if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } /* fall-through, will trigger bad_usage() later on */ } @@ -533,14 +535,14 @@ int main(int argCount, const char* argv[]) continue; } /* if (argument[0]=='-') */ - if (nextArgumentIsMaxDict) { + if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */ nextArgumentIsMaxDict = 0; lastCommand = 0; maxDictSize = readU32FromChar(&argument); continue; } - if (nextArgumentIsDictID) { + if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */ nextArgumentIsDictID = 0; lastCommand = 0; dictID = readU32FromChar(&argument); From 4c41d37fcca57130127ca84f89e8cec401d718d0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 24 Mar 2017 18:36:56 -0700 Subject: [PATCH 2/2] changed test for new syntax --dictID= and --maxdict= --- lib/dictBuilder/zdict.c | 2 +- tests/playTests.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 0824b94d..6bc42526 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -427,7 +427,7 @@ static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt) { /* merge if possible */ 
U32 mergeId = ZDICT_tryMerge(table, elt, 0); - if (mergeId) { + if (mergeId) { /* recursive : re-merge the newly merged elt */ U32 newMerge = 1; while (newMerge) { newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId); /* merge existing elt */ diff --git a/tests/playTests.sh b/tests/playTests.sh index 897a9015..5abbb14e 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -253,12 +253,12 @@ $ECHO "- Create second (different) dictionary " $ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" $ECHO "- Create dictionary with short dictID" -$ZSTD --train *.c ../programs/*.c --dictID 1 -o tmpDict1 +$ZSTD --train *.c ../programs/*.c --dictID=1 -o tmpDict1 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" $ECHO "- Create dictionary with wrong dictID parameter order (must fail)" $ZSTD --train *.c ../programs/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument " $ECHO "- Create dictionary with size limit" -$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict 4K -v +$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict=4K -v $ECHO "- Create dictionary with wrong parameter order (must fail)" $ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument " $ECHO "- Compress without dictID" @@ -303,10 +303,10 @@ $ECHO "- Create second (different) dictionary" $ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" $ECHO "- Create dictionary with short dictID" -$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1 +$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" 
$ECHO "- Create dictionary with size limit" -$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K +$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K rm tmp*