Merge branch 'dev' into LegacyDictBuilder
This commit is contained in:
commit
69017bf253
@ -367,7 +367,7 @@ static int isIncluded(const void* in, const void* container, size_t length)
|
||||
const char* const into = (const char*) container;
|
||||
size_t u;
|
||||
|
||||
for (u=0; u<length; u++) {
|
||||
for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
|
||||
if (ip[u] != into[u]) break;
|
||||
}
|
||||
|
||||
@ -378,7 +378,7 @@ static int isIncluded(const void* in, const void* container, size_t length)
|
||||
check if dictItem can be merged, do it if possible
|
||||
@return : id of destination elt, 0 if not merged
|
||||
*/
|
||||
static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
|
||||
static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
|
||||
{
|
||||
const U32 tableSize = table->pos;
|
||||
const U32 eltEnd = elt.pos + elt.length;
|
||||
@ -452,11 +452,11 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
|
||||
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
|
||||
{
|
||||
/* merge if possible */
|
||||
U32 mergeId = ZDICT_checkMerge(table, elt, 0, buffer);
|
||||
U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
|
||||
if (mergeId) {
|
||||
U32 newMerge = 1;
|
||||
while (newMerge) {
|
||||
newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId, buffer);
|
||||
newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
|
||||
if (newMerge) ZDICT_removeDictItem(table, mergeId);
|
||||
mergeId = newMerge;
|
||||
}
|
||||
@ -836,7 +836,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
||||
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
||||
#endif
|
||||
//dstPtr += 12;
|
||||
eSize += 12;
|
||||
|
||||
_cleanup:
|
||||
@ -855,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
||||
ZDICT_params_t params)
|
||||
{
|
||||
size_t hSize;
|
||||
#define HBUFFSIZE 256
|
||||
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
||||
BYTE header[HBUFFSIZE];
|
||||
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
||||
U32 const notificationLevel = params.notificationLevel;
|
||||
|
@ -168,11 +168,11 @@ use FILEs as training set to create a dictionary\. The training set should conta
|
||||
dictionary saved into \fBfile\fR (default: dictionary)
|
||||
.
|
||||
.TP
|
||||
\fB\-\-maxdict #\fR
|
||||
\fB\-\-maxdict=#\fR
|
||||
limit dictionary to specified size (default : 112640)
|
||||
.
|
||||
.TP
|
||||
\fB\-\-dictID #\fR
|
||||
\fB\-\-dictID=#\fR
|
||||
A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
|
||||
.
|
||||
.TP
|
||||
|
@ -157,9 +157,9 @@ Typical gains range from 10% (at 64KB) to x5 better (at <1KB).
|
||||
(for example, 10 MB for a 100 KB dictionary).
|
||||
* `-o file`:
|
||||
dictionary saved into `file` (default: dictionary)
|
||||
* `--maxdict #`:
|
||||
* `--maxdict=#`:
|
||||
limit dictionary to specified size (default : 112640)
|
||||
* `--dictID #`:
|
||||
* `--dictID=#`:
|
||||
A dictionary ID is a locally unique ID that a decoder can use to verify it is
|
||||
using the right dictionary.
|
||||
By default, zstd will create a 4-bytes random number ID.
|
||||
|
@ -148,9 +148,9 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
|
||||
DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
|
||||
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
|
||||
DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
|
||||
DISPLAY( "--maxdict=# : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
|
||||
DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
|
||||
DISPLAY( "--dictID ## : force dictionary ID to specified value (default: random)\n");
|
||||
DISPLAY( "--dictID=# : force dictionary ID to specified value (default: random)\n");
|
||||
#endif
|
||||
#ifndef ZSTD_NOBENCH
|
||||
DISPLAY( "\n");
|
||||
@ -179,6 +179,23 @@ static void waitEnter(void)
|
||||
(void)unused;
|
||||
}
|
||||
|
||||
/*! lastNameFromPath() :
    @return : pointer to the last component of `path`, i.e. the text following
              the last '/' and then, within that remainder, the last '\\'.
              The result points inside `path` (no allocation) ;
              it is `path` itself when no separator is present,
              and an empty string when `path` ends with a separator.
*/
static const char* lastNameFromPath(const char* path)
{
    const char* name = path;
    const char* separator = strrchr(name, '/');   /* scan once, reuse the result */
    if (separator != NULL) name = separator + 1;
    separator = strrchr(name, '\\');   /* windows */
    if (separator != NULL) name = separator + 1;
    return name;
}
|
||||
|
||||
/*! exeNameMatch() :
    Compares the executable name with an expected program name, ignoring a
    trailing extension : `exeName` matches when it starts with `test` and the
    character following that prefix is either the end of string or a '.'.
    @return : a non-zero value if exeName matches test, excluding the extension
*/
static int exeNameMatch(const char* exeName, const char* test)
{
    size_t const testLength = strlen(test);   /* measured once instead of three times */
    if (strncmp(exeName, test, testLength) != 0) return 0;
    /* prefix matched, so exeName[testLength] is at worst its terminating '\0' */
    return (exeName[testLength] == '\0') || (exeName[testLength] == '.');
}
|
||||
|
||||
/*! readU32FromChar() :
|
||||
@return : unsigned integer value read from input in `char` format
|
||||
allows and interprets K, KB, KiB, M, MB and MiB suffix.
|
||||
@ -318,20 +335,17 @@ int main(int argCount, const char* argv[])
|
||||
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
|
||||
filenameTable[0] = stdinmark;
|
||||
g_displayOut = stderr;
|
||||
/* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
|
||||
{ size_t pos;
|
||||
for (pos = (int)strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } }
|
||||
programName += pos;
|
||||
}
|
||||
|
||||
programName = lastNameFromPath(programName);
|
||||
|
||||
/* preset behaviors */
|
||||
if (!strcmp(programName, ZSTD_UNZSTD)) operation=zom_decompress;
|
||||
if (!strcmp(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; }
|
||||
if (!strcmp(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); FIO_setRemoveSrcFile(1); } /* behave like gzip */
|
||||
if (!strcmp(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */
|
||||
if (!strcmp(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat */
|
||||
if (!strcmp(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */
|
||||
if (!strcmp(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */
|
||||
if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
|
||||
if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; }
|
||||
if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); FIO_setRemoveSrcFile(1); } /* behave like gzip */
|
||||
if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */
|
||||
if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat */
|
||||
if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */
|
||||
if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */
|
||||
memset(&compressionParams, 0, sizeof(compressionParams));
|
||||
|
||||
/* command switches */
|
||||
@ -371,8 +385,8 @@ int main(int argCount, const char* argv[])
|
||||
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
|
||||
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
|
||||
if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; }
|
||||
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; }
|
||||
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; }
|
||||
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
|
||||
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
|
||||
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
|
||||
@ -404,6 +418,8 @@ int main(int argCount, const char* argv[])
|
||||
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
/* fall-through, will trigger bad_usage() later on */
|
||||
}
|
||||
@ -533,14 +549,14 @@ int main(int argCount, const char* argv[])
|
||||
continue;
|
||||
} /* if (argument[0]=='-') */
|
||||
|
||||
if (nextArgumentIsMaxDict) {
|
||||
if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
nextArgumentIsMaxDict = 0;
|
||||
lastCommand = 0;
|
||||
maxDictSize = readU32FromChar(&argument);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nextArgumentIsDictID) {
|
||||
if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
nextArgumentIsDictID = 0;
|
||||
lastCommand = 0;
|
||||
dictID = readU32FromChar(&argument);
|
||||
|
@ -253,12 +253,12 @@ $ECHO "- Create second (different) dictionary "
|
||||
$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC
|
||||
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
|
||||
$ECHO "- Create dictionary with short dictID"
|
||||
$ZSTD --train *.c ../programs/*.c --dictID 1 -o tmpDict1
|
||||
$ZSTD --train *.c ../programs/*.c --dictID=1 -o tmpDict1
|
||||
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
||||
$ECHO "- Create dictionary with wrong dictID parameter order (must fail)"
|
||||
$ZSTD --train *.c ../programs/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument "
|
||||
$ECHO "- Create dictionary with size limit"
|
||||
$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict 4K -v
|
||||
$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict=4K -v
|
||||
$ECHO "- Create dictionary with wrong parameter order (must fail)"
|
||||
$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument "
|
||||
$ECHO "- Compress without dictID"
|
||||
@ -303,10 +303,10 @@ $ECHO "- Create second (different) dictionary"
|
||||
$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
|
||||
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
|
||||
$ECHO "- Create dictionary with short dictID"
|
||||
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
|
||||
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
|
||||
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
||||
$ECHO "- Create dictionary with size limit"
|
||||
$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
|
||||
$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
|
||||
rm tmp*
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user